Commit a0d1bf13 authored by Jerome Mariette's avatar Jerome Mariette
Browse files

add the 454_16SrRNA pipeline

parent 8fb6a8f2
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>nG6-current</name>
<name>nG6-meta2</name>
<comment></comment>
<projects>
</projects>
......
#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
BEGIN{foreach (@INC) {s/\/usr\/local\/packages/\/local\/platform/}};
use lib (@INC,$ENV{"PERL_MOD_DIR"});
no lib "$ENV{PERL_MOD_DIR}/i686-linux";
no lib ".";
=head1 NAME
create_mothur_classify_seqs_iterator_list.pl - Default output is a workflow iterator that can be used to iterate over input for classify.seqs
=head1 SYNOPSIS
USAGE: ./create_mothur_classify_seqs_iterator_list.pl --fasta_file_list=/path/to/fasta/file/list --names_file_list=/path/to/name/files/list
--groups_file_list=/path/to/name/groups/list --output=/path/to/output/iterator
=head1 OPTIONS
B<--fasta_file_list, -f>
A list of fasta files.
B<--names_file_list, -n>
A list of mothur unique.seqs generated name files.
B<--groups_file_list, -g>
A list of mothur trim.seqs generated group files.
B<--output, -o>
Desired path to output iterator file
B<--log, -l>
Optional. Log file.
B<--debug, -d>
Optional. Debug level.
B<--help>
Print perldocs for this script.
=head1 DESCRIPTION
Creates an ergatis/workflow iterator list file for a distributed mothur classify.seqs job. The iterator contains all the parameters needed to run
classify.seqs successfully, attempting to pair up groups of files (fasta file - name file - group file) based on the base filename prefix
(e.g. AMP01_LUNG.trim.unique.fasta - AMP01_LUNG.trim.names - AMP01_LUNG.trim.unique.group would be grouped together as they all carry the AMP01_LUNG prefix).
=head1 INPUT
The only mandatory input file is the fasta file. Two optional files can be provided
1.) name file
2.) group file
=head1 OUTPUT
An ergatis iterator list.
=head1 CONTACT
Jerome Mariette
Jerome.Mariette@toulouse.inra.fr
=cut
use strict;
use warnings;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use Pod::Usage;
use File::Basename;
use Ergatis::Logger;
umask(0000);

my $logger;    # assigned inside parse_options(); used by every subroutine below
my %options = parse_options();

my $fasta_file  = $options{'fasta_file'};
my $fasta_list  = $options{'fasta_file_list'};
my $names_list  = $options{'names_file_list'};
my $groups_list = $options{'groups_file_list'};
my $output      = $options{'output'};

my @fasta_files = parse_fasta_files($fasta_file, $fasta_list);

## The name and group lists are optional. The lookups are declared
## unconditionally ("my $x = ... if COND" has undefined behaviour in Perl) and
## each one is filled only when ITS OWN option was supplied -- the group list
## used to be gated on --names_file_list by mistake.
my $name_files = {};
if ( defined $names_list ) {
    $name_files = parse_list_file($names_list) || {};
}
my $group_files = {};
if ( defined $groups_list ) {
    $group_files = parse_list_file($groups_list) || {};
}

## Three-argument open with a lexical handle: the output path is never
## re-interpreted as an open mode and the handle does not leak globally.
open( my $out_fh, '>', $output )
    or $logger->logdie("Could not open output iterator $output for writing: $!");

## Header row naming the iterator columns.
print {$out_fh} join( "\t",
    '$;I_FILE_BASE$;',
    '$;I_FILE_NAME$;',
    '$;I_FILE_PATH$;',
    '$;NAME_FILE$;',
    '$;GROUP_FILE$;' ) . "\n";

## One row per fasta file; the name/group columns stay empty when no file with
## the same prefix (basename up to its first '.') was listed.
foreach my $fasta (@fasta_files) {
    my $filename  = basename($fasta);
    my $file_base = fileparse( $fasta, '\.(.*)' );

    my $name  = defined $name_files->{$file_base}  ? $name_files->{$file_base}  : '';
    my $group = defined $group_files->{$file_base} ? $group_files->{$file_base} : '';

    print {$out_fh} "$file_base\t$filename\t$fasta\t$name\t$group\n";
}

## Buffered write errors only surface when the handle is closed.
close($out_fh)
    or $logger->logdie("Could not close output iterator $output: $!");
#########################################################################
# #
# SUBROUTINES #
# #
#########################################################################
## Parses a list file (one path per line) and returns a hash reference that
## maps each file's prefix -- its basename up to the first '.' -- to the path
## exactly as it is written in the list.
##
## Every listed path is checked with verify_file(). When two paths share a
## prefix the first one is kept and a warning is logged for the later one.
## Blank lines are skipped. An empty hash reference (never undef) is returned
## when the list holds no entries.
sub parse_list_file {
    my $file  = shift;
    my $files = {};    # "()" here used to leave $files undefined for an empty list

    ## Three-argument open with a lexical handle instead of a global bareword.
    open( my $list_fh, '<', $file )
        or $logger->logdie("Could not open list $file: $!");

    while ( my $line = <$list_fh> ) {
        chomp($line);
        next unless $line =~ /\S/;    # a stray blank line is not a file to verify

        my $file_prefix = fileparse( $line, '\.(.*)' );
        verify_file($line);

        if ( exists $files->{$file_prefix} ) {
            $logger->logwarn("Duplicate file prefix found for file $line");
        }
        else {
            $files->{$file_prefix} = $line;
        }
    }
    close($list_fh);

    return $files;
}
## Collects every fasta file to process and returns them as a list.
##
##   $fasta_file - optional path of a single fasta file
##   $fasta_list - optional path of a list file holding one fasta path per line
##
## Each path (and the list file itself) is checked with verify_file(). Blank
## lines in the list are skipped. Dies through the logger when no fasta file
## was found at all.
sub parse_fasta_files {
    my ($fasta_file, $fasta_list) = @_;
    my @files;

    ## Handle a single fasta file being passed in...
    if ( defined $fasta_file ) {
        verify_file($fasta_file);
        push( @files, $fasta_file );
    }

    ## ...and/or a list of them. Guarding on defined() keeps an absent list
    ## from being reported as a missing file with an empty name.
    if ( defined $fasta_list ) {
        verify_file($fasta_list);

        open( my $list_fh, '<', $fasta_list )
            or $logger->logdie("Could not open fasta file list $fasta_list: $!");
        while ( my $line = <$list_fh> ) {
            chomp($line);
            next unless $line =~ /\S/;    # ignore blank lines

            verify_file($line);
            push( @files, $line );
        }
        close($list_fh);
    }

    $logger->logdie("No fasta files found in input provided.") if ( scalar @files == 0 );
    return @files;
}
## Checks each path given: it must exist, be readable and have non-zero size.
## The first condition that fails is reported through $logger->logdie().
## Returns 1 once every path has been examined.
sub verify_file {
    foreach my $file (@_) {
        my $usable = (-e $file) && (-r $file) && (-s $file);
        next if $usable;

        if ( !-e $file ) {
            $logger->logdie("File $file does not exist");
        }
        elsif ( !-r $file ) {
            $logger->logdie("File $file is not readable");
        }
        elsif ( !-s $file ) {
            $logger->logdie("File $file has zero content");
        }
    }

    return 1;
}
## Reads the command line, sets up the shared $logger and returns the parsed
## options as a hash. Shows usage when GetOptions() rejects the command line
## and the full perldoc for --help. --fasta_file_list and --output are
## required; a missing one is reported through $logger->logdie().
sub parse_options {
    my %opts;

    my @option_spec = (
        'fasta_file|i=s',
        'fasta_file_list|f=s',
        'names_file_list|n=s',
        'groups_file_list|g=s',
        'output|o=s',
        'log|l=s',
        'debug|d=s',
        'help',
    );
    pod2usage() unless GetOptions( \%opts, @option_spec );

    pod2usage( { -exitval => 0, -verbose => 2, -output => \*STDERR } )
        if $opts{'help'};

    ## Initialize and configure logging...
    my $logfile = $opts{'log'};
    $logfile = Ergatis::Logger::get_default_filename() unless $logfile;

    ## The default debug level is also stored back into the returned options.
    $opts{'debug'} = 4 unless $opts{'debug'};
    my $debug = $opts{'debug'};

    $logger = Ergatis::Logger->new( 'LOG_FILE'  => $logfile,
                                    'LOG_LEVEL' => $debug );
    $logger = Ergatis::Logger::get_logger();

    ## Check to make sure certain parameters are defined...
    unless ( defined $opts{'fasta_file_list'} ) {
        $logger->logdie("Please specify a valid fasta file list.");
    }
    unless ( defined $opts{'output'} ) {
        $logger->logdie("Please specify an output iterator file.");
    }

    return %opts;
}
\ No newline at end of file
#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
BEGIN{foreach (@INC) {s/\/usr\/local\/packages/\/local\/platform/}};
use lib (@INC,$ENV{"PERL_MOD_DIR"});
no lib "$ENV{PERL_MOD_DIR}/i686-linux";
no lib ".";
=head1 NAME
create_mothur_cluster_iterator_list.pl - Default output is a workflow iterator that can be used to iterate over input for cluster
=head1 SYNOPSIS
USAGE: ./create_mothur_cluster_iterator_list.pl --distance_file=/path/to/distance/file --distance_file_list=/path/to/distance/files/list
--names_file_list=/path/to/name/files/list --groups_file_list=/path/to/group/files/list
--output=/path/to/output/iterator
=head1 OPTIONS
B<--distance_file, -i>
A single mothur distance file.
B<--distance_file_list, -a>
A list of mothur distance files.
B<--names_file_list, -n>
A list of mothur unique.seqs generated name files.
B<--groups_file_list>
A list of group files.
B<--output, -o>
Desired path to output iterator file
B<--log, -l>
Optional. Log file.
B<--debug, -d>
Optional. Debug level.
B<--help>
Print perldocs for this script.
=head1 DESCRIPTION
Creates an ergatis/workflow iterator list file for a distributed mothur cluster job. The iterator contains all the parameters needed to run
cluster successfully, attempting to pair up groups of files (distance file - name file - group file) based on the base filename prefix
(e.g. AMP01_LUNG.trim.dist - AMP01_LUNG.trim.names would be grouped together as they all carry the AMP01_LUNG prefix).
=head1 INPUT
The only mandatory input file is the distance file. Optionally, a names file and a group file generated by trim.seqs can also be included.
=head1 OUTPUT
An ergatis iterator list.
=head1 CONTACT
Jerome Mariette
Jerome.Mariette@toulouse.inra.fr
=cut
use strict;
use warnings;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use Pod::Usage;
use File::Basename;
use Ergatis::Logger;
umask(0000);

my $logger;    # assigned inside parse_options(); used by every subroutine below
my %options = parse_options();

my $distance_file = $options{'distance_file'};
my $distance_list = $options{'distance_file_list'};
my $names_list    = $options{'names_file_list'};
my $groups_list   = $options{'groups_file_list'};
my $output        = $options{'output'};

# Parse input...
my @distance_files = parse_distance_files($distance_file, $distance_list);

# The name and group lists are optional. The lookups are declared
# unconditionally because "my $x = ... if COND" has undefined behaviour in
# Perl; each is filled only when its option was supplied.
my $name_files = {};
if ( defined $names_list ) {
    $name_files = parse_list_file($names_list) || {};
}
my $group_files = {};
if ( defined $groups_list ) {
    $group_files = parse_list_file($groups_list) || {};
}

# Three-argument open with a lexical handle: the output path is never
# re-interpreted as an open mode and the handle does not leak globally.
open( my $out_fh, '>', $output )
    or $logger->logdie("Could not open output iterator $output for writing: $!");

# Header row naming the iterator columns.
print {$out_fh} join( "\t",
    '$;I_FILE_BASE$;',
    '$;I_FILE_NAME$;',
    '$;I_FILE_PATH$;',
    '$;NAME_FILE$;',
    '$;GROUP_FILE$;' ) . "\n";

# Iterate over all distance files and if a corresponding name and/or group
# file exists (same basename up to the first '.') print it out to the iterator
# list as well; otherwise that column is left empty.
foreach my $distance (@distance_files) {
    my $filename  = basename($distance);
    my $file_base = fileparse( $distance, '\.(.*)' );

    my $name  = defined $name_files->{$file_base}  ? $name_files->{$file_base}  : '';
    my $group = defined $group_files->{$file_base} ? $group_files->{$file_base} : '';

    print {$out_fh} "$file_base\t$filename\t$distance\t$name\t$group\n";
}

# Buffered write errors only surface when the handle is closed.
close($out_fh)
    or $logger->logdie("Could not close output iterator $output: $!");
#########################################################################
# #
# SUBROUTINES #
# #
#########################################################################
## Parses a list file (one path per line) and returns a hash reference that
## maps each file's prefix -- its basename up to the first '.' -- to the path
## exactly as it is written in the list.
##
## Every listed path is checked with verify_file(). When two paths share a
## prefix the first one is kept and a warning is logged for the later one.
## Blank lines are skipped. An empty hash reference (never undef) is returned
## when the list holds no entries.
sub parse_list_file {
    my $file  = shift;
    my $files = {};    # "()" here used to leave $files undefined for an empty list

    ## Three-argument open with a lexical handle instead of a global bareword.
    open( my $list_fh, '<', $file )
        or $logger->logdie("Could not open list $file: $!");

    while ( my $line = <$list_fh> ) {
        chomp($line);
        next unless $line =~ /\S/;    # a stray blank line is not a file to verify

        my $file_prefix = fileparse( $line, '\.(.*)' );
        verify_file($line);

        if ( exists $files->{$file_prefix} ) {
            $logger->logwarn("Duplicate file prefix found for file $line");
        }
        else {
            $files->{$file_prefix} = $line;
        }
    }
    close($list_fh);

    return $files;
}
# Collects every distance file to process and returns them as a list.
#
#   $distance_file - optional path of a single distance file
#   $distance_list - optional path of a list file holding one distance file
#                    path per line
#
# Each path (and the list file itself) is checked with verify_file(). Blank
# lines in the list are skipped. Dies through the logger when no distance file
# was found at all.
sub parse_distance_files {
    my ($distance_file, $distance_list) = @_;
    my @files;

    ## Handle a single distance file being passed in...
    if ( defined $distance_file ) {
        verify_file($distance_file);
        push( @files, $distance_file );
    }

    ## ...and/or a list of them. The list is optional: verifying it
    ## unconditionally made a run with only --distance_file die on an
    ## undefined path.
    if ( defined $distance_list ) {
        verify_file($distance_list);

        open( my $list_fh, '<', $distance_list )
            or $logger->logdie("Could not open distance file list $distance_list: $!");
        while ( my $line = <$list_fh> ) {
            chomp($line);
            next unless $line =~ /\S/;    # ignore blank lines

            verify_file($line);
            push( @files, $line );
        }
        close($list_fh);
    }

    $logger->logdie("No distance files found in input provided.") if ( scalar @files == 0 );
    return @files;
}
# Walks over the given paths and reports, through $logger->logdie(), the first
# problem found with each one: missing, unreadable or zero-sized.
# Returns 1 after all paths have been looked at.
sub verify_file {
    my @files = @_;

    FILE:
    for my $file (@files) {
        next FILE if ( (-e $file) && (-r $file) && (-s $file) );

        if ( !-e $file ) {
            $logger->logdie("File $file does not exist");
            next FILE;
        }
        if ( !-r $file ) {
            $logger->logdie("File $file is not readable");
            next FILE;
        }
        if ( !-s $file ) {
            $logger->logdie("File $file has zero content");
        }
    }

    return 1;
}
## Parses the command line, initialises the shared $logger and returns the
## options as a hash.
##
## Prints usage via pod2usage() when GetOptions() rejects the command line and
## the full perldoc when --help is given. Dies through the logger when neither
## --distance_file nor --distance_file_list is supplied, or when --output is
## missing. As a side effect $opts{'debug'} defaults to 4.
sub parse_options {
    my %opts = ();

    ## Every short alias must be unique. --groups_file_list used to declare
    ## "-n", which --names_file_list already owns; it now uses "-g".
    ## "-s" is accepted as an extra alias next to "-a" for
    ## --distance_file_list.
    GetOptions(\%opts,
               'distance_file|i=s',
               'distance_file_list|a|s=s',
               'names_file_list|n=s',
               'groups_file_list|g=s',
               'output|o=s',
               'log|l=s',
               'debug|d=s',
               'help') || pod2usage();

    if ($opts{'help'}) {
        pod2usage( { -exitval => 0, -verbose => 2, -output => \*STDERR } );
    }

    ## Initialize and configure logging...
    my $logfile = $opts{'log'} || Ergatis::Logger::get_default_filename();
    my $debug   = $opts{'debug'} ||= 4;
    $logger = Ergatis::Logger->new( 'LOG_FILE'  => $logfile,
                                    'LOG_LEVEL' => $debug );
    $logger = Ergatis::Logger::get_logger();

    ## Check to make sure certain parameters are defined...
    ( defined $opts{'distance_file'} || defined $opts{'distance_file_list'} )
        || $logger->logdie("Please specify a distance file or a distance file list.");
    defined ($opts{'output'}) || $logger->logdie("Please specify an output iterator file.");

    return %opts;
}
\ No newline at end of file
#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
BEGIN{foreach (@INC) {s/\/usr\/local\/packages/\/local\/platform/}};
use lib (@INC,$ENV{"PERL_MOD_DIR"});
no lib "$ENV{PERL_MOD_DIR}/i686-linux";
no lib ".";
=head1 NAME
create_mothur_pre_cluster_iterator_list.pl - Default output is a workflow iterator that can be used to iterate over input for pre.cluster
=head1 SYNOPSIS
USAGE: ./create_mothur_pre_cluster_iterator_list.pl --fasta_file_list=/path/to/fasta/files/list --names_file_list=/path/to/name/files/list
--output=/path/to/output/iterator
=head1 OPTIONS
B<--fasta_file_list, -f>
A list of fasta files.
B<--names_file_list, -n>
A list of mothur unique.seqs generated name files.
B<--output, -o>
Desired path to output iterator file
B<--log, -l>
Optional. Log file.
B<--debug, -d>
Optional. Debug level.
B<--help>
Print perldocs for this script.
=head1 DESCRIPTION
Creates an ergatis/workflow iterator list file for a distributed mothur pre.cluster job. The iterator contains all the parameters needed to run
pre.cluster successfully, attempting to pair up groups of files (fasta file - name file) based on the base filename prefix
(e.g. AMP01_LUNG.trim.unique.fna - AMP01_LUNG.trim.names would be grouped together as they all carry the AMP01_LUNG prefix).
=head1 INPUT
Both fasta and name files are mandatory.
=head1 OUTPUT
An ergatis iterator list.
=head1 CONTACT
Jerome Mariette
Jerome.Mariette@toulouse.inra.fr
=cut
use strict;
use warnings;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
use Pod::Usage;
use File::Basename;
use Ergatis::Logger;
umask(0000);

my $logger;    # assigned inside parse_options(); used by every subroutine below
my %options = parse_options();

my $fasta_file = $options{'fasta_file'};
my $fasta_list = $options{'fasta_file_list'};
my $names_list = $options{'names_file_list'};
my $output     = $options{'output'};

my @fasta_files = parse_fasta_files($fasta_file, $fasta_list);

## The lookup is declared unconditionally because "my $x = ... if COND" has
## undefined behaviour in Perl; it is filled only when a names list was given.
my $name_files = {};
if ( defined $names_list ) {
    $name_files = parse_list_file($names_list) || {};
}

## Three-argument open with a lexical handle: the output path is never
## re-interpreted as an open mode and the handle does not leak globally.
open( my $out_fh, '>', $output )
    or $logger->logdie("Could not open output iterator $output for writing: $!");

## Header row naming the iterator columns.
print {$out_fh} join( "\t",
    '$;I_FILE_BASE$;',
    '$;I_FILE_NAME$;',
    '$;I_FILE_PATH$;',
    '$;NAME_FILE$;' ) . "\n";

## One row per fasta file; the name column stays empty when no name file with
## the same prefix (basename up to its first '.') was listed.
foreach my $fasta (@fasta_files) {
    my $filename  = basename($fasta);
    my $file_base = fileparse( $fasta, '\.(.*)' );

    my $name = defined $name_files->{$file_base} ? $name_files->{$file_base} : '';

    print {$out_fh} "$file_base\t$filename\t$fasta\t$name\n";
}

## Buffered write errors only surface when the handle is closed.
close($out_fh)
    or $logger->logdie("Could not close output iterator $output: $!");
#########################################################################
# #
# SUBROUTINES #
# #
#########################################################################
## Parses a list file (one path per line) and returns a hash reference that
## maps each file's prefix -- its basename up to the first '.' -- to the path
## exactly as it is written in the list.
##
## Every listed path is checked with verify_file(). When two paths share a
## prefix the first one is kept and a warning is logged for the later one.
## Blank lines are skipped. An empty hash reference (never undef) is returned
## when the list holds no entries.
sub parse_list_file {
    my $file  = shift;
    my $files = {};    # "()" here used to leave $files undefined for an empty list

    ## Three-argument open with a lexical handle instead of a global bareword.
    open( my $list_fh, '<', $file )
        or $logger->logdie("Could not open list $file: $!");

    while ( my $line = <$list_fh> ) {
        chomp($line);
        next unless $line =~ /\S/;    # a stray blank line is not a file to verify

        my $file_prefix = fileparse( $line, '\.(.*)' );
        verify_file($line);

        if ( exists $files->{$file_prefix} ) {
            $logger->logwarn("Duplicate file prefix found for file $line");
        }
        else {
            $files->{$file_prefix} = $line;
        }
    }
    close($list_fh);

    return $files;
}
## Collects every fasta file to process and returns them as a list.
##
##   $fasta_file - optional path of a single fasta file
##   $fasta_list - optional path of a list file holding one fasta path per line
##
## Each path (and the list file itself) is checked with verify_file(). Blank
## lines in the list are skipped. Dies through the logger when no fasta file
## was found at all.
sub parse_fasta_files {
    my ($fasta_file, $fasta_list) = @_;
    my @files;

    ## Handle a single fasta file being passed in...
    if ( defined $fasta_file ) {
        verify_file($fasta_file);
        push( @files, $fasta_file );
    }

    ## ...and/or a list of them. Guarding on defined() keeps an absent list
    ## from being reported as a missing file with an empty name.
    if ( defined $fasta_list ) {
        verify_file($fasta_list);

        open( my $list_fh, '<', $fasta_list )
            or $logger->logdie("Could not open fasta file list $fasta_list: $!");
        while ( my $line = <$list_fh> ) {
            chomp($line);
            next unless $line =~ /\S/;    # ignore blank lines

            verify_file($line);
            push( @files, $line );
        }
        close($list_fh);
    }

    $logger->logdie("No fasta files found in input provided.") if ( scalar @files == 0 );
    return @files;
}
## For every path handed in, make sure it exists, can be read and is not
## empty; the first failed check is raised through $logger->logdie().
## Returns 1 when the loop completes.
sub verify_file {
    my @files = @_;

    foreach my $file (@files) {
        my $passes_all = (-e $file) && (-r $file) && (-s $file);
        next if $passes_all;

        if ( -e $file ) {
            if ( -r $file ) {
                $logger->logdie("File $file has zero content") if ( !-s $file );
            }
            else {
                $logger->logdie("File $file is not readable");
            }
        }
        else {
            $logger->logdie("File $file does not exist");
        }
    }

    return 1;
}
sub parse_options {
my %opts = ();
GetOptions(\%opts,
'fasta_file|i=s',
'fasta_file_list|f=s',
'names_file_list|n=s',
'output|o=s',
'log|l=s',
'debug|d=s',
'help') || pod2usage();
if ($opts{'help'}) {
pod2usage ( { -exitval => 0, -verbose => 2, -output => \*STDERR } );
}
## Initialize and configure logging...
my $logfile = $opts{'log'} || Ergatis::Logger::get_default_filename();
my $debug = $opts{'debug'} ||= 4;