| Filename | /Users/ap13/pathogens/Roary/lib/Bio/Roary/Output/GroupsMultifastaNucleotide.pm |
| Statements | Executed 24 statements in 1.42ms |
| Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 1.74ms | 8.45ms | Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@18 |
| 1 | 1 | 1 | 48µs | 4.57ms | Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@17 |
| 1 | 1 | 1 | 16µs | 71µs | Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@19 |
| 1 | 1 | 1 | 15µs | 196µs | Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@148 |
| 1 | 1 | 1 | 14µs | 51µs | Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@20 |
| 1 | 1 | 1 | 8µs | 8µs | Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@21 |
| 1 | 1 | 1 | 6µs | 6µs | Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@22 |
| 0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_bed_output_filename |
| 0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_build__input_seqio |
| 0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_build__output_filename |
| 0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_build_fasta_file |
| 0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_create_bed_file_from_gff |
| 0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_create_nucleotide_fasta_file_from_gff |
| 0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_extract_nucleotide_regions |
| 0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_extracted_nucleotide_fasta_file_from_bed_filename |
| 0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_group_file_name |
| 0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_group_seq_io_obj |
| 0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::_nucleotide_fasta_file_from_gff_filename |
| 0 | 0 | 0 | 0s | 0s | Bio::Roary::Output::GroupsMultifastaNucleotide::populate_files |
| Line | Statements | Time on line | Calls | Time in subs | Code |
|---|---|---|---|---|---|
| 1 | package Bio::Roary::Output::GroupsMultifastaNucleotide; | ||||
| 2 | |||||
| 3 | # ABSTRACT: Take in a GFF file and a groups file and output one multifasta file per group with nucleotide sequences. | ||||
| 4 | |||||
| 5 | =head1 SYNOPSIS | ||||
| 6 | |||||
| 7 | Take in a GFF file and a groups file and output one multifasta file per group with nucleotide sequences. | ||||
| 8 | use Bio::Roary::Output::GroupsMultifastaNucleotide; | ||||
| 9 | |||||
| 10 | my $obj = Bio::Roary::Output::GroupsMultifastaNucleotide->new( | ||||
| 11 | group_names => ['aaa','bbb'], | ||||
| 12 | ); | ||||
| 13 | $obj->populate_files(); | ||||
| 14 | |||||
| 15 | =cut | ||||
| 16 | |||||
| 17 | 2 | 44µs | 2 | 9.08ms | # spent 4.57ms (48µs+4.52) within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@17 which was called:
# once (48µs+4.52ms) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 17 # spent 4.57ms making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@17
# spent 4.52ms making 1 call to Moose::import |
| 18 | 2 | 277µs | 1 | 8.45ms | # spent 8.45ms (1.74+6.71) within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@18 which was called:
# once (1.74ms+6.71ms) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 18 # spent 8.45ms making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@18 |
| 19 | 2 | 41µs | 2 | 126µs | # spent 71µs (16+55) within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@19 which was called:
# once (16µs+55µs) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 19 # spent 71µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@19
# spent 55µs making 1 call to Exporter::import |
| 20 | 2 | 29µs | 2 | 88µs | # spent 51µs (14+37) within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@20 which was called:
# once (14µs+37µs) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 20 # spent 51µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@20
# spent 37µs making 1 call to Exporter::import |
| 21 | 2 | 22µs | 1 | 8µs | # spent 8µs within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@21 which was called:
# once (8µs+0s) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 21 # spent 8µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@21 |
| 22 | 2 | 878µs | 1 | 6µs | # spent 6µs within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@22 which was called:
# once (6µs+0s) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 22 # spent 6µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@22 |
| 23 | |||||
| 24 | 1 | 3µs | 1 | 2.45ms | has 'gff_file' => ( is => 'ro', isa => 'Str', required => 1 ); # spent 2.45ms making 1 call to Moose::has |
| 25 | # Not implemented | ||||
| 26 | 1 | 1µs | 1 | 1.70ms | has 'group_names' => ( is => 'ro', isa => 'ArrayRef', required => 0 ); # spent 1.70ms making 1 call to Moose::has |
| 27 | 1 | 2µs | 1 | 1.53ms | has 'output_directory' => ( is => 'ro', isa => 'Str', required => 1 ); # spent 1.53ms making 1 call to Moose::has |
| 28 | 1 | 2µs | 1 | 1.45ms | has 'annotate_groups' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 ); # spent 1.45ms making 1 call to Moose::has |
| 29 | 1 | 2µs | 1 | 1.54ms | has 'output_multifasta_files' => ( is => 'ro', isa => 'Bool', default => 0 ); # spent 1.54ms making 1 call to Moose::has |
| 30 | |||||
| 31 | 1 | 2µs | 1 | 1.96ms | has 'fasta_file' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build_fasta_file' ); # spent 1.96ms making 1 call to Moose::has |
| 32 | 1 | 2µs | 1 | 2.46ms | has '_input_seqio' => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__input_seqio' ); # spent 2.46ms making 1 call to Moose::has |
| 33 | |||||
| 34 | 1 | 2µs | 1 | 2.04ms | has '_output_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__output_filename' ); # spent 2.04ms making 1 call to Moose::has |
| 35 | |||||
| 36 | |||||
| 37 | sub _build__output_filename | ||||
| 38 | { | ||||
| 39 | my ($self) = @_; | ||||
| 40 | my ( $filename, $directories, $suffix ) = fileparse($self->gff_file); | ||||
| 41 | return join('/',($self->output_directory, $filename.'.tmp_nuc_sequences.fa' )); | ||||
| 42 | } | ||||
| 43 | |||||
| 44 | sub _build__input_seqio { | ||||
| 45 | my ($self) = @_; | ||||
| 46 | return Bio::SeqIO->new( -file => $self->fasta_file, -format => 'Fasta' ); | ||||
| 47 | } | ||||
| 48 | |||||
| 49 | sub populate_files { | ||||
| 50 | my ($self) = @_; | ||||
| 51 | while ( my $input_seq = $self->_input_seqio->next_seq() ) | ||||
| 52 | { | ||||
| 53 | if ( $self->annotate_groups->_ids_to_groups->{$input_seq->display_id} ) | ||||
| 54 | { | ||||
| 55 | my $current_group = $self->annotate_groups->_ids_to_groups->{$input_seq->display_id}; | ||||
| 56 | |||||
| 57 | my $number_of_genes = @{$self->annotate_groups->_groups_to_id_names->{$current_group}}; | ||||
| 58 | # There's no need to align a single sequence | ||||
| 59 | next if($self->output_multifasta_files == 0 && $number_of_genes == 1); | ||||
| 60 | |||||
| 61 | my $output_seq = $self->_group_seq_io_obj($current_group,$number_of_genes); | ||||
| 62 | $output_seq->write_seq($input_seq); | ||||
| 63 | } | ||||
| 64 | } | ||||
| 65 | |||||
| 66 | unlink($self->fasta_file); | ||||
| 67 | 1; | ||||
| 68 | } | ||||
| 69 | |||||
| 70 | sub _group_file_name | ||||
| 71 | { | ||||
| 72 | my ($self,$group_name,$num_group_genes) = @_; | ||||
| 73 | my $annotated_group_name = $self->annotate_groups->_groups_to_consensus_gene_names->{$group_name}; | ||||
| 74 | $annotated_group_name =~ s!\W!_!gi; | ||||
| 75 | my $filename = $annotated_group_name.'.fa'; | ||||
| 76 | my $group_file_name = join('/',($self->output_directory, $filename )); | ||||
| 77 | return $group_file_name; | ||||
| 78 | } | ||||
| 79 | |||||
| 80 | sub _group_seq_io_obj | ||||
| 81 | { | ||||
| 82 | my ($self,$group_name,$num_group_genes) = @_; | ||||
| 83 | my $filename = $self->_group_file_name($group_name,$num_group_genes); | ||||
| 84 | return Bio::SeqIO->new( -file => ">>".$filename, -format => 'Fasta' ); | ||||
| 85 | } | ||||
| 86 | |||||
| 87 | |||||
| 88 | sub _extracted_nucleotide_fasta_file_from_bed_filename { | ||||
| 89 | my ($self) = @_; | ||||
| 90 | return join( '.', ( $self->_output_filename, 'intermediate.extracted.fa' ) ); | ||||
| 91 | } | ||||
| 92 | |||||
| 93 | sub _create_bed_file_from_gff { | ||||
| 94 | my ($self) = @_; | ||||
| 95 | my $cmd = | ||||
| 96 | 'sed -n \'/##gff-version 3/,/##FASTA/p\' ' | ||||
| 97 | . $self->gff_file | ||||
| 98 | . ' | grep -v \'^#\' | awk \'{print $1"\t"($4-1)"\t"($5)"\t"$9"\t1\t"$7}\' | sed \'s/ID=//\' | sed \'s/;[^\t]*\t/\t/g\' > ' | ||||
| 99 | . $self->_bed_output_filename; | ||||
| 100 | system($cmd); | ||||
| 101 | } | ||||
| 102 | |||||
| 103 | sub _create_nucleotide_fasta_file_from_gff { | ||||
| 104 | my ($self) = @_; | ||||
| 105 | my $cmd = | ||||
| 106 | 'sed -n \'/##FASTA/,//p\' ' | ||||
| 107 | . $self->gff_file | ||||
| 108 | . ' | grep -v \'##FASTA\' > ' | ||||
| 109 | . $self->_nucleotide_fasta_file_from_gff_filename; | ||||
| 110 | system($cmd); | ||||
| 111 | } | ||||
| 112 | |||||
| 113 | sub _nucleotide_fasta_file_from_gff_filename { | ||||
| 114 | my ($self) = @_; | ||||
| 115 | return join( '.', ( $self->_output_filename, 'intermediate.fa' ) ); | ||||
| 116 | } | ||||
| 117 | |||||
| 118 | sub _bed_output_filename { | ||||
| 119 | my ($self) = @_; | ||||
| 120 | return join( '.', ( $self->_output_filename, 'intermediate.bed' ) ); | ||||
| 121 | } | ||||
| 122 | |||||
| 123 | sub _extract_nucleotide_regions { | ||||
| 124 | my ($self) = @_; | ||||
| 125 | |||||
| 126 | $self->_create_nucleotide_fasta_file_from_gff; | ||||
| 127 | $self->_create_bed_file_from_gff; | ||||
| 128 | |||||
| 129 | my $cmd = | ||||
| 130 | 'bedtools getfasta -s -fi ' | ||||
| 131 | . $self->_nucleotide_fasta_file_from_gff_filename | ||||
| 132 | . ' -bed ' | ||||
| 133 | . $self->_bed_output_filename . ' -fo ' | ||||
| 134 | . $self->_extracted_nucleotide_fasta_file_from_bed_filename | ||||
| 135 | . ' -name > /dev/null 2>&1'; | ||||
| 136 | system($cmd); | ||||
| 137 | unlink( $self->_nucleotide_fasta_file_from_gff_filename ); | ||||
| 138 | unlink( $self->_bed_output_filename ); | ||||
| 139 | unlink( $self->_nucleotide_fasta_file_from_gff_filename . '.fai' ); | ||||
| 140 | return $self->_extracted_nucleotide_fasta_file_from_bed_filename; | ||||
| 141 | } | ||||
| 142 | |||||
| 143 | sub _build_fasta_file { | ||||
| 144 | my ($self) = @_; | ||||
| 145 | return $self->_extract_nucleotide_regions; | ||||
| 146 | } | ||||
| 147 | |||||
| 148 | 2 | 66µs | 2 | 377µs | # spent 196µs (15+181) within Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@148 which was called:
# once (15µs+181µs) by Bio::Roary::Output::GroupsMultifastasNucleotide::BEGIN@23 at line 148 # spent 196µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::BEGIN@148
# spent 181µs making 1 call to Moose::unimport |
| 149 | 1 | 8µs | 2 | 5.84ms | __PACKAGE__->meta->make_immutable; # spent 5.82ms making 1 call to Class::MOP::Class::make_immutable
# spent 20µs making 1 call to Bio::Roary::Output::GroupsMultifastaNucleotide::meta |
| 150 | |||||
| 151 | 1 | 42µs | 1; | ||
| 152 |