| Filename | /Users/ap13/perl5/lib/perl5/Bio/Factory/FTLocationFactory.pm |
| Statements | Executed 14 statements in 1.14ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 20µs | 60µs | Bio::Factory::FTLocationFactory::BEGIN@92 |
| 1 | 1 | 1 | 19µs | 37µs | Bio::Factory::FTLocationFactory::BEGIN@93 |
| 1 | 1 | 1 | 9µs | 481µs | Bio::Factory::FTLocationFactory::BEGIN@102 |
| 1 | 1 | 1 | 9µs | 15µs | Bio::Factory::FTLocationFactory::BEGIN@104 |
| 1 | 1 | 1 | 7µs | 7µs | Bio::Factory::FTLocationFactory::BEGIN@97 |
| 1 | 1 | 1 | 6µs | 6µs | Bio::Factory::FTLocationFactory::BEGIN@98 |
| 1 | 1 | 1 | 6µs | 6µs | Bio::Factory::FTLocationFactory::CORE:qr (opcode) |
| 1 | 1 | 1 | 5µs | 5µs | Bio::Factory::FTLocationFactory::BEGIN@99 |
| 0 | 0 | 0 | 0s | 0s | Bio::Factory::FTLocationFactory::_parse_location |
| 0 | 0 | 0 | 0s | 0s | Bio::Factory::FTLocationFactory::from_string |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | # | ||||
| 2 | # BioPerl module for Bio::Factory::FTLocationFactory | ||||
| 3 | # | ||||
| 4 | # Please direct questions and support issues to <bioperl-l@bioperl.org> | ||||
| 5 | # | ||||
| 6 | # Cared for by Hilmar Lapp <hlapp at gmx.net> | ||||
| 7 | # | ||||
| 8 | # Copyright Hilmar Lapp | ||||
| 9 | # | ||||
| 10 | # You may distribute this module under the same terms as perl itself | ||||
| 11 | # | ||||
| 12 | # (c) Hilmar Lapp, hlapp at gnf.org, 2002. | ||||
| 13 | # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002. | ||||
| 14 | # | ||||
| 15 | # You may distribute this module under the same terms as perl itself. | ||||
| 16 | # Refer to the Perl Artistic License (see the license accompanying this | ||||
| 17 | # software package, or see http://www.perl.com/language/misc/Artistic.html) | ||||
| 18 | # for the terms under which you may use, modify, and redistribute this module. | ||||
| 19 | # | ||||
| 20 | # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED | ||||
| 21 | # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF | ||||
| 22 | # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. | ||||
| 23 | # | ||||
| 24 | |||||
| 25 | # POD documentation - main docs before the code | ||||
| 26 | |||||
| 27 | =head1 NAME | ||||
| 28 | |||||
| 29 | Bio::Factory::FTLocationFactory - A FeatureTable Location Parser | ||||
| 30 | |||||
| 31 | =head1 SYNOPSIS | ||||
| 32 | |||||
| 33 | # parse a string into a location object | ||||
| 34 | $loc = Bio::Factory::FTLocationFactory->from_string("join(100..200, | ||||
| 35 | 400..500"); | ||||
| 36 | |||||
| 37 | =head1 DESCRIPTION | ||||
| 38 | |||||
| 39 | Implementation of string-encoded location parsing for the Genbank feature | ||||
| 40 | table encoding of locations. | ||||
| 41 | |||||
| 42 | =head1 FEEDBACK | ||||
| 43 | |||||
| 44 | =head2 Mailing Lists | ||||
| 45 | |||||
| 46 | User feedback is an integral part of the evolution of this and other | ||||
| 47 | Bioperl modules. Send your comments and suggestions preferably to | ||||
| 48 | the Bioperl mailing list. Your participation is much appreciated. | ||||
| 49 | |||||
| 50 | bioperl-l@bioperl.org - General discussion | ||||
| 51 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | ||||
| 52 | |||||
| 53 | =head2 Support | ||||
| 54 | |||||
| 55 | Please direct usage questions or support issues to the mailing list: | ||||
| 56 | |||||
| 57 | I<bioperl-l@bioperl.org> | ||||
| 58 | |||||
| 59 | rather than to the module maintainer directly. Many experienced and | ||||
| 60 | reponsive experts will be able look at the problem and quickly | ||||
| 61 | address it. Please include a thorough description of the problem | ||||
| 62 | with code and data examples if at all possible. | ||||
| 63 | |||||
| 64 | =head2 Reporting Bugs | ||||
| 65 | |||||
| 66 | Report bugs to the Bioperl bug tracking system to help us keep track | ||||
| 67 | of the bugs and their resolution. Bug reports can be submitted via the | ||||
| 68 | web: | ||||
| 69 | |||||
| 70 | https://github.com/bioperl/bioperl-live/issues | ||||
| 71 | |||||
| 72 | =head1 AUTHOR - Hilmar Lapp | ||||
| 73 | |||||
| 74 | Email hlapp at gmx.net | ||||
| 75 | |||||
| 76 | =head1 CONTRIBUTORS | ||||
| 77 | |||||
| 78 | Jason Stajich, jason-at-bioperl-dot-org | ||||
| 79 | Chris Fields, cjfields-at-uiuc-dot-edu | ||||
| 80 | |||||
| 81 | =head1 APPENDIX | ||||
| 82 | |||||
| 83 | The rest of the documentation details each of the object methods. | ||||
| 84 | Internal methods are usually preceded with a _ | ||||
| 85 | |||||
| 86 | =cut | ||||
| 87 | |||||
| 88 | |||||
| 89 | # Let the code begin... | ||||
| 90 | |||||
| 91 | package Bio::Factory::FTLocationFactory; | ||||
| 92 | 2 | 38µs | 2 | 101µs | # spent 60µs (20+41) within Bio::Factory::FTLocationFactory::BEGIN@92 which was called:
# once (20µs+41µs) by Bio::SeqIO::BEGIN@330 at line 92 # spent 60µs making 1 call to Bio::Factory::FTLocationFactory::BEGIN@92
# spent 41µs making 1 call to vars::import |
| 93 | 2 | 40µs | 2 | 54µs | # spent 37µs (19+18) within Bio::Factory::FTLocationFactory::BEGIN@93 which was called:
# once (19µs+18µs) by Bio::SeqIO::BEGIN@330 at line 93 # spent 37µs making 1 call to Bio::Factory::FTLocationFactory::BEGIN@93
# spent 18µs making 1 call to strict::import |
| 94 | |||||
| 95 | # Object preamble - inherits from Bio::Root::Root | ||||
| 96 | |||||
| 97 | 2 | 28µs | 1 | 7µs | # spent 7µs within Bio::Factory::FTLocationFactory::BEGIN@97 which was called:
# once (7µs+0s) by Bio::SeqIO::BEGIN@330 at line 97 # spent 7µs making 1 call to Bio::Factory::FTLocationFactory::BEGIN@97 |
| 98 | 2 | 22µs | 1 | 6µs | # spent 6µs within Bio::Factory::FTLocationFactory::BEGIN@98 which was called:
# once (6µs+0s) by Bio::SeqIO::BEGIN@330 at line 98 # spent 6µs making 1 call to Bio::Factory::FTLocationFactory::BEGIN@98 |
| 99 | 2 | 23µs | 1 | 5µs | # spent 5µs within Bio::Factory::FTLocationFactory::BEGIN@99 which was called:
# once (5µs+0s) by Bio::SeqIO::BEGIN@330 at line 99 # spent 5µs making 1 call to Bio::Factory::FTLocationFactory::BEGIN@99 |
| 100 | |||||
| 101 | |||||
| 102 | 2 | 84µs | 2 | 953µs | # spent 481µs (9+472) within Bio::Factory::FTLocationFactory::BEGIN@102 which was called:
# once (9µs+472µs) by Bio::SeqIO::BEGIN@330 at line 102 # spent 481µs making 1 call to Bio::Factory::FTLocationFactory::BEGIN@102
# spent 472µs making 1 call to base::import |
| 103 | |||||
| 104 | # spent 15µs (9+6) within Bio::Factory::FTLocationFactory::BEGIN@104 which was called:
# once (9µs+6µs) by Bio::SeqIO::BEGIN@330 at line 115 | ||||
| 105 | # the below is an optimized regex obj. from J. Freidl's Mastering Reg Exp. | ||||
| 106 | 1 | 15µs | 1 | 6µs | $LOCREG = qr{ # spent 6µs making 1 call to Bio::Factory::FTLocationFactory::CORE:qr |
| 107 | (?> | ||||
| 108 | [^()]+ | ||||
| 109 | | | ||||
| 110 | \( | ||||
| 111 | (??{$LOCREG}) | ||||
| 112 | \) | ||||
| 113 | )* | ||||
| 114 | }x; | ||||
| 115 | 1 | 886µs | 1 | 15µs | } # spent 15µs making 1 call to Bio::Factory::FTLocationFactory::BEGIN@104 |
| 116 | |||||
| 117 | =head2 new | ||||
| 118 | |||||
| 119 | Title : new | ||||
| 120 | Usage : my $obj = Bio::Factory::FTLocationFactory->new(); | ||||
| 121 | Function: Builds a new Bio::Factory::FTLocationFactory object | ||||
| 122 | Returns : an instance of Bio::Factory::FTLocationFactory | ||||
| 123 | Args : | ||||
| 124 | |||||
| 125 | =cut | ||||
| 126 | |||||
| 127 | =head2 from_string | ||||
| 128 | |||||
| 129 | Title : from_string | ||||
| 130 | Usage : $loc = $locfactory->from_string("100..200"); | ||||
| 131 | Function: Parses the given string and returns a Bio::LocationI implementing | ||||
| 132 | object representing the location encoded by the string. | ||||
| 133 | |||||
| 134 | This implementation parses the Genbank feature table | ||||
| 135 | encoding of locations. | ||||
| 136 | Example : | ||||
| 137 | Returns : A Bio::LocationI implementing object. | ||||
| 138 | Args : A string. | ||||
| 139 | |||||
| 140 | =cut | ||||
| 141 | |||||
| 142 | sub from_string { | ||||
| 143 | my ($self,$locstr,$op) = @_; | ||||
| 144 | my $loc; | ||||
| 145 | |||||
| 146 | #$self->debug("$locstr\n"); | ||||
| 147 | |||||
| 148 | # $op for operator (error handling) | ||||
| 149 | |||||
| 150 | # run on first pass only | ||||
| 151 | # Note : These location types are now deprecated in GenBank (Oct. 2006) | ||||
| 152 | if (!defined($op)) { | ||||
| 153 | # convert all (X.Y) to [X.Y] | ||||
| 154 | $locstr =~ s{\((\d+\.\d+)\)}{\[$1\]}g; | ||||
| 155 | # convert ABC123:(X..Y) to ABC123:[X..Y] | ||||
| 156 | # we should never see the above | ||||
| 157 | $locstr =~ s{:\((\d+\.{2}\d+)\)}{:\[$1\]}g; | ||||
| 158 | } | ||||
| 159 | |||||
| 160 | if ($locstr =~ m{(.*?)\(($LOCREG)\)(.*)}o) { # any matching parentheses? | ||||
| 161 | my ($beg, $mid, $end) = ($1, $2, $3); | ||||
| 162 | my (@sublocs) = (split(q(,),$beg), $mid, split(q(,),$end)); | ||||
| 163 | |||||
| 164 | my @loc_objs; | ||||
| 165 | my $loc_obj; | ||||
| 166 | my @gl_subloc_strands; | ||||
| 167 | |||||
| 168 | SUBLOCS: | ||||
| 169 | while (@sublocs) { | ||||
| 170 | my $subloc = shift @sublocs; | ||||
| 171 | next if !$subloc; | ||||
| 172 | my $oparg = ($subloc eq 'join' || $subloc eq 'bond' || | ||||
| 173 | $subloc eq 'order' || $subloc eq 'complement') ? $subloc : undef; | ||||
| 174 | # has operator, requires further work (recurse) | ||||
| 175 | if ($oparg) { | ||||
| 176 | my $sub = shift @sublocs; | ||||
| 177 | # simple split operators (no recursive calls needed) | ||||
| 178 | if (($oparg eq 'join' || $oparg eq 'order' || $oparg eq 'bond' ) | ||||
| 179 | && $sub !~ m{(?:join|order|bond)}) { | ||||
| 180 | my @splitlocs = split(q(,), $sub); | ||||
| 181 | $loc_obj = Bio::Location::Split->new(-verbose => 1, | ||||
| 182 | -splittype => $oparg); | ||||
| 183 | # Store strand values for later consistency check | ||||
| 184 | my @subloc_strands; | ||||
| 185 | my @s_objs; | ||||
| 186 | foreach my $splitloc (@splitlocs) { | ||||
| 187 | next unless $splitloc; | ||||
| 188 | my $sobj; | ||||
| 189 | if ($splitloc =~ m{\(($LOCREG)\)}) { | ||||
| 190 | my $comploc = $1; | ||||
| 191 | $sobj = $self->_parse_location($comploc); | ||||
| 192 | $sobj->strand(-1); | ||||
| 193 | push @subloc_strands, -1; | ||||
| 194 | push @gl_subloc_strands, -1; | ||||
| 195 | } else { | ||||
| 196 | $sobj = $self->_parse_location($splitloc); | ||||
| 197 | push @subloc_strands, 1; | ||||
| 198 | push @gl_subloc_strands, 1; | ||||
| 199 | } | ||||
| 200 | push @s_objs, $sobj; | ||||
| 201 | } | ||||
| 202 | |||||
| 203 | # Sublocations strand values consistency check to set | ||||
| 204 | # Guide Strand and sublocations adding order | ||||
| 205 | if (scalar @s_objs > 0) { | ||||
| 206 | my $identical = 0; | ||||
| 207 | my $gl_identical = 0; | ||||
| 208 | |||||
| 209 | my $first_value = $subloc_strands[0]; | ||||
| 210 | foreach my $strand (@subloc_strands) { | ||||
| 211 | $identical++ if ($strand == $first_value); | ||||
| 212 | } | ||||
| 213 | |||||
| 214 | my $first_gl_value = $gl_subloc_strands[0]; | ||||
| 215 | foreach my $gl_strand (@gl_subloc_strands) { | ||||
| 216 | $gl_identical++ if ($gl_strand == $first_gl_value); | ||||
| 217 | } | ||||
| 218 | |||||
| 219 | if ($identical == scalar @subloc_strands) { | ||||
| 220 | # Set guide_strand if all sublocations have the same strand | ||||
| 221 | $loc_obj->guide_strand($first_value); | ||||
| 222 | |||||
| 223 | # Reverse sublocation order for negative strand locations in cases like this: | ||||
| 224 | # join(1..11,join(complement(40..50),complement(60..70))) | ||||
| 225 | # But not this: | ||||
| 226 | # join(complement(10..20),complement(30..40)) | ||||
| 227 | if ( $gl_identical != scalar @gl_subloc_strands | ||||
| 228 | and $first_value == -1 | ||||
| 229 | ) { | ||||
| 230 | @s_objs = reverse @s_objs; | ||||
| 231 | } | ||||
| 232 | } | ||||
| 233 | else { | ||||
| 234 | # Mixed strand values | ||||
| 235 | $loc_obj->guide_strand(undef); | ||||
| 236 | } | ||||
| 237 | |||||
| 238 | # Add sublocations | ||||
| 239 | foreach my $s_obj (@s_objs) { | ||||
| 240 | $loc_obj->add_sub_Location($s_obj); | ||||
| 241 | } | ||||
| 242 | } | ||||
| 243 | } else { | ||||
| 244 | $loc_obj = $self->from_string($sub, $oparg); | ||||
| 245 | # reinsure the operator is set correctly for this level | ||||
| 246 | # unless it is complement | ||||
| 247 | $loc_obj->splittype($oparg) unless $oparg eq 'complement'; | ||||
| 248 | } | ||||
| 249 | } | ||||
| 250 | # no operator, simple or fuzzy | ||||
| 251 | else { | ||||
| 252 | $loc_obj = $self->from_string($subloc,1); | ||||
| 253 | } | ||||
| 254 | if ($op && $op eq 'complement') { | ||||
| 255 | $loc_obj->strand(-1); | ||||
| 256 | push @gl_subloc_strands, -1; | ||||
| 257 | } | ||||
| 258 | else { | ||||
| 259 | push @gl_subloc_strands, 1; | ||||
| 260 | } | ||||
| 261 | |||||
| 262 | push @loc_objs, $loc_obj; | ||||
| 263 | } | ||||
| 264 | my $ct = @loc_objs; | ||||
| 265 | if ($op && !($op eq 'join' || $op eq 'order' || $op eq 'bond') | ||||
| 266 | && $ct > 1 ) { | ||||
| 267 | $self->throw("Bad operator $op: had multiple locations ". | ||||
| 268 | scalar(@loc_objs).", should be SplitLocationI"); | ||||
| 269 | } | ||||
| 270 | if ($ct > 1) { | ||||
| 271 | $loc = Bio::Location::Split->new(); | ||||
| 272 | $loc->add_sub_Location(shift @loc_objs) while (@loc_objs); | ||||
| 273 | return $loc; | ||||
| 274 | } else { | ||||
| 275 | $loc = shift @loc_objs; | ||||
| 276 | return $loc; | ||||
| 277 | } | ||||
| 278 | } else { # simple location(s) | ||||
| 279 | $loc = $self->_parse_location($locstr); | ||||
| 280 | $loc->strand(-1) if ($op && $op eq 'complement'); | ||||
| 281 | } | ||||
| 282 | return $loc; | ||||
| 283 | } | ||||
| 284 | |||||
| 285 | =head2 _parse_location | ||||
| 286 | |||||
| 287 | Title : _parse_location | ||||
| 288 | Usage : $loc = $locfactory->_parse_location( $loc_string) | ||||
| 289 | |||||
| 290 | Function: Parses the given location string and returns a location object | ||||
| 291 | with start() and end() and strand() set appropriately. | ||||
| 292 | Note that this method is private. | ||||
| 293 | Returns : A Bio::LocationI implementing object or undef on failure | ||||
| 294 | Args : location string | ||||
| 295 | |||||
| 296 | =cut | ||||
| 297 | |||||
| 298 | sub _parse_location { | ||||
| 299 | my ($self, $locstr) = @_; | ||||
| 300 | my ($loc, $seqid); | ||||
| 301 | #$self->debug( "Location parse, processing $locstr\n"); | ||||
| 302 | # 'remote' location? | ||||
| 303 | if($locstr =~ m{^(\S+):(.*)$}o) { | ||||
| 304 | # yes; memorize remote ID and strip from location string | ||||
| 305 | $seqid = $1; | ||||
| 306 | $locstr = $2; | ||||
| 307 | } | ||||
| 308 | |||||
| 309 | # split into start and end | ||||
| 310 | my ($start, $end) = split(/\.\./, $locstr); | ||||
| 311 | # remove enclosing parentheses if any; note that because of parentheses | ||||
| 312 | # possibly surrounding the entire location the parentheses around start | ||||
| 313 | # and/or may be asymmetrical | ||||
| 314 | # Note: these are from X.Y fuzzy locations, which are deprecated! | ||||
| 315 | $start =~ s/(?:^\[+|\]+$)//g if $start; | ||||
| 316 | $end =~ s/(?:^\[+|\]+$)//g if $end; | ||||
| 317 | |||||
| 318 | # Is this a simple (exact) or a fuzzy location? Simples have exact start | ||||
| 319 | # and end, or is between two adjacent bases. Everything else is fuzzy. | ||||
| 320 | my $loctype = ".."; # exact with start and end as default | ||||
| 321 | |||||
| 322 | $loctype = '?' if ( ($locstr =~ /\?/) && ($locstr !~ /\?\d+/) ); | ||||
| 323 | |||||
| 324 | my $locclass = "Bio::Location::Simple"; | ||||
| 325 | if(! defined($end)) { | ||||
| 326 | if($locstr =~ /(\d+)([\.\^])(\d+)/) { | ||||
| 327 | $start = $1; | ||||
| 328 | $end = $3; | ||||
| 329 | $loctype = $2; | ||||
| 330 | $locclass = "Bio::Location::Fuzzy" | ||||
| 331 | unless (abs($end-$start) <= 1) && ($loctype eq "^"); | ||||
| 332 | } else { | ||||
| 333 | $end = $start; | ||||
| 334 | } | ||||
| 335 | } | ||||
| 336 | # start_num and end_num are for the numeric only versions of | ||||
| 337 | # start and end so they can be compared | ||||
| 338 | # in a few lines | ||||
| 339 | my ($start_num, $end_num) = ($start,$end); | ||||
| 340 | if ( ($start =~ /[\>\<\?\.\^]/) || ($end =~ /[\>\<\?\.\^]/) ) { | ||||
| 341 | $locclass = 'Bio::Location::Fuzzy'; | ||||
| 342 | if($start =~ /(\d+)/) { | ||||
| 343 | ($start_num) = $1; | ||||
| 344 | } else { | ||||
| 345 | $start_num = 0 | ||||
| 346 | } | ||||
| 347 | if ($end =~ /(\d+)/) { | ||||
| 348 | ($end_num) = $1; | ||||
| 349 | } else { $end_num = 0 } | ||||
| 350 | } | ||||
| 351 | my $strand = 1; | ||||
| 352 | |||||
| 353 | if( $start_num > $end_num && $loctype ne '?') { | ||||
| 354 | ($start,$end,$strand) = ($end,$start,-1); | ||||
| 355 | } | ||||
| 356 | # instantiate location and initialize | ||||
| 357 | $loc = $locclass->new(-verbose => $self->verbose, | ||||
| 358 | -start => $start, | ||||
| 359 | -end => $end, | ||||
| 360 | -strand => $strand, | ||||
| 361 | -location_type => $loctype); | ||||
| 362 | # set remote ID if remote location | ||||
| 363 | if($seqid) { | ||||
| 364 | $loc->is_remote(1); | ||||
| 365 | $loc->seq_id($seqid); | ||||
| 366 | } | ||||
| 367 | |||||
| 368 | # done (hopefully) | ||||
| 369 | return $loc; | ||||
| 370 | } | ||||
| 371 | |||||
| 372 | 1 | 2µs | 1; | ||
# spent 6µs within Bio::Factory::FTLocationFactory::CORE:qr which was called:
# once (6µs+0s) by Bio::Factory::FTLocationFactory::BEGIN@104 at line 106 |