| Filename | /Users/ap13/perl5/lib/perl5/Bio/SeqFeatureI.pm |
| Statements | Executed 16 statements in 1.93ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 825µs | 4.91ms | Bio::SeqFeatureI::BEGIN@99 |
| 1 | 1 | 1 | 14µs | 45µs | Bio::SeqFeatureI::BEGIN@97 |
| 1 | 1 | 1 | 9µs | 47µs | Bio::SeqFeatureI::BEGIN@107 |
| 1 | 1 | 1 | 8µs | 66µs | Bio::SeqFeatureI::BEGIN@109 |
| 1 | 1 | 1 | 8µs | 20µs | Bio::SeqFeatureI::BEGIN@98 |
| 1 | 1 | 1 | 6µs | 6µs | Bio::SeqFeatureI::BEGIN@105 |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::_static_gff_formatter |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::attach_seq |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::display_name |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::entire_seq |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::generate_unique_persistent_id |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::get_SeqFeatures |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::get_all_tags |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::get_tag_values |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::get_tagset_values |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::gff_string |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::has_tag |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::location |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::phase |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::primary_id |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::primary_tag |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::seq |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::seq_id |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::source_tag |
| 0 | 0 | 0 | 0s | 0s | Bio::SeqFeatureI::spliced_seq |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | # | ||||
| 2 | # BioPerl module for Bio::SeqFeatureI | ||||
| 3 | # | ||||
| 4 | # Please direct questions and support issues to <bioperl-l@bioperl.org> | ||||
| 5 | # | ||||
| 6 | # Cared for by Ewan Birney <birney@ebi.ac.uk> | ||||
| 7 | # | ||||
| 8 | # Copyright Ewan Birney | ||||
| 9 | # | ||||
| 10 | # You may distribute this module under the same terms as perl itself | ||||
| 11 | |||||
| 12 | # POD documentation - main docs before the code | ||||
| 13 | |||||
| 14 | =head1 NAME | ||||
| 15 | |||||
| 16 | Bio::SeqFeatureI - Abstract interface of a Sequence Feature | ||||
| 17 | |||||
| 18 | =head1 SYNOPSIS | ||||
| 19 | |||||
| 20 | # get a seqfeature somehow, eg, from a Sequence with Features attached | ||||
| 21 | |||||
| 22 | foreach $feat ( $seq->get_SeqFeatures() ) { | ||||
| 23 | print "Feature from ", $feat->start, "to ", | ||||
| 24 | $feat->end, " Primary tag ", $feat->primary_tag, | ||||
| 25 | ", produced by ", $feat->source_tag(), "\n"; | ||||
| 26 | |||||
| 27 | if ( $feat->strand == 0 ) { | ||||
| 28 | print "Feature applicable to either strand\n"; | ||||
| 29 | } | ||||
| 30 | else { | ||||
| 31 | print "Feature on strand ", $feat->strand,"\n"; # -1,1 | ||||
| 32 | } | ||||
| 33 | |||||
| 34 | print "feature location is ",$feat->start, "..", | ||||
| 35 | $feat->end, " on strand ", $feat->strand, "\n"; | ||||
| 36 | print "easy utility to print locations in GenBank/EMBL way ", | ||||
| 37 | $feat->location->to_FTstring(), "\n"; | ||||
| 38 | |||||
| 39 | foreach $tag ( $feat->get_all_tags() ) { | ||||
| 40 | print "Feature has tag ", $tag, " with values, ", | ||||
| 41 | join(' ',$feat->get_tag_values($tag)), "\n"; | ||||
| 42 | } | ||||
| 43 | print "new feature\n" if $feat->has_tag('new'); | ||||
| 44 | # features can have sub features | ||||
| 45 | my @subfeat = $feat->get_SeqFeatures(); | ||||
| 46 | } | ||||
| 47 | |||||
| 48 | =head1 DESCRIPTION | ||||
| 49 | |||||
| 50 | This interface is the functions one can expect for any Sequence | ||||
| 51 | Feature, whatever its implementation or whether it is a more complex | ||||
| 52 | type (eg, a Gene). This object does not actually provide any | ||||
| 53 | implementation, it just provides the definitions of what methods one can | ||||
| 54 | call. See Bio::SeqFeature::Generic for a good standard implementation | ||||
| 55 | of this object | ||||
| 56 | |||||
| 57 | =head1 FEEDBACK | ||||
| 58 | |||||
| 59 | User feedback is an integral part of the evolution of this and other | ||||
| 60 | Bioperl modules. Send your comments and suggestions preferably to one | ||||
| 61 | of the Bioperl mailing lists. Your participation is much appreciated. | ||||
| 62 | |||||
| 63 | bioperl-l@bioperl.org - General discussion | ||||
| 64 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | ||||
| 65 | |||||
| 66 | =head2 Support | ||||
| 67 | |||||
| 68 | Please direct usage questions or support issues to the mailing list: | ||||
| 69 | |||||
| 70 | I<bioperl-l@bioperl.org> | ||||
| 71 | |||||
| 72 | rather than to the module maintainer directly. Many experienced and | ||||
| 73 | reponsive experts will be able look at the problem and quickly | ||||
| 74 | address it. Please include a thorough description of the problem | ||||
| 75 | with code and data examples if at all possible. | ||||
| 76 | |||||
| 77 | =head2 Reporting Bugs | ||||
| 78 | |||||
| 79 | Report bugs to the Bioperl bug tracking system to help us keep track | ||||
| 80 | the bugs and their resolution. Bug reports can be submitted via the | ||||
| 81 | web: | ||||
| 82 | |||||
| 83 | https://github.com/bioperl/bioperl-live/issues | ||||
| 84 | |||||
| 85 | =head1 APPENDIX | ||||
| 86 | |||||
| 87 | The rest of the documentation details each of the object | ||||
| 88 | methods. Internal methods are usually preceded with a _ | ||||
| 89 | |||||
| 90 | =cut | ||||
| 91 | |||||
| 92 | |||||
| 93 | # Let the code begin... | ||||
| 94 | |||||
| 95 | |||||
| 96 | package Bio::SeqFeatureI; | ||||
| 97 | 2 | 27µs | 2 | 75µs | # spent 45µs (14+30) within Bio::SeqFeatureI::BEGIN@97 which was called:
# once (14µs+30µs) by base::import at line 97 # spent 45µs making 1 call to Bio::SeqFeatureI::BEGIN@97
# spent 30µs making 1 call to vars::import |
| 98 | 2 | 45µs | 2 | 32µs | # spent 20µs (8+12) within Bio::SeqFeatureI::BEGIN@98 which was called:
# once (8µs+12µs) by base::import at line 98 # spent 20µs making 1 call to Bio::SeqFeatureI::BEGIN@98
# spent 12µs making 1 call to strict::import |
| 99 | # spent 4.91ms (825µs+4.08) within Bio::SeqFeatureI::BEGIN@99 which was called:
# once (825µs+4.08ms) by base::import at line 103 | ||||
| 100 | 2 | 102µs | eval { require Bio::DB::InMemoryCache }; | ||
| 101 | 1 | 5µs | if( $@ ) { $HasInMemory = 0 } | ||
| 102 | 1 | 400ns | else { $HasInMemory = 1 } | ||
| 103 | 1 | 23µs | 1 | 4.91ms | } # spent 4.91ms making 1 call to Bio::SeqFeatureI::BEGIN@99 |
| 104 | |||||
| 105 | 2 | 22µs | 1 | 6µs | # spent 6µs within Bio::SeqFeatureI::BEGIN@105 which was called:
# once (6µs+0s) by base::import at line 105 # spent 6µs making 1 call to Bio::SeqFeatureI::BEGIN@105 |
| 106 | |||||
| 107 | 2 | 25µs | 2 | 86µs | # spent 47µs (9+38) within Bio::SeqFeatureI::BEGIN@107 which was called:
# once (9µs+38µs) by base::import at line 107 # spent 47µs making 1 call to Bio::SeqFeatureI::BEGIN@107
# spent 38µs making 1 call to Exporter::import |
| 108 | |||||
| 109 | 2 | 1.68ms | 2 | 66µs | # spent 66µs (8+58) within Bio::SeqFeatureI::BEGIN@109 which was called:
# once (8µs+58µs) by base::import at line 109 # spent 66µs making 1 call to Bio::SeqFeatureI::BEGIN@109
# spent 58µs making 1 call to base::import, recursion: max depth 1, sum of overlapping time 58µs |
| 110 | |||||
| 111 | =head1 Bio::SeqFeatureI specific methods | ||||
| 112 | |||||
| 113 | New method interfaces. | ||||
| 114 | |||||
| 115 | =cut | ||||
| 116 | |||||
| 117 | =head2 get_SeqFeatures | ||||
| 118 | |||||
| 119 | Title : get_SeqFeatures | ||||
| 120 | Usage : @feats = $feat->get_SeqFeatures(); | ||||
| 121 | Function: Returns an array of sub Sequence Features | ||||
| 122 | Returns : An array | ||||
| 123 | Args : none | ||||
| 124 | |||||
| 125 | =cut | ||||
| 126 | |||||
| 127 | sub get_SeqFeatures{ | ||||
| 128 | my ($self,@args) = @_; | ||||
| 129 | |||||
| 130 | $self->throw_not_implemented(); | ||||
| 131 | } | ||||
| 132 | |||||
| 133 | =head2 display_name | ||||
| 134 | |||||
| 135 | Title : display_name | ||||
| 136 | Usage : $name = $feat->display_name() | ||||
| 137 | Function: Returns the human-readable name of the feature for displays. | ||||
| 138 | Returns : a string | ||||
| 139 | Args : none | ||||
| 140 | |||||
| 141 | =cut | ||||
| 142 | |||||
| 143 | sub display_name { | ||||
| 144 | shift->throw_not_implemented(); | ||||
| 145 | } | ||||
| 146 | |||||
| 147 | =head2 primary_tag | ||||
| 148 | |||||
| 149 | Title : primary_tag | ||||
| 150 | Usage : $tag = $feat->primary_tag() | ||||
| 151 | Function: Returns the primary tag for a feature, | ||||
| 152 | eg 'exon' | ||||
| 153 | Returns : a string | ||||
| 154 | Args : none | ||||
| 155 | |||||
| 156 | |||||
| 157 | =cut | ||||
| 158 | |||||
| 159 | sub primary_tag{ | ||||
| 160 | my ($self,@args) = @_; | ||||
| 161 | |||||
| 162 | $self->throw_not_implemented(); | ||||
| 163 | |||||
| 164 | } | ||||
| 165 | |||||
| 166 | =head2 source_tag | ||||
| 167 | |||||
| 168 | Title : source_tag | ||||
| 169 | Usage : $tag = $feat->source_tag() | ||||
| 170 | Function: Returns the source tag for a feature, | ||||
| 171 | eg, 'genscan' | ||||
| 172 | Returns : a string | ||||
| 173 | Args : none | ||||
| 174 | |||||
| 175 | |||||
| 176 | =cut | ||||
| 177 | |||||
| 178 | sub source_tag{ | ||||
| 179 | my ($self,@args) = @_; | ||||
| 180 | |||||
| 181 | $self->throw_not_implemented(); | ||||
| 182 | } | ||||
| 183 | |||||
| 184 | =head2 has_tag | ||||
| 185 | |||||
| 186 | Title : has_tag | ||||
| 187 | Usage : $tag_exists = $self->has_tag('some_tag') | ||||
| 188 | Function: | ||||
| 189 | Returns : TRUE if the specified tag exists, and FALSE otherwise | ||||
| 190 | Args : | ||||
| 191 | |||||
| 192 | =cut | ||||
| 193 | |||||
| 194 | sub has_tag{ | ||||
| 195 | my ($self,@args) = @_; | ||||
| 196 | |||||
| 197 | $self->throw_not_implemented(); | ||||
| 198 | |||||
| 199 | } | ||||
| 200 | |||||
| 201 | =head2 get_tag_values | ||||
| 202 | |||||
| 203 | Title : get_tag_values | ||||
| 204 | Usage : @values = $self->get_tag_values('some_tag') | ||||
| 205 | Function: | ||||
| 206 | Returns : An array comprising the values of the specified tag. | ||||
| 207 | Args : a string | ||||
| 208 | |||||
| 209 | throws an exception if there is no such tag | ||||
| 210 | |||||
| 211 | =cut | ||||
| 212 | |||||
| 213 | sub get_tag_values { | ||||
| 214 | shift->throw_not_implemented(); | ||||
| 215 | } | ||||
| 216 | |||||
| 217 | =head2 get_tagset_values | ||||
| 218 | |||||
| 219 | Title : get_tagset_values | ||||
| 220 | Usage : @values = $self->get_tagset_values(qw(label transcript_id product)) | ||||
| 221 | Function: | ||||
| 222 | Returns : An array comprising the values of the specified tags, in order of tags | ||||
| 223 | Args : An array of strings | ||||
| 224 | |||||
| 225 | does NOT throw an exception if none of the tags are not present | ||||
| 226 | |||||
| 227 | this method is useful for getting a human-readable label for a | ||||
| 228 | SeqFeatureI; not all tags can be assumed to be present, so a list of | ||||
| 229 | possible tags in preferential order is provided | ||||
| 230 | |||||
| 231 | =cut | ||||
| 232 | |||||
| 233 | # interface + abstract method | ||||
| 234 | sub get_tagset_values { | ||||
| 235 | my ($self, @args) = @_; | ||||
| 236 | my @vals = (); | ||||
| 237 | foreach my $arg (@args) { | ||||
| 238 | if ($self->has_tag($arg)) { | ||||
| 239 | push(@vals, $self->get_tag_values($arg)); | ||||
| 240 | } | ||||
| 241 | } | ||||
| 242 | return @vals; | ||||
| 243 | } | ||||
| 244 | |||||
| 245 | =head2 get_all_tags | ||||
| 246 | |||||
| 247 | Title : get_all_tags | ||||
| 248 | Usage : @tags = $feat->get_all_tags() | ||||
| 249 | Function: gives all tags for this feature | ||||
| 250 | Returns : an array of strings | ||||
| 251 | Args : none | ||||
| 252 | |||||
| 253 | |||||
| 254 | =cut | ||||
| 255 | |||||
| 256 | sub get_all_tags{ | ||||
| 257 | shift->throw_not_implemented(); | ||||
| 258 | } | ||||
| 259 | |||||
| 260 | =head2 attach_seq | ||||
| 261 | |||||
| 262 | Title : attach_seq | ||||
| 263 | Usage : $sf->attach_seq($seq) | ||||
| 264 | Function: Attaches a Bio::Seq object to this feature. This | ||||
| 265 | Bio::Seq object is for the *entire* sequence: ie | ||||
| 266 | from 1 to 10000 | ||||
| 267 | |||||
| 268 | Note that it is not guaranteed that if you obtain a feature from | ||||
| 269 | an object in bioperl, it will have a sequence attached. Also, | ||||
| 270 | implementors of this interface can choose to provide an empty | ||||
| 271 | implementation of this method. I.e., there is also no guarantee | ||||
| 272 | that if you do attach a sequence, seq() or entire_seq() will not | ||||
| 273 | return undef. | ||||
| 274 | |||||
| 275 | The reason that this method is here on the interface is to enable | ||||
| 276 | you to call it on every SeqFeatureI compliant object, and | ||||
| 277 | that it will be implemented in a useful way and set to a useful | ||||
| 278 | value for the great majority of use cases. Implementors who choose | ||||
| 279 | to ignore the call are encouraged to specifically state this in | ||||
| 280 | their documentation. | ||||
| 281 | |||||
| 282 | Example : | ||||
| 283 | Returns : TRUE on success | ||||
| 284 | Args : a Bio::PrimarySeqI compliant object | ||||
| 285 | |||||
| 286 | |||||
| 287 | =cut | ||||
| 288 | |||||
| 289 | sub attach_seq { | ||||
| 290 | shift->throw_not_implemented(); | ||||
| 291 | } | ||||
| 292 | |||||
| 293 | =head2 seq | ||||
| 294 | |||||
| 295 | Title : seq | ||||
| 296 | Usage : $tseq = $sf->seq() | ||||
| 297 | Function: returns the truncated sequence (if there is a sequence attached) | ||||
| 298 | for this feature | ||||
| 299 | Example : | ||||
| 300 | Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence | ||||
| 301 | bounded by start & end, or undef if there is no sequence attached | ||||
| 302 | Args : none | ||||
| 303 | |||||
| 304 | |||||
| 305 | =cut | ||||
| 306 | |||||
| 307 | sub seq { | ||||
| 308 | shift->throw_not_implemented(); | ||||
| 309 | } | ||||
| 310 | |||||
| 311 | =head2 entire_seq | ||||
| 312 | |||||
| 313 | Title : entire_seq | ||||
| 314 | Usage : $whole_seq = $sf->entire_seq() | ||||
| 315 | Function: gives the entire sequence that this seqfeature is attached to | ||||
| 316 | Example : | ||||
| 317 | Returns : a Bio::PrimarySeqI compliant object, or undef if there is no | ||||
| 318 | sequence attached | ||||
| 319 | Args : none | ||||
| 320 | |||||
| 321 | |||||
| 322 | =cut | ||||
| 323 | |||||
| 324 | sub entire_seq { | ||||
| 325 | shift->throw_not_implemented(); | ||||
| 326 | } | ||||
| 327 | |||||
| 328 | |||||
| 329 | =head2 seq_id | ||||
| 330 | |||||
| 331 | Title : seq_id | ||||
| 332 | Usage : $obj->seq_id($newval) | ||||
| 333 | Function: There are many cases when you make a feature that you | ||||
| 334 | do know the sequence name, but do not know its actual | ||||
| 335 | sequence. This is an attribute such that you can store | ||||
| 336 | the ID (e.g., display_id) of the sequence. | ||||
| 337 | |||||
| 338 | This attribute should *not* be used in GFF dumping, as | ||||
| 339 | that should come from the collection in which the seq | ||||
| 340 | feature was found. | ||||
| 341 | Returns : value of seq_id | ||||
| 342 | Args : newvalue (optional) | ||||
| 343 | |||||
| 344 | |||||
| 345 | =cut | ||||
| 346 | |||||
| 347 | sub seq_id { | ||||
| 348 | shift->throw_not_implemented(); | ||||
| 349 | } | ||||
| 350 | |||||
| 351 | =head2 gff_string | ||||
| 352 | |||||
| 353 | Title : gff_string | ||||
| 354 | Usage : $str = $feat->gff_string; | ||||
| 355 | $str = $feat->gff_string($gff_formatter); | ||||
| 356 | Function: Provides the feature information in GFF format. | ||||
| 357 | |||||
| 358 | The implementation provided here returns GFF2 by default. If you | ||||
| 359 | want a different version, supply an object implementing a method | ||||
| 360 | gff_string() accepting a SeqFeatureI object as argument. E.g., to | ||||
| 361 | obtain GFF1 format, do the following: | ||||
| 362 | |||||
| 363 | my $gffio = Bio::Tools::GFF->new(-gff_version => 1); | ||||
| 364 | $gff1str = $feat->gff_string($gff1io); | ||||
| 365 | |||||
| 366 | Returns : A string | ||||
| 367 | Args : Optionally, an object implementing gff_string(). | ||||
| 368 | |||||
| 369 | |||||
| 370 | =cut | ||||
| 371 | |||||
| 372 | sub gff_string{ | ||||
| 373 | my ($self,$formatter) = @_; | ||||
| 374 | |||||
| 375 | $formatter = $self->_static_gff_formatter unless $formatter; | ||||
| 376 | return $formatter->gff_string($self); | ||||
| 377 | } | ||||
| 378 | |||||
| 379 | 1 | 700ns | my $static_gff_formatter = undef; | ||
| 380 | |||||
| 381 | =head2 _static_gff_formatter | ||||
| 382 | |||||
| 383 | Title : _static_gff_formatter | ||||
| 384 | Usage : | ||||
| 385 | Function: | ||||
| 386 | Example : | ||||
| 387 | Returns : | ||||
| 388 | Args : | ||||
| 389 | |||||
| 390 | |||||
| 391 | =cut | ||||
| 392 | |||||
| 393 | sub _static_gff_formatter{ | ||||
| 394 | my ($self,@args) = @_; | ||||
| 395 | require Bio::Tools::GFF; # on the fly inclusion -- is this better? | ||||
| 396 | if( !defined $static_gff_formatter ) { | ||||
| 397 | $static_gff_formatter = Bio::Tools::GFF->new('-gff_version' => 2); | ||||
| 398 | } | ||||
| 399 | return $static_gff_formatter; | ||||
| 400 | } | ||||
| 401 | |||||
| 402 | |||||
| 403 | =head1 Decorating methods | ||||
| 404 | |||||
| 405 | These methods have an implementation provided by Bio::SeqFeatureI, | ||||
| 406 | but can be validly overwritten by subclasses | ||||
| 407 | |||||
| 408 | =head2 spliced_seq | ||||
| 409 | |||||
| 410 | Title : spliced_seq | ||||
| 411 | |||||
| 412 | Usage : $seq = $feature->spliced_seq() | ||||
| 413 | $seq = $feature_with_remote_locations->spliced_seq($db_for_seqs) | ||||
| 414 | |||||
| 415 | Function: Provides a sequence of the feature which is the most | ||||
| 416 | semantically "relevant" feature for this sequence. A default | ||||
| 417 | implementation is provided which for simple cases returns just | ||||
| 418 | the sequence, but for split cases, loops over the split location | ||||
| 419 | to return the sequence. In the case of split locations with | ||||
| 420 | remote locations, eg | ||||
| 421 | |||||
| 422 | join(AB000123:5567-5589,80..1144) | ||||
| 423 | |||||
| 424 | in the case when a database object is passed in, it will attempt | ||||
| 425 | to retrieve the sequence from the database object, and "Do the right thing", | ||||
| 426 | however if no database object is provided, it will generate the correct | ||||
| 427 | number of N's (DNA) or X's (protein, though this is unlikely). | ||||
| 428 | |||||
| 429 | This function is deliberately "magical" attempting to second guess | ||||
| 430 | what a user wants as "the" sequence for this feature. | ||||
| 431 | |||||
| 432 | Implementing classes are free to override this method with their | ||||
| 433 | own magic if they have a better idea what the user wants. | ||||
| 434 | |||||
| 435 | Args : [optional] | ||||
| 436 | -db A L<Bio::DB::RandomAccessI> compliant object if | ||||
| 437 | one needs to retrieve remote seqs. | ||||
| 438 | -nosort boolean if the locations should not be sorted | ||||
| 439 | by start location. This may occur, for instance, | ||||
| 440 | in a circular sequence where a gene span starts | ||||
| 441 | before the end of the sequence and ends after the | ||||
| 442 | sequence start. Example : join(15685..16260,1..207) | ||||
| 443 | (default = if sequence is_circular(), 1, otherwise 0) | ||||
| 444 | -phase truncates the returned sequence based on the | ||||
| 445 | intron phase (0,1,2). | ||||
| 446 | |||||
| 447 | Returns : A L<Bio::PrimarySeqI> object | ||||
| 448 | |||||
| 449 | =cut | ||||
| 450 | |||||
| 451 | sub spliced_seq { | ||||
| 452 | my $self = shift; | ||||
| 453 | my @args = @_; | ||||
| 454 | my ($db, $nosort, $phase) = | ||||
| 455 | $self->_rearrange([qw(DB NOSORT PHASE)], @args); | ||||
| 456 | |||||
| 457 | # set no_sort based on the parent sequence status | ||||
| 458 | if ($self->entire_seq->is_circular) { | ||||
| 459 | $nosort = 1; | ||||
| 460 | } | ||||
| 461 | |||||
| 462 | # (added 7/7/06 to allow use old API (with warnings) | ||||
| 463 | my $old_api = (!(grep {$_ =~ /(?:nosort|db|phase)/} @args)) ? 1 : 0; | ||||
| 464 | if (@args && $old_api) { | ||||
| 465 | $self->warn( q(API has changed; please use '-db' or '-nosort' ) | ||||
| 466 | . qq(for args. See POD for more details.)); | ||||
| 467 | $db = shift @args if @args; | ||||
| 468 | $nosort = shift @args if @args; | ||||
| 469 | $phase = shift @args if @args; | ||||
| 470 | }; | ||||
| 471 | |||||
| 472 | if (defined($phase) && ($phase < 0 || $phase > 2)) { | ||||
| 473 | $self->warn("Phase must be 0,1, or 2. Setting phase to 0..."); | ||||
| 474 | $phase = 0; | ||||
| 475 | } | ||||
| 476 | |||||
| 477 | if ( $db && ref($db) && ! $db->isa('Bio::DB::RandomAccessI') ) { | ||||
| 478 | $self->warn( "Must pass in a valid Bio::DB::RandomAccessI object" | ||||
| 479 | . " for access to remote locations for spliced_seq"); | ||||
| 480 | $db = undef; | ||||
| 481 | } | ||||
| 482 | elsif ( defined $db && $HasInMemory && $db->isa('Bio::DB::InMemoryCache') ) { | ||||
| 483 | $db = Bio::DB::InMemoryCache->new(-seqdb => $db); | ||||
| 484 | } | ||||
| 485 | |||||
| 486 | if ( not $self->location->isa("Bio::Location::SplitLocationI") ) { | ||||
| 487 | if ($phase) { | ||||
| 488 | $self->debug("Subseq start: ",$phase+1,"\tend: ",$self->end,"\n"); | ||||
| 489 | my $seqstr = substr($self->seq->seq, $phase); | ||||
| 490 | my $out = Bio::Seq->new( -id => $self->entire_seq->display_id | ||||
| 491 | . "_spliced_feat", | ||||
| 492 | -seq => $seqstr); | ||||
| 493 | return $out; | ||||
| 494 | } | ||||
| 495 | else { | ||||
| 496 | return $self->seq(); # nice and easy! | ||||
| 497 | } | ||||
| 498 | } | ||||
| 499 | |||||
| 500 | # redundant test, but the above ISA is probably not ideal. | ||||
| 501 | if ( not $self->location->isa("Bio::Location::SplitLocationI") ) { | ||||
| 502 | $self->throw("not atomic, not split, yikes, in trouble!"); | ||||
| 503 | } | ||||
| 504 | |||||
| 505 | my $seqstr = ''; | ||||
| 506 | my $seqid = $self->entire_seq->display_id; | ||||
| 507 | # This is to deal with reverse strand features | ||||
| 508 | # so we are really sorting features 5' -> 3' on their strand | ||||
| 509 | # i.e. rev strand features will be sorted largest to smallest | ||||
| 510 | # as this how revcom CDSes seem to be annotated in genbank. | ||||
| 511 | # Might need to eventually allow this to be programable? | ||||
| 512 | # (can I mention how much fun this is NOT! --jason) | ||||
| 513 | |||||
| 514 | my ($mixed,$mixedloc, $fstrand) = (0); | ||||
| 515 | |||||
| 516 | if ( $self->isa('Bio::Das::SegmentI') and not $self->absolute ) { | ||||
| 517 | $self->warn( "Calling spliced_seq with a Bio::Das::SegmentI which " | ||||
| 518 | . "does have absolute set to 1 -- be warned you may not " | ||||
| 519 | . "be getting things on the correct strand"); | ||||
| 520 | } | ||||
| 521 | |||||
| 522 | my @locset = $self->location->each_Location; | ||||
| 523 | my @locs; | ||||
| 524 | if ( not $nosort ) { | ||||
| 525 | @locs = map { $_->[0] } | ||||
| 526 | # sort so that most negative is first basically to order | ||||
| 527 | # the features on the opposite strand 5'->3' on their strand | ||||
| 528 | # rather than they way most are input which is on the fwd strand | ||||
| 529 | |||||
| 530 | sort { $a->[1] <=> $b->[1] } # Yes Tim, Schwartzian transformation | ||||
| 531 | map { | ||||
| 532 | $fstrand = $_->strand unless defined $fstrand; | ||||
| 533 | $mixed = 1 if defined $_->strand && $fstrand != $_->strand; | ||||
| 534 | |||||
| 535 | if( defined $_->seq_id ) { | ||||
| 536 | $mixedloc = 1 if( $_->seq_id ne $seqid ); | ||||
| 537 | } | ||||
| 538 | [ $_, $_->start * ($_->strand || 1) ]; | ||||
| 539 | } @locset; | ||||
| 540 | |||||
| 541 | if ( $mixed ) { | ||||
| 542 | $self->warn( "Mixed strand locations, spliced seq using the " | ||||
| 543 | . "input order rather than trying to sort"); | ||||
| 544 | @locs = @locset; | ||||
| 545 | } | ||||
| 546 | } | ||||
| 547 | else { | ||||
| 548 | # use the original order instead of trying to sort | ||||
| 549 | @locs = @locset; | ||||
| 550 | $fstrand = $locs[0]->strand; | ||||
| 551 | } | ||||
| 552 | |||||
| 553 | |||||
| 554 | my $last_id = undef; | ||||
| 555 | my $called_seq = undef; | ||||
| 556 | # This will be left as undefined if 1) db is remote or 2)seq_id is undefined. | ||||
| 557 | # In that case, old code is used to make exon sequence | ||||
| 558 | my $called_seq_seq = undef; | ||||
| 559 | my $called_seq_len = undef; | ||||
| 560 | |||||
| 561 | foreach my $loc ( @locs ) { | ||||
| 562 | if ( not $loc->isa("Bio::Location::Atomic") ) { | ||||
| 563 | $self->throw("Can only deal with one level deep locations"); | ||||
| 564 | } | ||||
| 565 | |||||
| 566 | if ( $fstrand != $loc->strand ) { | ||||
| 567 | $self->warn("feature strand is different from location strand!"); | ||||
| 568 | } | ||||
| 569 | |||||
| 570 | my $loc_seq_id; | ||||
| 571 | if ( defined $loc->seq_id ) { | ||||
| 572 | $loc_seq_id = $loc->seq_id; | ||||
| 573 | |||||
| 574 | # deal with remote sequences | ||||
| 575 | if ($loc_seq_id ne $seqid ) { | ||||
| 576 | # might be too big to download whole sequence | ||||
| 577 | $called_seq_seq = undef; | ||||
| 578 | |||||
| 579 | if ( defined $db ) { | ||||
| 580 | my $sid = $loc_seq_id; | ||||
| 581 | $sid =~ s/\.\d+$//g; | ||||
| 582 | eval { | ||||
| 583 | $called_seq = $db->get_Seq_by_acc($sid); | ||||
| 584 | }; | ||||
| 585 | if( $@ ) { | ||||
| 586 | $self->warn( "In attempting to join a remote location, sequence $sid " | ||||
| 587 | . "was not in database. Will provide padding N's. Full exception \n\n$@"); | ||||
| 588 | $called_seq = undef; | ||||
| 589 | } | ||||
| 590 | } | ||||
| 591 | else { | ||||
| 592 | $self->warn( "cannot get remote location for ".$loc_seq_id ." without a valid " | ||||
| 593 | . "Bio::DB::RandomAccessI database handle (like Bio::DB::GenBank)"); | ||||
| 594 | $called_seq = undef; | ||||
| 595 | } | ||||
| 596 | if ( !defined $called_seq ) { | ||||
| 597 | $seqstr .= 'N' x $loc->length; | ||||
| 598 | next; | ||||
| 599 | } | ||||
| 600 | } | ||||
| 601 | # have local sequence available | ||||
| 602 | else { | ||||
| 603 | # don't have to pull out source sequence again if it's local unless | ||||
| 604 | # it's the first exon or different from previous exon | ||||
| 605 | unless (defined(($last_id) && $last_id eq $loc_seq_id )){ | ||||
| 606 | $called_seq = $self->entire_seq; | ||||
| 607 | $called_seq_seq = $called_seq->seq(); # this is slow | ||||
| 608 | } | ||||
| 609 | } | ||||
| 610 | } | ||||
| 611 | #undefined $loc->seq->id | ||||
| 612 | else { | ||||
| 613 | $called_seq = $self->entire_seq; | ||||
| 614 | $called_seq_seq = undef; | ||||
| 615 | } | ||||
| 616 | |||||
| 617 | my ($start,$end) = ($loc->start,$loc->end); | ||||
| 618 | |||||
| 619 | # does the called sequence make sense? Bug 1780 | ||||
| 620 | my $called_seq_len; | ||||
| 621 | |||||
| 622 | # can avoid a seq() call on called_seq | ||||
| 623 | if (defined($called_seq_seq)) { | ||||
| 624 | $called_seq_len = length($called_seq_seq); | ||||
| 625 | } | ||||
| 626 | # can't avoid a seq() call on called_seq | ||||
| 627 | else { | ||||
| 628 | $called_seq_len = $called_seq->length # this is slow | ||||
| 629 | } | ||||
| 630 | |||||
| 631 | if ($called_seq_len < $loc->end) { | ||||
| 632 | my $accession = $called_seq->accession; | ||||
| 633 | my $orig_id = $self->seq_id; # originating sequence | ||||
| 634 | my ($locus) = $self->get_tagset_values("locus_tag"); | ||||
| 635 | $self->throw( "Location end ($end) exceeds length ($called_seq_len) of " | ||||
| 636 | . "called sequence $accession.\nCheck sequence version used in " | ||||
| 637 | . "$locus locus-tagged SeqFeature in $orig_id."); | ||||
| 638 | } | ||||
| 639 | |||||
| 640 | if ( $self->isa('Bio::Das::SegmentI') ) { | ||||
| 641 | # $called_seq is Bio::DB::GFF::RelSegment, as well as its subseq(); | ||||
| 642 | # Bio::DB::GFF::RelSegment::seq() returns a Bio::PrimarySeq, and using seq() | ||||
| 643 | # in turn returns a string. Confused? | ||||
| 644 | $seqstr .= $called_seq->subseq($start,$end)->seq()->seq(); # this is slow | ||||
| 645 | } | ||||
| 646 | else { | ||||
| 647 | my $exon_seq; | ||||
| 648 | if (defined ($called_seq_seq)){ | ||||
| 649 | $exon_seq = substr($called_seq_seq, $start-1, $end-$start+1); # this is quick | ||||
| 650 | } | ||||
| 651 | else { | ||||
| 652 | $exon_seq = $called_seq->subseq($loc->start,$loc->end); # this is slow | ||||
| 653 | } | ||||
| 654 | |||||
| 655 | # If guide_strand is defined, assemble the sequence first and revcom later if needed, | ||||
| 656 | # if its not defined, apply revcom immediately to proper locations | ||||
| 657 | if (defined $self->location->guide_strand) { | ||||
| 658 | $seqstr .= $exon_seq; | ||||
| 659 | } | ||||
| 660 | else { | ||||
| 661 | my $strand = defined ($loc->strand) ? ($loc->strand) : 0; | ||||
| 662 | |||||
| 663 | # revcomp $exon_seq | ||||
| 664 | if ($strand == -1) { | ||||
| 665 | $exon_seq = reverse($exon_seq); | ||||
| 666 | $exon_seq =~ tr/ABCDGHKMNRSTUVWXYabcdghkmnrstuvwxy/TVGHCDMKNYSAABWXRtvghcdmknysaabwxr/; | ||||
| 667 | $seqstr .= $exon_seq; | ||||
| 668 | } | ||||
| 669 | else { | ||||
| 670 | $seqstr .= $exon_seq; | ||||
| 671 | } | ||||
| 672 | } | ||||
| 673 | } | ||||
| 674 | |||||
| 675 | $last_id = $loc_seq_id if (defined($loc_seq_id)); | ||||
| 676 | } #next $loc | ||||
| 677 | |||||
| 678 | # Use revcom only after the whole sequence has been assembled | ||||
| 679 | my $guide_strand = defined ($self->location->guide_strand) ? ($self->location->guide_strand) : 0; | ||||
| 680 | if ($guide_strand == -1) { | ||||
| 681 | my $seqstr_obj = Bio::Seq->new(-seq => $seqstr); | ||||
| 682 | $seqstr = $seqstr_obj->revcom->seq; | ||||
| 683 | } | ||||
| 684 | |||||
| 685 | if (defined($phase)) { | ||||
| 686 | $seqstr = substr($seqstr, $phase); | ||||
| 687 | } | ||||
| 688 | |||||
| 689 | my $out = Bio::Seq->new( -id => $self->entire_seq->display_id | ||||
| 690 | . "_spliced_feat", | ||||
| 691 | -seq => $seqstr); | ||||
| 692 | |||||
| 693 | return $out; | ||||
| 694 | } | ||||
| 695 | |||||
| 696 | =head2 location | ||||
| 697 | |||||
| 698 | Title : location | ||||
| 699 | Usage : my $location = $seqfeature->location() | ||||
| 700 | Function: returns a location object suitable for identifying location | ||||
| 701 | of feature on sequence or parent feature | ||||
| 702 | Returns : Bio::LocationI object | ||||
| 703 | Args : none | ||||
| 704 | |||||
| 705 | |||||
| 706 | =cut | ||||
| 707 | |||||
| 708 | sub location { | ||||
| 709 | my ($self) = @_; | ||||
| 710 | |||||
| 711 | $self->throw_not_implemented(); | ||||
| 712 | } | ||||
| 713 | |||||
| 714 | |||||
| 715 | =head2 primary_id | ||||
| 716 | |||||
| 717 | Title : primary_id | ||||
| 718 | Usage : $obj->primary_id($newval) | ||||
| 719 | Function: | ||||
| 720 | Example : | ||||
| 721 | Returns : value of primary_id (a scalar) | ||||
| 722 | Args : on set, new value (a scalar or undef, optional) | ||||
| 723 | |||||
| 724 | Primary ID is a synonym for the tag 'ID' | ||||
| 725 | |||||
| 726 | =cut | ||||
| 727 | |||||
| 728 | sub primary_id{ | ||||
| 729 | my $self = shift; | ||||
| 730 | # note from cjm@fruitfly.org: | ||||
| 731 | # I have commented out the following 2 lines: | ||||
| 732 | |||||
| 733 | #return $self->{'primary_id'} = shift if @_; | ||||
| 734 | #return $self->{'primary_id'}; | ||||
| 735 | |||||
| 736 | #... and replaced it with the following; see | ||||
| 737 | # http://bioperl.org/pipermail/bioperl-l/2003-December/014150.html | ||||
| 738 | # for the discussion that lead to this change | ||||
| 739 | |||||
| 740 | if (@_) { | ||||
| 741 | if ($self->has_tag('ID')) { | ||||
| 742 | $self->remove_tag('ID'); | ||||
| 743 | } | ||||
| 744 | $self->add_tag_value('ID', shift); | ||||
| 745 | } | ||||
| 746 | my ($id) = $self->get_tagset_values('ID'); | ||||
| 747 | return $id; | ||||
| 748 | } | ||||
| 749 | |||||
| 750 | sub generate_unique_persistent_id { | ||||
| 751 | # DEPRECATED - us IDHandler | ||||
| 752 | my $self = shift; | ||||
| 753 | require Bio::SeqFeature::Tools::IDHandler; | ||||
| 754 | Bio::SeqFeature::Tools::IDHandler->new->generate_unique_persistent_id($self); | ||||
| 755 | } | ||||
| 756 | |||||
| 757 | |||||
| 758 | =head2 phase | ||||
| 759 | |||||
| 760 | Title : phase | ||||
| 761 | Usage : $obj->phase($newval) | ||||
| 762 | Function: get/set this feature's phase. | ||||
| 763 | Example : | ||||
| 764 | Returns : undef if no phase is set, | ||||
| 765 | otherwise 0, 1, or 2 (the only valid values for phase) | ||||
| 766 | Args : on set, the new value | ||||
| 767 | |||||
| 768 | Most features do not have or need a defined phase. | ||||
| 769 | |||||
| 770 | For features representing a CDS, the phase indicates where the feature | ||||
| 771 | begins with reference to the reading frame. The phase is one of the | ||||
| 772 | integers 0, 1, or 2, indicating the number of bases that should be | ||||
| 773 | removed from the beginning of this feature to reach the first base of | ||||
| 774 | the next codon. In other words, a phase of "0" indicates that the next | ||||
| 775 | codon begins at the first base of the region described by the current | ||||
| 776 | line, a phase of "1" indicates that the next codon begins at the | ||||
| 777 | second base of this region, and a phase of "2" indicates that the | ||||
| 778 | codon begins at the third base of this region. This is NOT to be | ||||
| 779 | confused with the frame, which is simply start modulo 3. | ||||
| 780 | |||||
| 781 | For forward strand features, phase is counted from the start | ||||
| 782 | field. For reverse strand features, phase is counted from the end | ||||
| 783 | field. | ||||
| 784 | |||||
| 785 | =cut | ||||
| 786 | |||||
| 787 | sub phase { | ||||
| 788 | my $self = shift; | ||||
| 789 | if( @_ ) { | ||||
| 790 | $self->remove_tag('phase') if $self->has_tag('phase'); | ||||
| 791 | my $newphase = shift; | ||||
| 792 | $self->throw("illegal phase value '$newphase', phase must be either undef, 0, 1, or 2") | ||||
| 793 | unless !defined $newphase || $newphase == 0 || $newphase == 1 || $newphase == 2; | ||||
| 794 | $self->add_tag_value('phase', $newphase ); | ||||
| 795 | return $newphase; | ||||
| 796 | } | ||||
| 797 | |||||
| 798 | return $self->has_tag('phase') ? ($self->get_tag_values('phase'))[0] : undef; | ||||
| 799 | } | ||||
| 800 | |||||
| 801 | |||||
| 802 | =head1 Bio::RangeI methods | ||||
| 803 | |||||
| 804 | These methods are inherited from RangeI and can be used | ||||
| 805 | directly from a SeqFeatureI interface. Remember that a | ||||
| 806 | SeqFeature is-a RangeI, and so wherever you see RangeI you | ||||
| 807 | can use a feature ($r in the below documentation). | ||||
| 808 | |||||
| 809 | =cut | ||||
| 810 | |||||
| 811 | =head2 start() | ||||
| 812 | |||||
| 813 | See L<Bio::RangeI> | ||||
| 814 | |||||
| 815 | =head2 end() | ||||
| 816 | |||||
| 817 | See L<Bio::RangeI> | ||||
| 818 | |||||
| 819 | =head2 strand() | ||||
| 820 | |||||
| 821 | See L<Bio::RangeI> | ||||
| 822 | |||||
| 823 | =head2 overlaps() | ||||
| 824 | |||||
| 825 | See L<Bio::RangeI> | ||||
| 826 | |||||
| 827 | =head2 contains() | ||||
| 828 | |||||
| 829 | See L<Bio::RangeI> | ||||
| 830 | |||||
| 831 | =head2 equals() | ||||
| 832 | |||||
| 833 | See L<Bio::RangeI> | ||||
| 834 | |||||
| 835 | =head2 intersection() | ||||
| 836 | |||||
| 837 | See L<Bio::RangeI> | ||||
| 838 | |||||
| 839 | =head2 union() | ||||
| 840 | |||||
| 841 | See L<Bio::RangeI> | ||||
| 842 | |||||
| 843 | =cut | ||||
| 844 | |||||
| 845 | 1 | 4µs | 1; |