| Filename | /Users/ap13/perl5/lib/perl5/Bio/PrimarySeqI.pm |
| Statements | Executed 7 statements in 2.77ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 2.36ms | 11.8ms | Bio::PrimarySeqI::BEGIN@124 |
| 1 | 1 | 1 | 21µs | 37µs | Bio::PrimarySeqI::BEGIN@123 |
| 1 | 1 | 1 | 9µs | 69µs | Bio::PrimarySeqI::BEGIN@126 |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::__ANON__[:899] |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::__ANON__[:900] |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::_attempt_to_load_Seq |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::_find_orfs_nucleotide |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::_orf_sequence |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::_revcom_from_string |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::_setup_class |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::_truncate_seq |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::accession_number |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::alphabet |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::can_call_new |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::desc |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::display_id |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::id |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::is_circular |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::length |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::moltype |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::primary_id |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::rev_transcribe |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::revcom |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::seq |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::subseq |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::transcribe |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::translate |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeqI::trunc |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | # | ||||
| 2 | # BioPerl module for Bio::PrimarySeqI | ||||
| 3 | # | ||||
| 4 | # Please direct questions and support issues to <bioperl-l@bioperl.org> | ||||
| 5 | # | ||||
| 6 | # Cared for by Ewan Birney <birney@ebi.ac.uk> | ||||
| 7 | # | ||||
| 8 | # Copyright Ewan Birney | ||||
| 9 | # | ||||
| 10 | # You may distribute this module under the same terms as perl itself | ||||
| 11 | |||||
| 12 | # POD documentation - main docs before the code | ||||
| 13 | |||||
| 14 | |||||
| 15 | =head1 NAME | ||||
| 16 | |||||
| 17 | Bio::PrimarySeqI - Interface definition for a Bio::PrimarySeq | ||||
| 18 | |||||
| 19 | =head1 SYNOPSIS | ||||
| 20 | |||||
| 21 | # Bio::PrimarySeqI is the interface class for sequences. | ||||
| 22 | # If you are a newcomer to bioperl, you might want to start with | ||||
| 23 | # Bio::Seq documentation. | ||||
| 24 | |||||
| 25 | # Test if this is a seq object | ||||
| 26 | $obj->isa("Bio::PrimarySeqI") || | ||||
| 27 | $obj->throw("$obj does not implement the Bio::PrimarySeqI interface"); | ||||
| 28 | |||||
| 29 | # Accessors | ||||
| 30 | $string = $obj->seq(); | ||||
| 31 | $substring = $obj->subseq(12,50); | ||||
| 32 | $display = $obj->display_id(); # for human display | ||||
| 33 | $id = $obj->primary_id(); # unique id for this object, | ||||
| 34 | # implementation defined | ||||
| 35 | $unique_key= $obj->accession_number(); # unique biological id | ||||
| 36 | |||||
| 37 | |||||
| 38 | # Object manipulation | ||||
| 39 | eval { | ||||
| 40 | $rev = $obj->revcom(); | ||||
| 41 | }; | ||||
| 42 | if( $@ ) { | ||||
| 43 | $obj->throw( "Could not reverse complement. ". | ||||
| 44 | "Probably not DNA. Actual exception\n$@\n" ); | ||||
| 45 | } | ||||
| 46 | |||||
| 47 | $trunc = $obj->trunc(12,50); | ||||
| 48 | # $rev and $trunc are Bio::PrimarySeqI compliant objects | ||||
| 49 | |||||
| 50 | |||||
| 51 | =head1 DESCRIPTION | ||||
| 52 | |||||
| 53 | This object defines an abstract interface to basic sequence | ||||
| 54 | information - for most users of the package the documentation (and | ||||
| 55 | methods) in this class are not useful - this is a developers-only | ||||
| 56 | class which defines what methods have to be implmented by other Perl | ||||
| 57 | objects to comply to the Bio::PrimarySeqI interface. Go "perldoc | ||||
| 58 | Bio::Seq" or "man Bio::Seq" for more information on the main class for | ||||
| 59 | sequences. | ||||
| 60 | |||||
| 61 | PrimarySeq is an object just for the sequence and its name(s), nothing | ||||
| 62 | more. Seq is the larger object complete with features. There is a pure | ||||
| 63 | perl implementation of this in L<Bio::PrimarySeq>. If you just want to | ||||
| 64 | use L<Bio::PrimarySeq> objects, then please read that module first. This | ||||
| 65 | module defines the interface, and is of more interest to people who | ||||
| 66 | want to wrap their own Perl Objects/RDBs/FileSystems etc in way that | ||||
| 67 | they "are" bioperl sequence objects, even though it is not using Perl | ||||
| 68 | to store the sequence etc. | ||||
| 69 | |||||
| 70 | This interface defines what bioperl considers necessary to "be" a | ||||
| 71 | sequence, without providing an implementation of this, an | ||||
| 72 | implementation is provided in L<Bio::PrimarySeq>. If you want to provide | ||||
| 73 | a Bio::PrimarySeq-compliant object which in fact wraps another | ||||
| 74 | object/database/out-of-perl experience, then this is the correct thing | ||||
| 75 | to wrap, generally by providing a wrapper class which would inherit | ||||
| 76 | from your object and this Bio::PrimarySeqI interface. The wrapper class | ||||
| 77 | then would have methods lists in the "Implementation Specific | ||||
| 78 | Functions" which would provide these methods for your object. | ||||
| 79 | |||||
| 80 | =head1 FEEDBACK | ||||
| 81 | |||||
| 82 | =head2 Mailing Lists | ||||
| 83 | |||||
| 84 | User feedback is an integral part of the evolution of this and other | ||||
| 85 | Bioperl modules. Send your comments and suggestions preferably to one | ||||
| 86 | of the Bioperl mailing lists. Your participation is much appreciated. | ||||
| 87 | |||||
| 88 | bioperl-l@bioperl.org - General discussion | ||||
| 89 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | ||||
| 90 | |||||
| 91 | =head2 Support | ||||
| 92 | |||||
| 93 | Please direct usage questions or support issues to the mailing list: | ||||
| 94 | |||||
| 95 | I<bioperl-l@bioperl.org> | ||||
| 96 | |||||
| 97 | rather than to the module maintainer directly. Many experienced and | ||||
| 98 | reponsive experts will be able look at the problem and quickly | ||||
| 99 | address it. Please include a thorough description of the problem | ||||
| 100 | with code and data examples if at all possible. | ||||
| 101 | |||||
| 102 | =head2 Reporting Bugs | ||||
| 103 | |||||
| 104 | Report bugs to the Bioperl bug tracking system to help us keep track | ||||
| 105 | the bugs and their resolution. Bug reports can be submitted via the | ||||
| 106 | web: | ||||
| 107 | |||||
| 108 | https://github.com/bioperl/bioperl-live/issues | ||||
| 109 | |||||
| 110 | =head1 AUTHOR - Ewan Birney | ||||
| 111 | |||||
| 112 | Email birney@ebi.ac.uk | ||||
| 113 | |||||
| 114 | =head1 APPENDIX | ||||
| 115 | |||||
| 116 | The rest of the documentation details each of the object | ||||
| 117 | methods. Internal methods are usually preceded with a _ | ||||
| 118 | |||||
| 119 | =cut | ||||
| 120 | |||||
| 121 | |||||
| 122 | package Bio::PrimarySeqI; | ||||
| 123 | 2 | 30µs | 2 | 52µs | # spent 37µs (21+15) within Bio::PrimarySeqI::BEGIN@123 which was called:
# once (21µs+15µs) by base::import at line 123 # spent 37µs making 1 call to Bio::PrimarySeqI::BEGIN@123
# spent 15µs making 1 call to strict::import |
| 124 | 2 | 208µs | 1 | 11.8ms | # spent 11.8ms (2.36+9.40) within Bio::PrimarySeqI::BEGIN@124 which was called:
# once (2.36ms+9.40ms) by base::import at line 124 # spent 11.8ms making 1 call to Bio::PrimarySeqI::BEGIN@124 |
| 125 | |||||
| 126 | 2 | 2.53ms | 2 | 69µs | # spent 69µs (9+60) within Bio::PrimarySeqI::BEGIN@126 which was called:
# once (9µs+60µs) by base::import at line 126 # spent 69µs making 1 call to Bio::PrimarySeqI::BEGIN@126
# spent 60µs making 1 call to base::import, recursion: max depth 2, sum of overlapping time 60µs |
| 127 | |||||
| 128 | |||||
| 129 | =head1 Implementation-specific Functions | ||||
| 130 | |||||
| 131 | These functions are the ones that a specific implementation must | ||||
| 132 | define. | ||||
| 133 | |||||
| 134 | =head2 seq | ||||
| 135 | |||||
| 136 | Title : seq | ||||
| 137 | Usage : $string = $obj->seq() | ||||
| 138 | Function: Returns the sequence as a string of letters. The | ||||
| 139 | case of the letters is left up to the implementer. | ||||
| 140 | Suggested cases are upper case for proteins and lower case for | ||||
| 141 | DNA sequence (IUPAC standard), but implementations are suggested to | ||||
| 142 | keep an open mind about case (some users... want mixed case!) | ||||
| 143 | Returns : A scalar | ||||
| 144 | Status : Virtual | ||||
| 145 | |||||
| 146 | =cut | ||||
| 147 | |||||
| 148 | sub seq { | ||||
| 149 | my ($self) = @_; | ||||
| 150 | $self->throw_not_implemented(); | ||||
| 151 | } | ||||
| 152 | |||||
| 153 | |||||
| 154 | =head2 subseq | ||||
| 155 | |||||
| 156 | Title : subseq | ||||
| 157 | Usage : $substring = $obj->subseq(10,40); | ||||
| 158 | Function: Returns the subseq from start to end, where the first base | ||||
| 159 | is 1 and the number is inclusive, i.e. 1-2 are the first two | ||||
| 160 | bases of the sequence. | ||||
| 161 | |||||
| 162 | Start cannot be larger than end but can be equal. | ||||
| 163 | |||||
| 164 | Returns : A string | ||||
| 165 | Args : | ||||
| 166 | Status : Virtual | ||||
| 167 | |||||
| 168 | =cut | ||||
| 169 | |||||
| 170 | sub subseq { | ||||
| 171 | my ($self) = @_; | ||||
| 172 | $self->throw_not_implemented(); | ||||
| 173 | } | ||||
| 174 | |||||
| 175 | |||||
| 176 | =head2 display_id | ||||
| 177 | |||||
| 178 | Title : display_id | ||||
| 179 | Usage : $id_string = $obj->display_id(); | ||||
| 180 | Function: Returns the display id, also known as the common name of the Sequence | ||||
| 181 | object. | ||||
| 182 | |||||
| 183 | The semantics of this is that it is the most likely string | ||||
| 184 | to be used as an identifier of the sequence, and likely to | ||||
| 185 | have "human" readability. The id is equivalent to the ID | ||||
| 186 | field of the GenBank/EMBL databanks and the id field of the | ||||
| 187 | Swissprot/sptrembl database. In fasta format, the >(\S+) is | ||||
| 188 | presumed to be the id, though some people overload the id | ||||
| 189 | to embed other information. Bioperl does not use any | ||||
| 190 | embedded information in the ID field, and people are | ||||
| 191 | encouraged to use other mechanisms (accession field for | ||||
| 192 | example, or extending the sequence object) to solve this. | ||||
| 193 | |||||
| 194 | Notice that $seq->id() maps to this function, mainly for | ||||
| 195 | legacy/convenience reasons. | ||||
| 196 | Returns : A string | ||||
| 197 | Args : None | ||||
| 198 | Status : Virtual | ||||
| 199 | |||||
| 200 | =cut | ||||
| 201 | |||||
| 202 | sub display_id { | ||||
| 203 | my ($self) = @_; | ||||
| 204 | $self->throw_not_implemented(); | ||||
| 205 | } | ||||
| 206 | |||||
| 207 | |||||
| 208 | =head2 accession_number | ||||
| 209 | |||||
| 210 | Title : accession_number | ||||
| 211 | Usage : $unique_biological_key = $obj->accession_number; | ||||
| 212 | Function: Returns the unique biological id for a sequence, commonly | ||||
| 213 | called the accession_number. For sequences from established | ||||
| 214 | databases, the implementors should try to use the correct | ||||
| 215 | accession number. Notice that primary_id() provides the | ||||
| 216 | unique id for the implemetation, allowing multiple objects | ||||
| 217 | to have the same accession number in a particular implementation. | ||||
| 218 | |||||
| 219 | For sequences with no accession number, this method should return | ||||
| 220 | "unknown". | ||||
| 221 | Returns : A string | ||||
| 222 | Args : None | ||||
| 223 | Status : Virtual | ||||
| 224 | |||||
| 225 | =cut | ||||
| 226 | |||||
| 227 | sub accession_number { | ||||
| 228 | my ($self,@args) = @_; | ||||
| 229 | $self->throw_not_implemented(); | ||||
| 230 | } | ||||
| 231 | |||||
| 232 | |||||
| 233 | =head2 primary_id | ||||
| 234 | |||||
| 235 | Title : primary_id | ||||
| 236 | Usage : $unique_implementation_key = $obj->primary_id; | ||||
| 237 | Function: Returns the unique id for this object in this | ||||
| 238 | implementation. This allows implementations to manage their | ||||
| 239 | own object ids in a way the implementaiton can control | ||||
| 240 | clients can expect one id to map to one object. | ||||
| 241 | |||||
| 242 | For sequences with no accession number, this method should | ||||
| 243 | return a stringified memory location. | ||||
| 244 | |||||
| 245 | Returns : A string | ||||
| 246 | Args : None | ||||
| 247 | Status : Virtual | ||||
| 248 | |||||
| 249 | =cut | ||||
| 250 | |||||
| 251 | sub primary_id { | ||||
| 252 | my ($self,@args) = @_; | ||||
| 253 | $self->throw_not_implemented(); | ||||
| 254 | } | ||||
| 255 | |||||
| 256 | |||||
| 257 | =head2 can_call_new | ||||
| 258 | |||||
| 259 | Title : can_call_new | ||||
| 260 | Usage : if( $obj->can_call_new ) { | ||||
| 261 | $newobj = $obj->new( %param ); | ||||
| 262 | } | ||||
| 263 | Function: Can_call_new returns 1 or 0 depending | ||||
| 264 | on whether an implementation allows new | ||||
| 265 | constructor to be called. If a new constructor | ||||
| 266 | is allowed, then it should take the followed hashed | ||||
| 267 | constructor list. | ||||
| 268 | |||||
| 269 | $myobject->new( -seq => $sequence_as_string, | ||||
| 270 | -display_id => $id | ||||
| 271 | -accession_number => $accession | ||||
| 272 | -alphabet => 'dna', | ||||
| 273 | ); | ||||
| 274 | Returns : 1 or 0 | ||||
| 275 | Args : | ||||
| 276 | |||||
| 277 | |||||
| 278 | =cut | ||||
| 279 | |||||
| 280 | sub can_call_new { | ||||
| 281 | my ($self,@args) = @_; | ||||
| 282 | # we default to 0 here | ||||
| 283 | return 0; | ||||
| 284 | } | ||||
| 285 | |||||
| 286 | |||||
| 287 | =head2 alphabet | ||||
| 288 | |||||
| 289 | Title : alphabet | ||||
| 290 | Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ } | ||||
| 291 | Function: Returns the type of sequence being one of | ||||
| 292 | 'dna', 'rna' or 'protein'. This is case sensitive. | ||||
| 293 | |||||
| 294 | This is not called "type" because this would cause | ||||
| 295 | upgrade problems from the 0.5 and earlier Seq objects. | ||||
| 296 | |||||
| 297 | Returns : A string either 'dna','rna','protein'. NB - the object must | ||||
| 298 | make a call of the alphabet, if there is no alphabet specified it | ||||
| 299 | has to guess. | ||||
| 300 | Args : None | ||||
| 301 | Status : Virtual | ||||
| 302 | |||||
| 303 | =cut | ||||
| 304 | |||||
| 305 | sub alphabet { | ||||
| 306 | my ( $self ) = @_; | ||||
| 307 | $self->throw_not_implemented(); | ||||
| 308 | } | ||||
| 309 | |||||
| 310 | |||||
| 311 | =head2 moltype | ||||
| 312 | |||||
| 313 | Title : moltype | ||||
| 314 | Usage : Deprecated. Use alphabet() instead. | ||||
| 315 | |||||
| 316 | =cut | ||||
| 317 | |||||
| 318 | sub moltype { | ||||
| 319 | my ($self,@args) = @_; | ||||
| 320 | $self->warn("moltype: pre v1.0 method. Calling alphabet() instead..."); | ||||
| 321 | return $self->alphabet(@args); | ||||
| 322 | } | ||||
| 323 | |||||
| 324 | |||||
| 325 | =head1 Implementation-optional Functions | ||||
| 326 | |||||
| 327 | The following functions rely on the above functions. An | ||||
| 328 | implementing class does not need to provide these functions, as they | ||||
| 329 | will be provided by this class, but is free to override these | ||||
| 330 | functions. | ||||
| 331 | |||||
| 332 | The revcom(), trunc(), and translate() methods create new sequence | ||||
| 333 | objects. They will call new() on the class of the sequence object | ||||
| 334 | instance passed as argument, unless can_call_new() returns FALSE. In | ||||
| 335 | the latter case a Bio::PrimarySeq object will be created. Implementors | ||||
| 336 | which really want to control how objects are created (eg, for object | ||||
| 337 | persistence over a database, or objects in a CORBA framework), they | ||||
| 338 | are encouraged to override these methods | ||||
| 339 | |||||
| 340 | =head2 revcom | ||||
| 341 | |||||
| 342 | Title : revcom | ||||
| 343 | Usage : $rev = $seq->revcom() | ||||
| 344 | Function: Produces a new Bio::PrimarySeqI implementing object which | ||||
| 345 | is the reversed complement of the sequence. For protein | ||||
| 346 | sequences this throws an exception of "Sequence is a | ||||
| 347 | protein. Cannot revcom". | ||||
| 348 | |||||
| 349 | The id is the same id as the original sequence, and the | ||||
| 350 | accession number is also indentical. If someone wants to | ||||
| 351 | track that this sequence has be reversed, it needs to | ||||
| 352 | define its own extensions. | ||||
| 353 | |||||
| 354 | To do an inplace edit of an object you can go: | ||||
| 355 | |||||
| 356 | $seq = $seq->revcom(); | ||||
| 357 | |||||
| 358 | This of course, causes Perl to handle the garbage | ||||
| 359 | collection of the old object, but it is roughly speaking as | ||||
| 360 | efficient as an inplace edit. | ||||
| 361 | |||||
| 362 | Returns : A new (fresh) Bio::PrimarySeqI object | ||||
| 363 | Args : None | ||||
| 364 | |||||
| 365 | |||||
| 366 | =cut | ||||
| 367 | |||||
| 368 | sub revcom { | ||||
| 369 | my ($self) = @_; | ||||
| 370 | |||||
| 371 | # Create a new fresh object if $self is 'Bio::Seq::LargePrimarySeq' | ||||
| 372 | # or 'Bio::Seq::LargeSeq', if not take advantage of | ||||
| 373 | # Bio::Root::clone to get an object copy | ||||
| 374 | my $out; | ||||
| 375 | if ( $self->isa('Bio::Seq::LargePrimarySeq') | ||||
| 376 | or $self->isa('Bio::Seq::LargeSeq') | ||||
| 377 | ) { | ||||
| 378 | my ($seqclass, $opts) = $self->_setup_class; | ||||
| 379 | $out = $seqclass->new( | ||||
| 380 | -seq => $self->_revcom_from_string($self->seq, $self->alphabet), | ||||
| 381 | -is_circular => $self->is_circular, | ||||
| 382 | -display_id => $self->display_id, | ||||
| 383 | -accession_number => $self->accession_number, | ||||
| 384 | -alphabet => $self->alphabet, | ||||
| 385 | -desc => $self->desc, | ||||
| 386 | -verbose => $self->verbose, | ||||
| 387 | %$opts, | ||||
| 388 | ); | ||||
| 389 | } else { | ||||
| 390 | $out = $self->clone; | ||||
| 391 | $out->seq( $out->_revcom_from_string($out->seq, $out->alphabet) ); | ||||
| 392 | } | ||||
| 393 | return $out; | ||||
| 394 | } | ||||
| 395 | |||||
| 396 | |||||
| 397 | sub _revcom_from_string { | ||||
| 398 | my ($self, $string, $alphabet) = @_; | ||||
| 399 | |||||
| 400 | # Check that reverse-complementing makes sense | ||||
| 401 | if( $alphabet eq 'protein' ) { | ||||
| 402 | $self->throw("Sequence is a protein. Cannot revcom."); | ||||
| 403 | } | ||||
| 404 | if( $alphabet ne 'dna' && $alphabet ne 'rna' ) { | ||||
| 405 | my $msg = "Sequence is not dna or rna, but [$alphabet]. Attempting to revcom, ". | ||||
| 406 | "but unsure if this is right."; | ||||
| 407 | if( $self->can('warn') ) { | ||||
| 408 | $self->warn($msg); | ||||
| 409 | } else { | ||||
| 410 | warn("[$self] $msg"); | ||||
| 411 | } | ||||
| 412 | } | ||||
| 413 | |||||
| 414 | # If sequence is RNA, map to DNA (then map back later) | ||||
| 415 | if( $alphabet eq 'rna' ) { | ||||
| 416 | $string =~ tr/uU/tT/; | ||||
| 417 | } | ||||
| 418 | |||||
| 419 | # Reverse-complement now | ||||
| 420 | $string =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; | ||||
| 421 | $string = CORE::reverse $string; | ||||
| 422 | |||||
| 423 | # Map back RNA to DNA | ||||
| 424 | if( $alphabet eq 'rna' ) { | ||||
| 425 | $string =~ tr/tT/uU/; | ||||
| 426 | } | ||||
| 427 | |||||
| 428 | return $string; | ||||
| 429 | } | ||||
| 430 | |||||
| 431 | |||||
| 432 | =head2 trunc | ||||
| 433 | |||||
| 434 | Title : trunc | ||||
| 435 | Usage : $subseq = $myseq->trunc(10,100); | ||||
| 436 | Function: Provides a truncation of a sequence. | ||||
| 437 | Returns : A fresh Bio::PrimarySeqI implementing object. | ||||
| 438 | Args : Two integers denoting first and last base of the sub-sequence. | ||||
| 439 | |||||
| 440 | |||||
| 441 | =cut | ||||
| 442 | |||||
| 443 | sub trunc { | ||||
| 444 | my ($self,$start,$end) = @_; | ||||
| 445 | |||||
| 446 | my $str; | ||||
| 447 | if( defined $start && ref($start) && | ||||
| 448 | $start->isa('Bio::LocationI') ) { | ||||
| 449 | $str = $self->subseq($start); # start is a location actually | ||||
| 450 | } elsif( !$end ) { | ||||
| 451 | $self->throw("trunc start,end -- there was no end for $start"); | ||||
| 452 | } elsif( $end < $start ) { | ||||
| 453 | my $msg = "start [$start] is greater than end [$end]. \n". | ||||
| 454 | "If you want to truncated and reverse complement, \n". | ||||
| 455 | "you must call trunc followed by revcom. Sorry."; | ||||
| 456 | $self->throw($msg); | ||||
| 457 | } else { | ||||
| 458 | $str = $self->subseq($start,$end); | ||||
| 459 | } | ||||
| 460 | |||||
| 461 | # Create a new fresh object if $self is 'Bio::Seq::LargePrimarySeq' | ||||
| 462 | # or 'Bio::Seq::LargeSeq', if not take advantage of | ||||
| 463 | # Bio::Root::clone to get an object copy | ||||
| 464 | my $out; | ||||
| 465 | if ( $self->isa('Bio::Seq::LargePrimarySeq') | ||||
| 466 | or $self->isa('Bio::Seq::LargeSeq') | ||||
| 467 | ) { | ||||
| 468 | my ($seqclass, $opts) = $self->_setup_class; | ||||
| 469 | $out = $seqclass->new( | ||||
| 470 | -seq => $str, | ||||
| 471 | -is_circular => $self->is_circular, | ||||
| 472 | -display_id => $self->display_id, | ||||
| 473 | -accession_number => $self->accession_number, | ||||
| 474 | -alphabet => $self->alphabet, | ||||
| 475 | -desc => $self->desc, | ||||
| 476 | -verbose => $self->verbose, | ||||
| 477 | %$opts, | ||||
| 478 | ); | ||||
| 479 | } else { | ||||
| 480 | $out = $self->clone; | ||||
| 481 | $out->seq($str); | ||||
| 482 | } | ||||
| 483 | return $out; | ||||
| 484 | } | ||||
| 485 | |||||
| 486 | |||||
| 487 | =head2 translate | ||||
| 488 | |||||
| 489 | Title : translate | ||||
| 490 | Usage : $protein_seq_obj = $dna_seq_obj->translate | ||||
| 491 | |||||
| 492 | Or if you expect a complete coding sequence (CDS) translation, | ||||
| 493 | with initiator at the beginning and terminator at the end: | ||||
| 494 | |||||
| 495 | $protein_seq_obj = $cds_seq_obj->translate(-complete => 1); | ||||
| 496 | |||||
| 497 | Or if you want translate() to find the first initiation | ||||
| 498 | codon and return the corresponding protein: | ||||
| 499 | |||||
| 500 | $protein_seq_obj = $cds_seq_obj->translate(-orf => 1); | ||||
| 501 | |||||
| 502 | Function: Provides the translation of the DNA sequence using full | ||||
| 503 | IUPAC ambiguities in DNA/RNA and amino acid codes. | ||||
| 504 | |||||
| 505 | The complete CDS translation is identical to EMBL/TREMBL | ||||
| 506 | database translation. Note that the trailing terminator | ||||
| 507 | character is removed before returning the translated protein | ||||
| 508 | object. | ||||
| 509 | |||||
| 510 | Note: if you set $dna_seq_obj->verbose(1) you will get a | ||||
| 511 | warning if the first codon is not a valid initiator. | ||||
| 512 | |||||
| 513 | Returns : A Bio::PrimarySeqI implementing object | ||||
| 514 | Args : -terminator | ||||
| 515 | character for terminator, default '*' | ||||
| 516 | -unknown | ||||
| 517 | character for unknown, default 'X' | ||||
| 518 | -frame | ||||
| 519 | positive integer frame shift (in bases), default 0 | ||||
| 520 | -codontable_id | ||||
| 521 | integer codon table id, default 1 | ||||
| 522 | -complete | ||||
| 523 | boolean, if true, complete CDS is expected. default false | ||||
| 524 | -complete_codons | ||||
| 525 | boolean, if true, codons which are incomplete are translated if a | ||||
| 526 | suitable amino acid is found. For instance, if the incomplete | ||||
| 527 | codon is 'GG', the completed codon is 'GGN', which is glycine | ||||
| 528 | (G). Defaults to 'false'; setting '-complete' also makes this | ||||
| 529 | true. | ||||
| 530 | -throw | ||||
| 531 | boolean, throw exception if ORF not complete, default false | ||||
| 532 | -orf | ||||
| 533 | if 'longest', find longest ORF. other true value, find | ||||
| 534 | first ORF. default 0 | ||||
| 535 | -codontable | ||||
| 536 | optional L<Bio::Tools::CodonTable> object to use for | ||||
| 537 | translation | ||||
| 538 | -start | ||||
| 539 | optional three-character string to force as initiation | ||||
| 540 | codon (e.g. 'atg'). If unset, start codons are | ||||
| 541 | determined by the CodonTable. Case insensitive. | ||||
| 542 | -offset | ||||
| 543 | optional positive integer offset for fuzzy locations. | ||||
| 544 | if set, must be either 1, 2, or 3 | ||||
| 545 | |||||
| 546 | =head3 Notes | ||||
| 547 | |||||
| 548 | The -start argument only applies when -orf is set to 1. By default all | ||||
| 549 | initiation codons found in the given codon table are used but when | ||||
| 550 | "start" is set to some codon this codon will be used exclusively as | ||||
| 551 | the initiation codon. Note that the default codon table (NCBI | ||||
| 552 | "Standard") has 3 initiation codons! | ||||
| 553 | |||||
| 554 | By default translate() translates termination codons to the some | ||||
| 555 | character (default is *), both internal and trailing codons. Setting | ||||
| 556 | "-complete" to 1 tells translate() to remove the trailing character. | ||||
| 557 | |||||
| 558 | -offset is used for seqfeatures which contain the the \codon_start tag | ||||
| 559 | and can be set to 1, 2, or 3. This is the offset by which the | ||||
| 560 | sequence translation starts relative to the first base of the feature | ||||
| 561 | |||||
| 562 | For details on codon tables used by translate() see L<Bio::Tools::CodonTable>. | ||||
| 563 | |||||
| 564 | Deprecated argument set (v. 1.5.1 and prior versions) where each argument is an | ||||
| 565 | element in an array: | ||||
| 566 | |||||
| 567 | 1: character for terminator (optional), defaults to '*'. | ||||
| 568 | 2: character for unknown amino acid (optional), defaults to 'X'. | ||||
| 569 | 3: frame (optional), valid values are 0, 1, 2, defaults to 0. | ||||
| 570 | 4: codon table id (optional), defaults to 1. | ||||
| 571 | 5: complete coding sequence expected, defaults to 0 (false). | ||||
| 572 | 6: boolean, throw exception if not complete coding sequence | ||||
| 573 | (true), defaults to warning (false) | ||||
| 574 | 7: codontable, a custom Bio::Tools::CodonTable object (optional). | ||||
| 575 | |||||
| 576 | =cut | ||||
| 577 | |||||
| 578 | sub translate { | ||||
| 579 | my ($self,@args) = @_; | ||||
| 580 | my ($terminator, $unknown, $frame, $codonTableId, $complete, | ||||
| 581 | $complete_codons, $throw, $codonTable, $orf, $start_codon, $offset); | ||||
| 582 | |||||
| 583 | ## new API with named parameters, post 1.5.1 | ||||
| 584 | if ($args[0] && $args[0] =~ /^-[A-Z]+/i) { | ||||
| 585 | ($terminator, $unknown, $frame, $codonTableId, $complete, | ||||
| 586 | $complete_codons, $throw,$codonTable, $orf, $start_codon, $offset) = | ||||
| 587 | $self->_rearrange([qw(TERMINATOR | ||||
| 588 | UNKNOWN | ||||
| 589 | FRAME | ||||
| 590 | CODONTABLE_ID | ||||
| 591 | COMPLETE | ||||
| 592 | COMPLETE_CODONS | ||||
| 593 | THROW | ||||
| 594 | CODONTABLE | ||||
| 595 | ORF | ||||
| 596 | START | ||||
| 597 | OFFSET)], @args); | ||||
| 598 | ## old API, 1.5.1 and preceding versions | ||||
| 599 | } else { | ||||
| 600 | ($terminator, $unknown, $frame, $codonTableId, | ||||
| 601 | $complete, $throw, $codonTable, $offset) = @args; | ||||
| 602 | } | ||||
| 603 | |||||
| 604 | ## Initialize termination codon, unknown codon, codon table id, frame | ||||
| 605 | $terminator = '*' unless (defined($terminator) and $terminator ne ''); | ||||
| 606 | $unknown = "X" unless (defined($unknown) and $unknown ne ''); | ||||
| 607 | $frame = 0 unless (defined($frame) and $frame ne ''); | ||||
| 608 | $codonTableId = 1 unless (defined($codonTableId) and $codonTableId ne ''); | ||||
| 609 | $complete_codons ||= $complete || 0; | ||||
| 610 | |||||
| 611 | ## Get a CodonTable, error if custom CodonTable is invalid | ||||
| 612 | if ($codonTable) { | ||||
| 613 | $self->throw("Need a Bio::Tools::CodonTable object, not ". $codonTable) | ||||
| 614 | unless $codonTable->isa('Bio::Tools::CodonTable'); | ||||
| 615 | } else { | ||||
| 616 | |||||
| 617 | # shouldn't this be cached? Seems wasteful to have a new instance | ||||
| 618 | # every time... | ||||
| 619 | $codonTable = Bio::Tools::CodonTable->new( -id => $codonTableId); | ||||
| 620 | } | ||||
| 621 | |||||
| 622 | ## Error if alphabet is "protein" | ||||
| 623 | $self->throw("Can't translate an amino acid sequence.") if | ||||
| 624 | ($self->alphabet =~ /protein/i); | ||||
| 625 | |||||
| 626 | ## Error if -start parameter isn't a valid codon | ||||
| 627 | if ($start_codon) { | ||||
| 628 | $self->throw("Invalid start codon: $start_codon.") if | ||||
| 629 | ( $start_codon !~ /^[A-Z]{3}$/i ); | ||||
| 630 | } | ||||
| 631 | |||||
| 632 | my $seq; | ||||
| 633 | if ($offset) { | ||||
| 634 | $self->throw("Offset must be 1, 2, or 3.") if | ||||
| 635 | ( $offset !~ /^[123]$/ ); | ||||
| 636 | my ($start, $end) = ($offset, $self->length); | ||||
| 637 | ($seq) = $self->subseq($start, $end); | ||||
| 638 | } else { | ||||
| 639 | ($seq) = $self->seq(); | ||||
| 640 | } | ||||
| 641 | |||||
| 642 | ## ignore frame if an ORF is supposed to be found | ||||
| 643 | if ( $orf ) { | ||||
| 644 | my ($orf_region) = $self->_find_orfs_nucleotide( $seq, $codonTable, $start_codon, $orf eq 'longest' ? 0 : 'first_only' ); | ||||
| 645 | $seq = $self->_orf_sequence( $seq, $orf_region ); | ||||
| 646 | } else { | ||||
| 647 | ## use frame, error if frame is not 0, 1 or 2 | ||||
| 648 | $self->throw("Valid values for frame are 0, 1, or 2, not $frame.") | ||||
| 649 | unless ($frame == 0 or $frame == 1 or $frame == 2); | ||||
| 650 | $seq = substr($seq,$frame); | ||||
| 651 | } | ||||
| 652 | |||||
| 653 | ## Translate it | ||||
| 654 | my $output = $codonTable->translate($seq, $complete_codons); | ||||
| 655 | # Use user-input terminator/unknown | ||||
| 656 | $output =~ s/\*/$terminator/g; | ||||
| 657 | $output =~ s/X/$unknown/g; | ||||
| 658 | |||||
| 659 | ## Only if we are expecting to translate a complete coding region | ||||
| 660 | if ($complete) { | ||||
| 661 | my $id = $self->display_id; | ||||
| 662 | # remove the terminator character | ||||
| 663 | if( substr($output,-1,1) eq $terminator ) { | ||||
| 664 | chop $output; | ||||
| 665 | } else { | ||||
| 666 | $throw && $self->throw("Seq [$id]: Not using a valid terminator codon!"); | ||||
| 667 | $self->warn("Seq [$id]: Not using a valid terminator codon!"); | ||||
| 668 | } | ||||
| 669 | # test if there are terminator characters inside the protein sequence! | ||||
| 670 | if ($output =~ /\Q$terminator\E/) { | ||||
| 671 | $id ||= ''; | ||||
| 672 | $throw && $self->throw("Seq [$id]: Terminator codon inside CDS!"); | ||||
| 673 | $self->warn("Seq [$id]: Terminator codon inside CDS!"); | ||||
| 674 | } | ||||
| 675 | # if the initiator codon is not ATG, the amino acid needs to be changed to M | ||||
| 676 | if ( substr($output,0,1) ne 'M' ) { | ||||
| 677 | if ($codonTable->is_start_codon(substr($seq, 0, 3)) ) { | ||||
| 678 | $output = 'M'. substr($output,1); | ||||
| 679 | } elsif ($throw) { | ||||
| 680 | $self->throw("Seq [$id]: Not using a valid initiator codon!"); | ||||
| 681 | } else { | ||||
| 682 | $self->warn("Seq [$id]: Not using a valid initiator codon!"); | ||||
| 683 | } | ||||
| 684 | } | ||||
| 685 | } | ||||
| 686 | |||||
| 687 | # Create a new fresh object if $self is 'Bio::Seq::LargePrimarySeq' | ||||
| 688 | # or 'Bio::Seq::LargeSeq', if not take advantage of | ||||
| 689 | # Bio::Root::clone to get an object copy | ||||
| 690 | my $out; | ||||
| 691 | if ( $self->isa('Bio::Seq::LargePrimarySeq') | ||||
| 692 | or $self->isa('Bio::Seq::LargeSeq') | ||||
| 693 | ) { | ||||
| 694 | my ($seqclass, $opts) = $self->_setup_class; | ||||
| 695 | $out = $seqclass->new( | ||||
| 696 | -seq => $output, | ||||
| 697 | -is_circular => $self->is_circular, | ||||
| 698 | -display_id => $self->display_id, | ||||
| 699 | -accession_number => $self->accession_number, | ||||
| 700 | -alphabet => 'protein', | ||||
| 701 | -desc => $self->desc, | ||||
| 702 | -verbose => $self->verbose, | ||||
| 703 | %$opts, | ||||
| 704 | ); | ||||
| 705 | } else { | ||||
| 706 | $out = $self->clone; | ||||
| 707 | $out->seq($output); | ||||
| 708 | $out->alphabet('protein'); | ||||
| 709 | } | ||||
| 710 | return $out; | ||||
| 711 | } | ||||
| 712 | |||||
| 713 | |||||
| 714 | =head2 transcribe() | ||||
| 715 | |||||
| 716 | Title : transcribe | ||||
| 717 | Usage : $xseq = $seq->transcribe | ||||
| 718 | Function: Convert base T to base U | ||||
| 719 | Returns : PrimarySeqI object of alphabet 'rna' or | ||||
| 720 | undef if $seq->alphabet ne 'dna' | ||||
| 721 | Args : | ||||
| 722 | |||||
| 723 | =cut | ||||
| 724 | |||||
| 725 | sub transcribe { | ||||
| 726 | my $self = shift; | ||||
| 727 | return unless $self->alphabet eq 'dna'; | ||||
| 728 | my $s = $self->seq; | ||||
| 729 | $s =~ tr/tT/uU/; | ||||
| 730 | my $desc = $self->desc || ''; | ||||
| 731 | |||||
| 732 | # Create a new fresh object if $self is 'Bio::Seq::LargePrimarySeq' | ||||
| 733 | # or 'Bio::Seq::LargeSeq', if not take advantage of | ||||
| 734 | # Bio::Root::clone to get an object copy | ||||
| 735 | my $out; | ||||
| 736 | if ( $self->isa('Bio::Seq::LargePrimarySeq') | ||||
| 737 | or $self->isa('Bio::Seq::LargeSeq') | ||||
| 738 | ) { | ||||
| 739 | my ($seqclass, $opts) = $self->_setup_class; | ||||
| 740 | $out = $seqclass->new( | ||||
| 741 | -seq => $s, | ||||
| 742 | -is_circular => $self->is_circular, | ||||
| 743 | -display_id => $self->display_id, | ||||
| 744 | -accession_number => $self->accession_number, | ||||
| 745 | -alphabet => 'rna', | ||||
| 746 | -desc => "${desc}[TRANSCRIBED]", | ||||
| 747 | -verbose => $self->verbose, | ||||
| 748 | %$opts, | ||||
| 749 | ); | ||||
| 750 | } else { | ||||
| 751 | $out = $self->clone; | ||||
| 752 | $out->seq($s); | ||||
| 753 | $out->alphabet('rna'); | ||||
| 754 | $out->desc($desc . "[TRANSCRIBED]"); | ||||
| 755 | } | ||||
| 756 | return $out; | ||||
| 757 | } | ||||
| 758 | |||||
| 759 | |||||
| 760 | =head2 rev_transcribe() | ||||
| 761 | |||||
| 762 | Title : rev_transcribe | ||||
| 763 | Usage : $rtseq = $seq->rev_transcribe | ||||
| 764 | Function: Convert base U to base T | ||||
| 765 | Returns : PrimarySeqI object of alphabet 'dna' or | ||||
| 766 | undef if $seq->alphabet ne 'rna' | ||||
| 767 | Args : | ||||
| 768 | |||||
| 769 | =cut | ||||
| 770 | |||||
| 771 | sub rev_transcribe { | ||||
| 772 | my $self = shift; | ||||
| 773 | return unless $self->alphabet eq 'rna'; | ||||
| 774 | my $s = $self->seq; | ||||
| 775 | $s =~ tr/uU/tT/; | ||||
| 776 | my $desc = $self->desc || ''; | ||||
| 777 | |||||
| 778 | # Create a new fresh object if $self is 'Bio::Seq::LargePrimarySeq' | ||||
| 779 | # or 'Bio::Seq::LargeSeq', if not take advantage of | ||||
| 780 | # Bio::Root::clone to get an object copy | ||||
| 781 | my $out; | ||||
| 782 | if ( $self->isa('Bio::Seq::LargePrimarySeq') | ||||
| 783 | or $self->isa('Bio::Seq::LargeSeq') | ||||
| 784 | ) { | ||||
| 785 | my ($seqclass, $opts) = $self->_setup_class; | ||||
| 786 | $out = $seqclass->new( | ||||
| 787 | -seq => $s, | ||||
| 788 | -is_circular => $self->is_circular, | ||||
| 789 | -display_id => $self->display_id, | ||||
| 790 | -accession_number => $self->accession_number, | ||||
| 791 | -alphabet => 'dna', | ||||
| 792 | -desc => $self->desc . "[REVERSE TRANSCRIBED]", | ||||
| 793 | -verbose => $self->verbose, | ||||
| 794 | %$opts, | ||||
| 795 | ); | ||||
| 796 | } else { | ||||
| 797 | $out = $self->clone; | ||||
| 798 | $out->seq($s); | ||||
| 799 | $out->alphabet('dna'); | ||||
| 800 | $out->desc($desc . "[REVERSE TRANSCRIBED]"); | ||||
| 801 | } | ||||
| 802 | return $out; | ||||
| 803 | } | ||||
| 804 | |||||
| 805 | |||||
| 806 | =head2 id | ||||
| 807 | |||||
| 808 | Title : id | ||||
| 809 | Usage : $id = $seq->id() | ||||
| 810 | Function: ID of the sequence. This should normally be (and actually is in | ||||
| 811 | the implementation provided here) just a synonym for display_id(). | ||||
| 812 | Returns : A string. | ||||
| 813 | Args : | ||||
| 814 | |||||
| 815 | =cut | ||||
| 816 | |||||
| 817 | sub id { | ||||
| 818 | my ($self)= @_; | ||||
| 819 | return $self->display_id(); | ||||
| 820 | } | ||||
| 821 | |||||
| 822 | |||||
| 823 | =head2 length | ||||
| 824 | |||||
| 825 | Title : length | ||||
| 826 | Usage : $len = $seq->length() | ||||
| 827 | Function: | ||||
| 828 | Returns : Integer representing the length of the sequence. | ||||
| 829 | Args : | ||||
| 830 | |||||
| 831 | =cut | ||||
| 832 | |||||
| 833 | sub length { | ||||
| 834 | my ($self)= @_; | ||||
| 835 | $self->throw_not_implemented(); | ||||
| 836 | } | ||||
| 837 | |||||
| 838 | |||||
| 839 | =head2 desc | ||||
| 840 | |||||
| 841 | Title : desc | ||||
| 842 | Usage : $seq->desc($newval); | ||||
| 843 | $description = $seq->desc(); | ||||
| 844 | Function: Get/set description text for a seq object | ||||
| 845 | Returns : Value of desc | ||||
| 846 | Args : newvalue (optional) | ||||
| 847 | |||||
| 848 | =cut | ||||
| 849 | |||||
| 850 | sub desc { | ||||
| 851 | shift->throw_not_implemented(); | ||||
| 852 | } | ||||
| 853 | |||||
| 854 | |||||
| 855 | =head2 is_circular | ||||
| 856 | |||||
| 857 | Title : is_circular | ||||
| 858 | Usage : if( $obj->is_circular) { # Do something } | ||||
| 859 | Function: Returns true if the molecule is circular | ||||
| 860 | Returns : Boolean value | ||||
| 861 | Args : none | ||||
| 862 | |||||
| 863 | =cut | ||||
| 864 | |||||
| 865 | sub is_circular { | ||||
| 866 | shift->throw_not_implemented; | ||||
| 867 | } | ||||
| 868 | |||||
| 869 | |||||
| 870 | =head1 Private functions | ||||
| 871 | |||||
| 872 | These are some private functions for the PrimarySeqI interface. You do not | ||||
| 873 | need to implement these functions | ||||
| 874 | |||||
| 875 | =head2 _find_orfs_nucleotide | ||||
| 876 | |||||
| 877 | Title : _find_orfs_nucleotide | ||||
| 878 | Usage : | ||||
| 879 | Function: Finds ORF starting at 1st initiation codon in nucleotide sequence. | ||||
| 880 | The ORF is not required to have a termination codon. | ||||
| 881 | Example : | ||||
| 882 | Returns : a list of string coordinates of ORF locations (0-based half-open), | ||||
| 883 | sorted descending by length (so that the longest is first) | ||||
| 884 | as: [ start, end, frame, length ], [ start, end, frame, length ], ... | ||||
| 885 | Args : Nucleotide sequence, | ||||
| 886 | CodonTable object, | ||||
| 887 | (optional) alternative initiation codon (e.g. 'ATA'), | ||||
| 888 | (optional) boolean that, if true, stops after finding the | ||||
| 889 | first available ORF | ||||
| 890 | |||||
| 891 | =cut | ||||
| 892 | |||||
| 893 | sub _find_orfs_nucleotide { | ||||
| 894 | my ( $self, $sequence, $codon_table, $start_codon, $first_only ) = @_; | ||||
| 895 | $sequence = uc $sequence; | ||||
| 896 | $start_codon = uc $start_codon if $start_codon; | ||||
| 897 | |||||
| 898 | my $is_start = $start_codon | ||||
| 899 | ? sub { shift eq $start_codon } | ||||
| 900 | : sub { $codon_table->is_start_codon( shift ) }; | ||||
| 901 | |||||
| 902 | # stores the begin index of the currently-running ORF in each | ||||
| 903 | # reading frame | ||||
| 904 | my @current_orf_start = (-1,-1,-1); | ||||
| 905 | |||||
| 906 | #< stores coordinates of longest observed orf (so far) in each | ||||
| 907 | # reading frame | ||||
| 908 | my @orfs; | ||||
| 909 | |||||
| 910 | # go through each base of the sequence, and each reading frame for each base | ||||
| 911 | my $seqlen = CORE::length $sequence; | ||||
| 912 | for( my $j = 0; $j <= $seqlen-3; $j++ ) { | ||||
| 913 | my $frame = $j % 3; | ||||
| 914 | |||||
| 915 | my $this_codon = substr( $sequence, $j, 3 ); | ||||
| 916 | |||||
| 917 | # if in an orf and this is either a stop codon or the last in-frame codon in the string | ||||
| 918 | if ( $current_orf_start[$frame] >= 0 ) { | ||||
| 919 | if ( $codon_table->is_ter_codon( $this_codon ) ||( my $is_last_codon_in_frame = ($j >= $seqlen-5)) ) { | ||||
| 920 | # record ORF start, end (half-open), length, and frame | ||||
| 921 | my @this_orf = ( $current_orf_start[$frame], $j+3, undef, $frame ); | ||||
| 922 | my $this_orf_length = $this_orf[2] = ( $this_orf[1] - $this_orf[0] ); | ||||
| 923 | |||||
| 924 | $self->warn( "Translating partial ORF " | ||||
| 925 | .$self->_truncate_seq( $self->_orf_sequence( $sequence, \@this_orf )) | ||||
| 926 | .' from end of nucleotide sequence' | ||||
| 927 | ) | ||||
| 928 | if $first_only && $is_last_codon_in_frame; | ||||
| 929 | |||||
| 930 | return \@this_orf if $first_only; | ||||
| 931 | push @orfs, \@this_orf; | ||||
| 932 | $current_orf_start[$frame] = -1; | ||||
| 933 | } | ||||
| 934 | } | ||||
| 935 | # if this is a start codon | ||||
| 936 | elsif ( $is_start->($this_codon) ) { | ||||
| 937 | $current_orf_start[$frame] = $j; | ||||
| 938 | } | ||||
| 939 | } | ||||
| 940 | |||||
| 941 | return sort { $b->[2] <=> $a->[2] } @orfs; | ||||
| 942 | } | ||||
| 943 | |||||
| 944 | |||||
| 945 | sub _truncate_seq { | ||||
| 946 | my ($self, $seq) = @_; | ||||
| 947 | return CORE::length($seq) > 200 ? substr($seq,0,50).'...'.substr($seq,-50) : $seq; | ||||
| 948 | } | ||||
| 949 | |||||
| 950 | |||||
| 951 | sub _orf_sequence { | ||||
| 952 | my ($self, $seq, $orf ) = @_; | ||||
| 953 | return '' unless $orf; | ||||
| 954 | return substr( $seq, $orf->[0], $orf->[2] ) | ||||
| 955 | } | ||||
| 956 | |||||
| 957 | |||||
| 958 | =head2 _attempt_to_load_Seq | ||||
| 959 | |||||
| 960 | Title : _attempt_to_load_Seq | ||||
| 961 | Usage : | ||||
| 962 | Function: | ||||
| 963 | Example : | ||||
| 964 | Returns : | ||||
| 965 | Args : | ||||
| 966 | |||||
| 967 | =cut | ||||
| 968 | |||||
| 969 | sub _attempt_to_load_Seq { | ||||
| 970 | my ($self) = @_; | ||||
| 971 | |||||
| 972 | if( $main::{'Bio::PrimarySeq'} ) { | ||||
| 973 | return 1; | ||||
| 974 | } else { | ||||
| 975 | eval { | ||||
| 976 | require Bio::PrimarySeq; | ||||
| 977 | }; | ||||
| 978 | if( $@ ) { | ||||
| 979 | my $text = "Bio::PrimarySeq could not be loaded for [$self]\n". | ||||
| 980 | "This indicates that you are using Bio::PrimarySeqI ". | ||||
| 981 | "without Bio::PrimarySeq loaded or without providing a ". | ||||
| 982 | "complete implementation.\nThe most likely problem is that there ". | ||||
| 983 | "has been a misconfiguration of the bioperl environment\n". | ||||
| 984 | "Actual exception:\n\n"; | ||||
| 985 | $self->throw("$text$@\n"); | ||||
| 986 | return 0; | ||||
| 987 | } | ||||
| 988 | return 1; | ||||
| 989 | } | ||||
| 990 | } | ||||
| 991 | |||||
| 992 | |||||
| 993 | sub _setup_class { | ||||
| 994 | # Return name of class and setup some default parameters | ||||
| 995 | my ($self) = @_; | ||||
| 996 | my $seqclass; | ||||
| 997 | if ($self->can_call_new()) { | ||||
| 998 | $seqclass = ref($self); | ||||
| 999 | } else { | ||||
| 1000 | $seqclass = 'Bio::PrimarySeq'; | ||||
| 1001 | $self->_attempt_to_load_Seq(); | ||||
| 1002 | } | ||||
| 1003 | my %opts; | ||||
| 1004 | if ($seqclass eq 'Bio::PrimarySeq') { | ||||
| 1005 | # Since sequence is in a Seq object, it has already been validated. | ||||
| 1006 | # We do not need to validate its trunc(), revcom(), etc | ||||
| 1007 | $opts{ -direct } = 1; | ||||
| 1008 | } | ||||
| 1009 | return $seqclass, \%opts; | ||||
| 1010 | } | ||||
| 1011 | |||||
| 1012 | |||||
| 1013 | 1 | 4µs | 1; |