| Filename | /Users/ap13/perl5/lib/perl5/Bio/PrimarySeq.pm |
| Statements | Executed 8 statements in 3.43ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 16µs | 32µs | Bio::PrimarySeq::BEGIN@128 |
| 1 | 1 | 1 | 12µs | 15.9ms | Bio::PrimarySeq::BEGIN@133 |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::_guess_alphabet |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::_guess_alphabet_from_string |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::_set_seq_by_ref |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::accession |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::accession_number |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::alphabet |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::authority |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::can_call_new |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::desc |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::description |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::display_id |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::display_name |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::id |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::is_circular |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::length |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::namespace |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::new |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::object_id |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::primary_id |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::seq |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::subseq |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::validate_seq |
| 0 | 0 | 0 | 0s | 0s | Bio::PrimarySeq::version |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | # | ||||
| 2 | # bioperl module for Bio::PrimarySeq | ||||
| 3 | # | ||||
| 4 | # Please direct questions and support issues to <bioperl-l@bioperl.org> | ||||
| 5 | # | ||||
| 6 | # Cared for by Ewan Birney <birney@ebi.ac.uk> | ||||
| 7 | # | ||||
| 8 | # Copyright Ewan Birney | ||||
| 9 | # | ||||
| 10 | # You may distribute this module under the same terms as perl itself | ||||
| 11 | |||||
| 12 | # POD documentation - main docs before the code | ||||
| 13 | |||||
| 14 | =head1 NAME | ||||
| 15 | |||||
| 16 | Bio::PrimarySeq - Bioperl lightweight sequence object | ||||
| 17 | |||||
| 18 | =head1 SYNOPSIS | ||||
| 19 | |||||
| 20 | # Bio::SeqIO for file reading, Bio::DB::GenBank for | ||||
| 21 | # database reading | ||||
| 22 | |||||
| 23 | use Bio::Seq; | ||||
| 24 | use Bio::SeqIO; | ||||
| 25 | use Bio::DB::GenBank; | ||||
| 26 | |||||
| 27 | # make from memory | ||||
| 28 | |||||
| 29 | $seqobj = Bio::PrimarySeq->new ( | ||||
| 30 | -seq => 'ATGGGGTGGGCGGTGGGTGGTTTG', | ||||
| 31 | -id => 'GeneFragment-12', | ||||
| 32 | -accession_number => 'X78121', | ||||
| 33 | -alphabet => 'dna', | ||||
| 34 | -is_circular => 1, | ||||
| 35 | ); | ||||
| 36 | print "Sequence ", $seqobj->id(), " with accession ", | ||||
| 37 | $seqobj->accession_number, "\n"; | ||||
| 38 | |||||
| 39 | # read from file | ||||
| 40 | |||||
| 41 | $inputstream = Bio::SeqIO->new( | ||||
| 42 | -file => "myseq.fa", | ||||
| 43 | -format => 'Fasta', | ||||
| 44 | ); | ||||
| 45 | $seqobj = $inputstream->next_seq(); | ||||
| 46 | print "Sequence ", $seqobj->id(), " and desc ", $seqobj->desc, "\n"; | ||||
| 47 | |||||
| 48 | # to get out parts of the sequence. | ||||
| 49 | |||||
| 50 | print "Sequence ", $seqobj->id(), " with accession ", | ||||
| 51 | $seqobj->accession_number, " and desc ", $seqobj->desc, "\n"; | ||||
| 52 | |||||
| 53 | $string = $seqobj->seq(); | ||||
| 54 | $string2 = $seqobj->subseq(1,40); | ||||
| 55 | |||||
| 56 | =head1 DESCRIPTION | ||||
| 57 | |||||
| 58 | PrimarySeq is a lightweight sequence object, storing the sequence, its | ||||
| 59 | name, a computer-useful unique name, and other fundamental attributes. | ||||
| 60 | It does not contain sequence features or other information. To have a | ||||
| 61 | sequence with sequence features you should use the Seq object which uses | ||||
| 62 | this object. | ||||
| 63 | |||||
| 64 | Although new users will use Bio::PrimarySeq a lot, in general you will | ||||
| 65 | be using it from the Bio::Seq object. For more information on Bio::Seq | ||||
| 66 | see L<Bio::Seq>. For interest you might like to know that | ||||
| 67 | Bio::Seq has-a Bio::PrimarySeq and forwards most of the function calls | ||||
| 68 | to do with sequence to it (the has-a relationship lets us get out of a | ||||
| 69 | otherwise nasty cyclical reference in Perl which would leak memory). | ||||
| 70 | |||||
| 71 | Sequence objects are defined by the Bio::PrimarySeqI interface, and this | ||||
| 72 | object is a pure Perl implementation of the interface. If that's | ||||
| 73 | gibberish to you, don't worry. The take home message is that this | ||||
| 74 | object is the bioperl default sequence object, but other people can | ||||
| 75 | use their own objects as sequences if they so wish. If you are | ||||
| 76 | interested in wrapping your own objects as compliant Bioperl sequence | ||||
| 77 | objects, then you should read the Bio::PrimarySeqI documentation | ||||
| 78 | |||||
| 79 | The documentation of this object is a merge of the Bio::PrimarySeq and | ||||
| 80 | Bio::PrimarySeqI documentation. This allows all the methods which you can | ||||
| 81 | call on sequence objects here. | ||||
| 82 | |||||
| 83 | =head1 FEEDBACK | ||||
| 84 | |||||
| 85 | =head2 Mailing Lists | ||||
| 86 | |||||
| 87 | User feedback is an integral part of the evolution of this and other | ||||
| 88 | Bioperl modules. Send your comments and suggestions preferably to one | ||||
| 89 | of the Bioperl mailing lists. Your participation is much appreciated. | ||||
| 90 | |||||
| 91 | bioperl-l@bioperl.org - General discussion | ||||
| 92 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | ||||
| 93 | |||||
| 94 | =head2 Support | ||||
| 95 | |||||
| 96 | Please direct usage questions or support issues to the mailing list: | ||||
| 97 | |||||
| 98 | I<bioperl-l@bioperl.org> | ||||
| 99 | |||||
| 100 | rather than to the module maintainer directly. Many experienced and | ||||
| 101 | reponsive experts will be able look at the problem and quickly | ||||
| 102 | address it. Please include a thorough description of the problem | ||||
| 103 | with code and data examples if at all possible. | ||||
| 104 | |||||
| 105 | =head2 Reporting Bugs | ||||
| 106 | |||||
| 107 | Report bugs to the Bioperl bug tracking system to help us keep track | ||||
| 108 | the bugs and their resolution. Bug reports can be submitted via the | ||||
| 109 | web: | ||||
| 110 | |||||
| 111 | https://github.com/bioperl/bioperl-live/issues | ||||
| 112 | |||||
| 113 | =head1 AUTHOR - Ewan Birney | ||||
| 114 | |||||
| 115 | Email birney@ebi.ac.uk | ||||
| 116 | |||||
| 117 | =head1 APPENDIX | ||||
| 118 | |||||
| 119 | The rest of the documentation details each of the object | ||||
| 120 | methods. Internal methods are usually preceded with a _ | ||||
| 121 | |||||
| 122 | =cut | ||||
| 123 | |||||
| - - | |||||
| 126 | package Bio::PrimarySeq; | ||||
| 127 | |||||
| 128 | 2 | 55µs | 2 | 48µs | # spent 32µs (16+16) within Bio::PrimarySeq::BEGIN@128 which was called:
# once (16µs+16µs) by base::import at line 128 # spent 32µs making 1 call to Bio::PrimarySeq::BEGIN@128
# spent 16µs making 1 call to strict::import |
| 129 | |||||
| 130 | 1 | 2µs | our $MATCHPATTERN = 'A-Za-z\-\.\*\?=~'; | ||
| 131 | 1 | 300ns | our $GAP_SYMBOLS = '-~'; | ||
| 132 | |||||
| 133 | 1 | 400ns | # spent 15.9ms (12µs+15.9) within Bio::PrimarySeq::BEGIN@133 which was called:
# once (12µs+15.9ms) by base::import at line 134 | ||
| 134 | 1 | 3.36ms | 2 | 15.9ms | Bio::IdentifiableI Bio::DescribableI); # spent 15.9ms making 1 call to Bio::PrimarySeq::BEGIN@133
# spent 15.9ms making 1 call to base::import, recursion: max depth 1, sum of overlapping time 15.9ms |
| 135 | |||||
| 136 | |||||
| 137 | # Setup the allowed values for alphabet() | ||||
| 138 | 1 | 9µs | my %valid_type = map {$_, 1} qw( dna rna protein ); | ||
| 139 | |||||
| 140 | |||||
| 141 | =head2 new | ||||
| 142 | |||||
| 143 | Title : new | ||||
| 144 | Usage : $seqobj = Bio::PrimarySeq->new( -seq => 'ATGGGGGTGGTGGTACCCT', | ||||
| 145 | -id => 'human_id', | ||||
| 146 | -accession_number => 'AL000012', | ||||
| 147 | ); | ||||
| 148 | Function: Returns a new primary seq object from | ||||
| 149 | basic constructors, being a string for the sequence | ||||
| 150 | and strings for id and accession_number. | ||||
| 151 | |||||
| 152 | Note that you can provide an empty sequence string. However, in | ||||
| 153 | this case you MUST specify the type of sequence you wish to | ||||
| 154 | initialize by the parameter -alphabet. See alphabet() for possible | ||||
| 155 | values. | ||||
| 156 | Returns : a new Bio::PrimarySeq object | ||||
| 157 | Args : -seq => sequence string | ||||
| 158 | -ref_to_seq => ... or reference to a sequence string | ||||
| 159 | -display_id => display id of the sequence (locus name) | ||||
| 160 | -accession_number => accession number | ||||
| 161 | -primary_id => primary id (Genbank id) | ||||
| 162 | -version => version number | ||||
| 163 | -namespace => the namespace for the accession | ||||
| 164 | -authority => the authority for the namespace | ||||
| 165 | -description => description text | ||||
| 166 | -desc => alias for description | ||||
| 167 | -alphabet => skip alphabet guess and set it to dna, rna or protein | ||||
| 168 | -id => alias for display id | ||||
| 169 | -is_circular => boolean to indicate that sequence is circular | ||||
| 170 | -direct => boolean to directly set sequences. The next time -seq, | ||||
| 171 | seq() or -ref_to_seq is use, the sequence will not be | ||||
| 172 | validated. Be careful with this... | ||||
| 173 | -nowarnonempty => boolean to avoid warning when sequence is empty | ||||
| 174 | |||||
| 175 | =cut | ||||
| 176 | |||||
| 177 | sub new { | ||||
| 178 | my ($class, @args) = @_; | ||||
| 179 | my $self = $class->SUPER::new(@args); | ||||
| 180 | my ($seq, $id, $acc, $pid, $ns, $auth, $v, $oid, $desc, $description, | ||||
| 181 | $alphabet, $given_id, $is_circular, $direct, $ref_to_seq, $len, | ||||
| 182 | $nowarnonempty) = | ||||
| 183 | $self->_rearrange([qw(SEQ | ||||
| 184 | DISPLAY_ID | ||||
| 185 | ACCESSION_NUMBER | ||||
| 186 | PRIMARY_ID | ||||
| 187 | NAMESPACE | ||||
| 188 | AUTHORITY | ||||
| 189 | VERSION | ||||
| 190 | OBJECT_ID | ||||
| 191 | DESC | ||||
| 192 | DESCRIPTION | ||||
| 193 | ALPHABET | ||||
| 194 | ID | ||||
| 195 | IS_CIRCULAR | ||||
| 196 | DIRECT | ||||
| 197 | REF_TO_SEQ | ||||
| 198 | LENGTH | ||||
| 199 | NOWARNONEMPTY | ||||
| 200 | )], | ||||
| 201 | @args); | ||||
| 202 | |||||
| 203 | # Private var _nowarnonempty, needs to be set before calling _guess_alphabet | ||||
| 204 | $self->{'_nowarnonempty'} = $nowarnonempty; | ||||
| 205 | $self->{'_direct'} = $direct; | ||||
| 206 | |||||
| 207 | if( defined $id && defined $given_id ) { | ||||
| 208 | if( $id ne $given_id ) { | ||||
| 209 | $self->throw("Provided both id and display_id constructors: [$id] [$given_id]"); | ||||
| 210 | } | ||||
| 211 | } | ||||
| 212 | if( defined $given_id ) { $id = $given_id; } | ||||
| 213 | |||||
| 214 | # Bernd's idea: set ids now for more informative invalid sequence messages | ||||
| 215 | defined $id && $self->display_id($id); | ||||
| 216 | $acc && $self->accession_number($acc); | ||||
| 217 | defined $pid && $self->primary_id($pid); | ||||
| 218 | |||||
| 219 | # Set alphabet now to avoid guessing it later, when sequence is set | ||||
| 220 | $alphabet && $self->alphabet($alphabet); | ||||
| 221 | |||||
| 222 | # Set the length before the seq. If there is a seq, length will be updated later | ||||
| 223 | $self->{'length'} = $len || 0; | ||||
| 224 | |||||
| 225 | # Set the sequence (but also alphabet and length) | ||||
| 226 | if ($ref_to_seq) { | ||||
| 227 | $self->_set_seq_by_ref($ref_to_seq, $alphabet); | ||||
| 228 | } else { | ||||
| 229 | if (defined $seq) { | ||||
| 230 | # Note: the sequence string may be empty | ||||
| 231 | $self->seq($seq); | ||||
| 232 | } | ||||
| 233 | } | ||||
| 234 | |||||
| 235 | $desc && $self->desc($desc); | ||||
| 236 | $description && $self->description($description); | ||||
| 237 | $ns && $self->namespace($ns); | ||||
| 238 | $auth && $self->authority($auth); | ||||
| 239 | # Any variable that can have a value "0" must be tested with defined | ||||
| 240 | # or it will fail to be added to the new object | ||||
| 241 | defined($v) && $self->version($v); | ||||
| 242 | defined($oid) && $self->object_id($oid); | ||||
| 243 | defined($is_circular) && $self->is_circular($is_circular); | ||||
| 244 | |||||
| 245 | return $self; | ||||
| 246 | } | ||||
| 247 | |||||
| 248 | |||||
| 249 | =head2 seq | ||||
| 250 | |||||
| 251 | Title : seq | ||||
| 252 | Usage : $string = $seqobj->seq(); | ||||
| 253 | Function: Get or set the sequence as a string of letters. The case of | ||||
| 254 | the letters is left up to the implementer. Suggested cases are | ||||
| 255 | upper case for proteins and lower case for DNA sequence (IUPAC | ||||
| 256 | standard), but you should not rely on this. An error is thrown if | ||||
| 257 | the sequence contains invalid characters: see validate_seq(). | ||||
| 258 | Returns : A scalar | ||||
| 259 | Args : - Optional new sequence value (a string) to set | ||||
| 260 | - Optional alphabet (it is guessed by default) | ||||
| 261 | |||||
| 262 | =cut | ||||
| 263 | |||||
| 264 | sub seq { | ||||
| 265 | my ($self, @args) = @_; | ||||
| 266 | |||||
| 267 | if( scalar @args == 0 ) { | ||||
| 268 | return $self->{'seq'}; | ||||
| 269 | } | ||||
| 270 | |||||
| 271 | my ($seq_str, $alphabet) = @args; | ||||
| 272 | if (@args) { | ||||
| 273 | $self->_set_seq_by_ref(\$seq_str, $alphabet); | ||||
| 274 | } | ||||
| 275 | |||||
| 276 | return $self->{'seq'}; | ||||
| 277 | } | ||||
| 278 | |||||
| 279 | |||||
| 280 | sub _set_seq_by_ref { | ||||
| 281 | # Set a sequence by reference. A reference is used to avoid the cost of | ||||
| 282 | # copying the sequence (which can be very large) between functions. | ||||
| 283 | my ($self, $seq_str_ref, $alphabet) = @_; | ||||
| 284 | |||||
| 285 | # Validate sequence if sequence is not empty and we are not in direct mode | ||||
| 286 | if ( (! $self->{'_direct'}) && (defined $$seq_str_ref) ) { | ||||
| 287 | $self->validate_seq($$seq_str_ref, 1); | ||||
| 288 | } | ||||
| 289 | delete $self->{'_direct'}; # next sequence will have to be validated | ||||
| 290 | |||||
| 291 | # Record sequence length | ||||
| 292 | my $len = CORE::length($$seq_str_ref || ''); | ||||
| 293 | my $is_changed_seq = (exists $self->{'seq'}) && ($len > 0); | ||||
| 294 | # Note: if the new seq is empty or undef, this is not considered a change | ||||
| 295 | delete $self->{'_freeze_length'} if $is_changed_seq; | ||||
| 296 | $self->{'length'} = $len if not exists $self->{'_freeze_length'}; | ||||
| 297 | |||||
| 298 | # Set sequence | ||||
| 299 | $self->{'seq'} = $$seq_str_ref; | ||||
| 300 | |||||
| 301 | # Set or guess alphabet | ||||
| 302 | if ($alphabet) { | ||||
| 303 | # Alphabet specified, set it no matter what | ||||
| 304 | $self->alphabet($alphabet); | ||||
| 305 | } elsif ($is_changed_seq || (! defined($self->alphabet()))) { | ||||
| 306 | # If we changed a previous sequence to a new one or if there is no | ||||
| 307 | # alphabet yet at all, we need to guess the (possibly new) alphabet | ||||
| 308 | $self->_guess_alphabet(); | ||||
| 309 | } # else (seq not changed and alphabet was defined) do nothing | ||||
| 310 | |||||
| 311 | return 1; | ||||
| 312 | } | ||||
| 313 | |||||
| 314 | |||||
| 315 | =head2 validate_seq | ||||
| 316 | |||||
| 317 | Title : validate_seq | ||||
| 318 | Usage : if(! $seqobj->validate_seq($seq_str) ) { | ||||
| 319 | print "sequence $seq_str is not valid for an object of | ||||
| 320 | alphabet ",$seqobj->alphabet, "\n"; | ||||
| 321 | } | ||||
| 322 | Function: Test that the given sequence is valid, i.e. contains only valid | ||||
| 323 | characters. The allowed characters are all letters (A-Z) and '-','.', | ||||
| 324 | '*','?','=' and '~'. Spaces are not valid. Note that this | ||||
| 325 | implementation does not take alphabet() into account and that empty | ||||
| 326 | sequences are considered valid. | ||||
| 327 | Returns : 1 if the supplied sequence string is valid, 0 otherwise. | ||||
| 328 | Args : - Sequence string to be validated | ||||
| 329 | - Boolean to optionally throw an error if the sequence is invalid | ||||
| 330 | |||||
| 331 | =cut | ||||
| 332 | |||||
| 333 | sub validate_seq { | ||||
| 334 | my ($self, $seqstr, $throw) = @_; | ||||
| 335 | if ( (defined $seqstr ) && | ||||
| 336 | ($seqstr !~ /^[$MATCHPATTERN]*$/) ) { | ||||
| 337 | if ($throw) { | ||||
| 338 | $self->throw("Failed validation of sequence '".(defined($self->id) || | ||||
| 339 | '[unidentified sequence]')."'. Invalid characters were: " . | ||||
| 340 | join('',($seqstr =~ /[^$MATCHPATTERN]/g))); | ||||
| 341 | } | ||||
| 342 | return 0; | ||||
| 343 | } | ||||
| 344 | return 1; | ||||
| 345 | } | ||||
| 346 | |||||
| 347 | |||||
| 348 | =head2 subseq | ||||
| 349 | |||||
| 350 | Title : subseq | ||||
| 351 | Usage : $substring = $seqobj->subseq(10,40); | ||||
| 352 | $substring = $seqobj->subseq(10,40,'nogap'); | ||||
| 353 | $substring = $seqobj->subseq(-start=>10, -end=>40, -replace_with=>'tga'); | ||||
| 354 | $substring = $seqobj->subseq($location_obj); | ||||
| 355 | $substring = $seqobj->subseq($location_obj, -nogap => 1); | ||||
| 356 | Function: Return the subseq from start to end, where the first sequence | ||||
| 357 | character has coordinate 1 number is inclusive, ie 1-2 are the | ||||
| 358 | first two characters of the sequence. The given start coordinate | ||||
| 359 | has to be larger than the end, even if the sequence is circular. | ||||
| 360 | Returns : a string | ||||
| 361 | Args : integer for start position | ||||
| 362 | integer for end position | ||||
| 363 | OR | ||||
| 364 | Bio::LocationI location for subseq (strand honored) | ||||
| 365 | Specify -NOGAP=>1 to return subseq with gap characters removed | ||||
| 366 | Specify -REPLACE_WITH=>$new_subseq to replace the subseq returned | ||||
| 367 | with $new_subseq in the sequence object | ||||
| 368 | |||||
| 369 | =cut | ||||
| 370 | |||||
| 371 | sub subseq { | ||||
| 372 | my $self = shift; | ||||
| 373 | my @args = @_; | ||||
| 374 | my ($start, $end, $nogap, $replace) = $self->_rearrange([qw(START | ||||
| 375 | END | ||||
| 376 | NOGAP | ||||
| 377 | REPLACE_WITH)], @args); | ||||
| 378 | |||||
| 379 | # If -replace_with is specified, validate the replacement sequence | ||||
| 380 | if (defined $replace) { | ||||
| 381 | $self->validate_seq( $replace ) || | ||||
| 382 | $self->throw("Replacement sequence does not look valid"); | ||||
| 383 | } | ||||
| 384 | |||||
| 385 | if( ref($start) && $start->isa('Bio::LocationI') ) { | ||||
| 386 | my $loc = $start; | ||||
| 387 | my $seq = ''; | ||||
| 388 | |||||
| 389 | # For Split objects if Guide Strand is negative, | ||||
| 390 | # pass the sublocations in reverse | ||||
| 391 | my $order = 0; | ||||
| 392 | if ($loc->isa('Bio::Location::SplitLocationI')) { | ||||
| 393 | # guide_strand can return undef, so don't compare directly | ||||
| 394 | # to avoid 'uninitialized value' warning | ||||
| 395 | my $guide_strand = defined ($loc->guide_strand) ? ($loc->guide_strand) : 0; | ||||
| 396 | $order = ($guide_strand == -1) ? -1 : 0; | ||||
| 397 | } | ||||
| 398 | # Reversing order using ->each_Location(-1) does not work well for | ||||
| 399 | # cut by origin-splits (like "complement(join(1900..END,START..50))"), | ||||
| 400 | # so use "reverse" instead | ||||
| 401 | my @sublocs = ($order == -1) ? reverse $loc->each_Location(): $loc->each_Location; | ||||
| 402 | foreach my $subloc (@sublocs) { | ||||
| 403 | my $piece = $self->subseq(-start => $subloc->start(), | ||||
| 404 | -end => $subloc->end(), | ||||
| 405 | -replace_with => $replace, | ||||
| 406 | -nogap => $nogap); | ||||
| 407 | $piece =~ s/[$GAP_SYMBOLS]//g if $nogap; | ||||
| 408 | |||||
| 409 | # strand can return undef, so don't compare directly | ||||
| 410 | # to avoid 'uninitialized value' warning | ||||
| 411 | my $strand = defined ($subloc->strand) ? ($subloc->strand) : 0; | ||||
| 412 | if ($strand < 0) { | ||||
| 413 | $piece = $self->_revcom_from_string($piece, $self->alphabet); | ||||
| 414 | } | ||||
| 415 | $seq .= $piece; | ||||
| 416 | } | ||||
| 417 | return $seq; | ||||
| 418 | } elsif( defined $start && defined $end ) { | ||||
| 419 | if( $start > $end ){ | ||||
| 420 | $self->throw("Bad start,end parameters. Start [$start] has to be ". | ||||
| 421 | "less than end [$end]"); | ||||
| 422 | } | ||||
| 423 | if( $start <= 0 ) { | ||||
| 424 | $self->throw("Bad start parameter ($start). Start must be positive."); | ||||
| 425 | } | ||||
| 426 | |||||
| 427 | # Remove one from start, and then length is end-start | ||||
| 428 | $start--; | ||||
| 429 | |||||
| 430 | my $seqstr; | ||||
| 431 | if (defined $replace) { | ||||
| 432 | $seqstr = substr $self->{seq}, $start, $end-$start, $replace; | ||||
| 433 | } else { | ||||
| 434 | $seqstr = substr $self->{seq}, $start, $end-$start; | ||||
| 435 | } | ||||
| 436 | |||||
| 437 | |||||
| 438 | if ($end > $self->length) { | ||||
| 439 | if ($self->is_circular) { | ||||
| 440 | my $start = 0; | ||||
| 441 | my $end = $end - $self->length; | ||||
| 442 | |||||
| 443 | my $appendstr; | ||||
| 444 | if (defined $replace) { | ||||
| 445 | $appendstr = substr $self->{seq}, $start, $end-$start, $replace; | ||||
| 446 | } else { | ||||
| 447 | $appendstr = substr $self->{seq}, $start, $end-$start; | ||||
| 448 | } | ||||
| 449 | |||||
| 450 | $seqstr .= $appendstr; | ||||
| 451 | } else { | ||||
| 452 | $self->throw("Bad end parameter ($end). End must be less than ". | ||||
| 453 | "the total length of sequence (total=".$self->length.")") | ||||
| 454 | } | ||||
| 455 | } | ||||
| 456 | |||||
| 457 | $seqstr =~ s/[$GAP_SYMBOLS]//g if ($nogap); | ||||
| 458 | return $seqstr; | ||||
| 459 | |||||
| 460 | } else { | ||||
| 461 | $self->warn("Incorrect parameters to subseq - must be two integers or ". | ||||
| 462 | "a Bio::LocationI object. Got:", $self,$start,$end,$replace,$nogap); | ||||
| 463 | return; | ||||
| 464 | } | ||||
| 465 | } | ||||
| 466 | |||||
| 467 | |||||
| 468 | =head2 length | ||||
| 469 | |||||
| 470 | Title : length | ||||
| 471 | Usage : $len = $seqobj->length(); | ||||
| 472 | Function: Get the stored length of the sequence in number of symbols (bases | ||||
| 473 | or amino acids). In some circumstances, you can also set this attribute: | ||||
| 474 | |||||
| 475 | 1. For empty sequences, you can set the length to anything you want: | ||||
| 476 | my $seqobj = Bio::PrimarySeq->new( -length => 123 ); | ||||
| 477 | my $len = $seqobj->len; # 123 | ||||
| 478 | 2. To save memory when using very long sequences, you can set the | ||||
| 479 | length of the sequence to the length of the sequence (and nothing | ||||
| 480 | else): | ||||
| 481 | my $seqobj = Bio::PrimarySeq->new( -seq => 'ACGT...' ); # 1 Mbp sequence | ||||
| 482 | # process $seqobj... then after you're done with it | ||||
| 483 | $seqobj->length($seqobj->length); | ||||
| 484 | $seqobj->seq(undef); # free memory! | ||||
| 485 | my $len = $seqobj->len; # 1 Mbp | ||||
| 486 | |||||
| 487 | Note that if you set seq() to a value other than undef at any time, | ||||
| 488 | the length attribute will be reset. | ||||
| 489 | Returns : integer representing the length of the sequence. | ||||
| 490 | Args : Optionally, the value on set | ||||
| 491 | |||||
| 492 | =cut | ||||
| 493 | |||||
| 494 | sub length { | ||||
| 495 | my ($self, $val) = @_; | ||||
| 496 | if (defined $val) { | ||||
| 497 | my $len = $self->{'length'}; | ||||
| 498 | if ($len && ($len != $val)) { | ||||
| 499 | $self->throw("Can not set the length to $val, current length value is $len"); | ||||
| 500 | } | ||||
| 501 | $self->{'length'} = $val; | ||||
| 502 | $self->{'_freeze_length'} = undef; | ||||
| 503 | } | ||||
| 504 | return $self->{'length'}; | ||||
| 505 | } | ||||
| 506 | |||||
| 507 | |||||
| 508 | =head2 display_id | ||||
| 509 | |||||
| 510 | Title : display_id or display_name | ||||
| 511 | Usage : $id_string = $seqobj->display_id(); | ||||
| 512 | Function: Get or set the display id, aka the common name of the sequence object. | ||||
| 513 | |||||
| 514 | The semantics of this is that it is the most likely string to | ||||
| 515 | be used as an identifier of the sequence, and likely to have | ||||
| 516 | "human" readability. The id is equivalent to the ID field of | ||||
| 517 | the GenBank/EMBL databanks and the id field of the | ||||
| 518 | Swissprot/sptrembl database. In fasta format, the >(\S+) is | ||||
| 519 | presumed to be the id, though some people overload the id to | ||||
| 520 | embed other information. Bioperl does not use any embedded | ||||
| 521 | information in the ID field, and people are encouraged to use | ||||
| 522 | other mechanisms (accession field for example, or extending | ||||
| 523 | the sequence object) to solve this. | ||||
| 524 | |||||
| 525 | With the new Bio::DescribeableI interface, display_name aliases | ||||
| 526 | to this method. | ||||
| 527 | Returns : A string for the display ID | ||||
| 528 | Args : Optional string for the display ID to set | ||||
| 529 | |||||
| 530 | =cut | ||||
| 531 | |||||
| 532 | sub display_id { | ||||
| 533 | my ($self, $value) = @_; | ||||
| 534 | if( defined $value) { | ||||
| 535 | $self->{'display_id'} = $value; | ||||
| 536 | } | ||||
| 537 | return $self->{'display_id'}; | ||||
| 538 | } | ||||
| 539 | |||||
| 540 | |||||
| 541 | =head2 accession_number | ||||
| 542 | |||||
| 543 | Title : accession_number or object_id | ||||
| 544 | Usage : $unique_key = $seqobj->accession_number; | ||||
| 545 | Function: Returns the unique biological id for a sequence, commonly | ||||
| 546 | called the accession_number. For sequences from established | ||||
| 547 | databases, the implementors should try to use the correct | ||||
| 548 | accession number. Notice that primary_id() provides the | ||||
| 549 | unique id for the implemetation, allowing multiple objects | ||||
| 550 | to have the same accession number in a particular implementation. | ||||
| 551 | |||||
| 552 | For sequences with no accession number, this method should | ||||
| 553 | return "unknown". | ||||
| 554 | |||||
| 555 | [Note this method name is likely to change in 1.3] | ||||
| 556 | |||||
| 557 | With the new Bio::IdentifiableI interface, this is aliased | ||||
| 558 | to object_id | ||||
| 559 | Returns : A string | ||||
| 560 | Args : A string (optional) for setting | ||||
| 561 | |||||
| 562 | =cut | ||||
| 563 | |||||
| 564 | sub accession_number { | ||||
| 565 | my( $self, $acc ) = @_; | ||||
| 566 | if (defined $acc) { | ||||
| 567 | $self->{'accession_number'} = $acc; | ||||
| 568 | } else { | ||||
| 569 | $acc = $self->{'accession_number'}; | ||||
| 570 | $acc = 'unknown' unless defined $acc; | ||||
| 571 | } | ||||
| 572 | return $acc; | ||||
| 573 | } | ||||
| 574 | |||||
| 575 | |||||
| 576 | =head2 primary_id | ||||
| 577 | |||||
| 578 | Title : primary_id | ||||
| 579 | Usage : $unique_key = $seqobj->primary_id; | ||||
| 580 | Function: Returns the unique id for this object in this | ||||
| 581 | implementation. This allows implementations to manage their | ||||
| 582 | own object ids in a way the implementaiton can control | ||||
| 583 | clients can expect one id to map to one object. | ||||
| 584 | |||||
| 585 | For sequences with no natural primary id, this method | ||||
| 586 | should return a stringified memory location. | ||||
| 587 | Returns : A string | ||||
| 588 | Args : A string (optional, for setting) | ||||
| 589 | |||||
| 590 | =cut | ||||
| 591 | |||||
| 592 | sub primary_id { | ||||
| 593 | my $self = shift; | ||||
| 594 | |||||
| 595 | if(@_) { | ||||
| 596 | $self->{'primary_id'} = shift; | ||||
| 597 | } | ||||
| 598 | if( ! defined($self->{'primary_id'}) ) { | ||||
| 599 | return "$self"; | ||||
| 600 | } | ||||
| 601 | return $self->{'primary_id'}; | ||||
| 602 | } | ||||
| 603 | |||||
| 604 | |||||
| 605 | =head2 alphabet | ||||
| 606 | |||||
| 607 | Title : alphabet | ||||
| 608 | Usage : if( $seqobj->alphabet eq 'dna' ) { # Do something } | ||||
| 609 | Function: Get/set the alphabet of sequence, one of | ||||
| 610 | 'dna', 'rna' or 'protein'. This is case sensitive. | ||||
| 611 | |||||
| 612 | This is not called <type> because this would cause | ||||
| 613 | upgrade problems from the 0.5 and earlier Seq objects. | ||||
| 614 | Returns : a string either 'dna','rna','protein'. NB - the object must | ||||
| 615 | make a call of the type - if there is no alphabet specified it | ||||
| 616 | has to guess. | ||||
| 617 | Args : optional string to set : 'dna' | 'rna' | 'protein' | ||||
| 618 | |||||
| 619 | |||||
| 620 | =cut | ||||
| 621 | |||||
| 622 | sub alphabet { | ||||
| 623 | my ($self,$value) = @_; | ||||
| 624 | if (defined $value) { | ||||
| 625 | $value = lc $value; | ||||
| 626 | unless ( $valid_type{$value} ) { | ||||
| 627 | $self->throw("Alphabet '$value' is not a valid alphabet (". | ||||
| 628 | join(',', map "'$_'", sort keys %valid_type) .") lowercase"); | ||||
| 629 | } | ||||
| 630 | $self->{'alphabet'} = $value; | ||||
| 631 | } | ||||
| 632 | return $self->{'alphabet'}; | ||||
| 633 | } | ||||
| 634 | |||||
| 635 | |||||
| 636 | =head2 desc | ||||
| 637 | |||||
| 638 | Title : desc or description | ||||
| 639 | Usage : $seqobj->desc($newval); | ||||
| 640 | Function: Get/set description of the sequence. | ||||
| 641 | |||||
| 642 | 'description' is an alias for this for compliance with the | ||||
| 643 | Bio::DescribeableI interface. | ||||
| 644 | Returns : value of desc (a string) | ||||
| 645 | Args : newvalue (a string or undef, optional) | ||||
| 646 | |||||
| 647 | |||||
| 648 | =cut | ||||
| 649 | |||||
| 650 | sub desc{ | ||||
| 651 | my $self = shift; | ||||
| 652 | |||||
| 653 | return $self->{'desc'} = shift if @_; | ||||
| 654 | return $self->{'desc'}; | ||||
| 655 | } | ||||
| 656 | |||||
| 657 | |||||
| 658 | =head2 can_call_new | ||||
| 659 | |||||
| 660 | Title : can_call_new | ||||
| 661 | Usage : | ||||
| 662 | Function: | ||||
| 663 | Example : | ||||
| 664 | Returns : true | ||||
| 665 | Args : | ||||
| 666 | |||||
| 667 | =cut | ||||
| 668 | |||||
| 669 | sub can_call_new { | ||||
| 670 | my ($self) = @_; | ||||
| 671 | |||||
| 672 | return 1; | ||||
| 673 | } | ||||
| 674 | |||||
| 675 | |||||
| 676 | =head2 id | ||||
| 677 | |||||
| 678 | Title : id | ||||
| 679 | Usage : $id = $seqobj->id(); | ||||
| 680 | Function: This is mapped on display_id | ||||
| 681 | Example : | ||||
| 682 | Returns : | ||||
| 683 | Args : | ||||
| 684 | |||||
| 685 | =cut | ||||
| 686 | |||||
| 687 | sub id { | ||||
| 688 | return shift->display_id(@_); | ||||
| 689 | } | ||||
| 690 | |||||
| 691 | |||||
| 692 | =head2 is_circular | ||||
| 693 | |||||
| 694 | Title : is_circular | ||||
| 695 | Usage : if( $seqobj->is_circular) { # Do something } | ||||
| 696 | Function: Returns true if the molecule is circular | ||||
| 697 | Returns : Boolean value | ||||
| 698 | Args : none | ||||
| 699 | |||||
| 700 | =cut | ||||
| 701 | |||||
| 702 | sub is_circular{ | ||||
| 703 | my $self = shift; | ||||
| 704 | return $self->{'is_circular'} = shift if @_; | ||||
| 705 | return $self->{'is_circular'}; | ||||
| 706 | } | ||||
| 707 | |||||
| 708 | |||||
| 709 | =head1 Methods for Bio::IdentifiableI compliance | ||||
| 710 | |||||
| 711 | =head2 object_id | ||||
| 712 | |||||
| 713 | Title : object_id | ||||
| 714 | Usage : $string = $seqobj->object_id(); | ||||
| 715 | Function: Get or set a string which represents the stable primary identifier | ||||
| 716 | in this namespace of this object. For DNA sequences this | ||||
| 717 | is its accession_number, similarly for protein sequences. | ||||
| 718 | |||||
| 719 | This is aliased to accession_number(). | ||||
| 720 | Returns : A scalar | ||||
| 721 | Args : Optional object ID to set. | ||||
| 722 | |||||
| 723 | =cut | ||||
| 724 | |||||
| 725 | sub object_id { | ||||
| 726 | return shift->accession_number(@_); | ||||
| 727 | } | ||||
| 728 | |||||
| 729 | |||||
| 730 | =head2 version | ||||
| 731 | |||||
| 732 | Title : version | ||||
| 733 | Usage : $version = $seqobj->version(); | ||||
| 734 | Function: Get or set a number which differentiates between versions of | ||||
| 735 | the same object. Higher numbers are considered to be | ||||
| 736 | later and more relevant, but a single object described | ||||
| 737 | the same identifier should represent the same concept. | ||||
| 738 | Returns : A number | ||||
| 739 | Args : Optional version to set. | ||||
| 740 | |||||
| 741 | =cut | ||||
| 742 | |||||
| 743 | sub version{ | ||||
| 744 | my ($self,$value) = @_; | ||||
| 745 | if( defined $value) { | ||||
| 746 | $self->{'_version'} = $value; | ||||
| 747 | } | ||||
| 748 | return $self->{'_version'}; | ||||
| 749 | } | ||||
| 750 | |||||
| 751 | |||||
| 752 | =head2 authority | ||||
| 753 | |||||
| 754 | Title : authority | ||||
| 755 | Usage : $authority = $seqobj->authority(); | ||||
| 756 | Function: Get or set a string which represents the organisation which | ||||
| 757 | granted the namespace, written as the DNS name of the | ||||
| 758 | organisation (eg, wormbase.org). | ||||
| 759 | Returns : A scalar | ||||
| 760 | Args : Optional authority to set. | ||||
| 761 | |||||
| 762 | =cut | ||||
| 763 | |||||
| 764 | sub authority { | ||||
| 765 | my ($self, $value) = @_; | ||||
| 766 | if( defined $value) { | ||||
| 767 | $self->{'authority'} = $value; | ||||
| 768 | } | ||||
| 769 | return $self->{'authority'}; | ||||
| 770 | } | ||||
| 771 | |||||
| 772 | |||||
| 773 | =head2 namespace | ||||
| 774 | |||||
| 775 | Title : namespace | ||||
| 776 | Usage : $string = $seqobj->namespace(); | ||||
| 777 | Function: Get or set a string representing the name space this identifier | ||||
| 778 | is valid in, often the database name or the name describing the | ||||
| 779 | collection. | ||||
| 780 | Returns : A scalar | ||||
| 781 | Args : Optional namespace to set. | ||||
| 782 | |||||
| 783 | =cut | ||||
| 784 | |||||
| 785 | sub namespace{ | ||||
| 786 | my ($self,$value) = @_; | ||||
| 787 | if( defined $value) { | ||||
| 788 | $self->{'namespace'} = $value; | ||||
| 789 | } | ||||
| 790 | return $self->{'namespace'} || ""; | ||||
| 791 | } | ||||
| 792 | |||||
| 793 | |||||
| 794 | =head1 Methods for Bio::DescribableI compliance | ||||
| 795 | |||||
| 796 | This comprises of display_name and description. | ||||
| 797 | |||||
| 798 | =head2 display_name | ||||
| 799 | |||||
| 800 | Title : display_name | ||||
| 801 | Usage : $string = $seqobj->display_name(); | ||||
| 802 | Function: Get or set a string which is what should be displayed to the user. | ||||
| 803 | The string should have no spaces (ideally, though a cautious | ||||
| 804 | user of this interface would not assumme this) and should be | ||||
| 805 | less than thirty characters (though again, double checking | ||||
| 806 | this is a good idea). | ||||
| 807 | |||||
| 808 | This is aliased to display_id(). | ||||
| 809 | Returns : A string for the display name | ||||
| 810 | Args : Optional string for the display name to set. | ||||
| 811 | |||||
| 812 | =cut | ||||
| 813 | |||||
| 814 | sub display_name { | ||||
| 815 | return shift->display_id(@_); | ||||
| 816 | } | ||||
| 817 | |||||
| 818 | |||||
| 819 | =head2 description | ||||
| 820 | |||||
| 821 | Title : description | ||||
| 822 | Usage : $string = $seqobj->description(); | ||||
| 823 | Function: Get or set a text string suitable for displaying to the user a | ||||
| 824 | description. This string is likely to have spaces, but | ||||
| 825 | should not have any newlines or formatting - just plain | ||||
| 826 | text. The string should not be greater than 255 characters | ||||
| 827 | and clients can feel justified at truncating strings at 255 | ||||
| 828 | characters for the purposes of display. | ||||
| 829 | |||||
| 830 | This is aliased to desc(). | ||||
| 831 | Returns : A string for the description | ||||
| 832 | Args : Optional string for the description to set. | ||||
| 833 | |||||
| 834 | =cut | ||||
| 835 | |||||
| 836 | sub description { | ||||
| 837 | return shift->desc(@_); | ||||
| 838 | } | ||||
| 839 | |||||
| 840 | |||||
| 841 | =head1 Methods Inherited from Bio::PrimarySeqI | ||||
| 842 | |||||
| 843 | These methods are available on Bio::PrimarySeq, although they are | ||||
| 844 | actually implemented on Bio::PrimarySeqI | ||||
| 845 | |||||
| 846 | =head2 revcom | ||||
| 847 | |||||
| 848 | Title : revcom | ||||
| 849 | Usage : $rev = $seqobj->revcom(); | ||||
| 850 | Function: Produces a new Bio::SeqI implementing object which | ||||
| 851 | is the reversed complement of the sequence. For protein | ||||
| 852 | sequences this throws an exception of | ||||
| 853 | "Sequence is a protein. Cannot revcom". | ||||
| 854 | |||||
| 855 | The id is the same id as the orginal sequence, and the | ||||
| 856 | accession number is also indentical. If someone wants to | ||||
| 857 | track that this sequence has be reversed, it needs to | ||||
| 858 | define its own extensions. | ||||
| 859 | |||||
| 860 | To do an inplace edit of an object you can go: | ||||
| 861 | |||||
| 862 | $seqobj = $seqobj->revcom(); | ||||
| 863 | |||||
| 864 | This of course, causes Perl to handle the garbage | ||||
| 865 | collection of the old object, but it is roughly speaking as | ||||
| 866 | efficient as an inplace edit. | ||||
| 867 | Returns : A new (fresh) Bio::SeqI object | ||||
| 868 | Args : none | ||||
| 869 | |||||
| 870 | =head2 trunc | ||||
| 871 | |||||
| 872 | Title : trunc | ||||
| 873 | Usage : $subseq = $myseq->trunc(10,100); | ||||
| 874 | Function: Provides a truncation of a sequence, | ||||
| 875 | Returns : A fresh Bio::SeqI implementing object. | ||||
| 876 | Args : Numbers for the start and end positions | ||||
| 877 | |||||
| 878 | =head1 Internal methods | ||||
| 879 | |||||
| 880 | These are internal methods to PrimarySeq | ||||
| 881 | |||||
| 882 | =head2 _guess_alphabet | ||||
| 883 | |||||
| 884 | Title : _guess_alphabet | ||||
| 885 | Usage : | ||||
| 886 | Function: Automatically guess and set the type of sequence: dna, rna, protein | ||||
| 887 | or '' if the sequence was empty. This method first removes dots (.), | ||||
| 888 | dashes (-) and question marks (?) before guessing the alphabet | ||||
| 889 | using the IUPAC conventions for ambiguous residues. Since the DNA and | ||||
| 890 | RNA characters are also valid characters for proteins, there is | ||||
| 891 | no foolproof way of determining the right alphabet. This is our best | ||||
| 892 | guess only! | ||||
| 893 | Returns : string 'dna', 'rna', 'protein' or ''. | ||||
| 894 | Args : none | ||||
| 895 | |||||
| 896 | =cut | ||||
| 897 | |||||
| 898 | sub _guess_alphabet { | ||||
| 899 | my ($self) = @_; | ||||
| 900 | # Guess alphabet | ||||
| 901 | my $alphabet = $self->_guess_alphabet_from_string($self->seq, $self->{'_nowarnonempty'}); | ||||
| 902 | # Set alphabet unless it is unknown | ||||
| 903 | $self->alphabet($alphabet) if $alphabet; | ||||
| 904 | return $alphabet; | ||||
| 905 | } | ||||
| 906 | |||||
| 907 | |||||
| 908 | sub _guess_alphabet_from_string { | ||||
| 909 | # Get the alphabet from a sequence string | ||||
| 910 | my ($self, $str, $nowarnonempty) = @_; | ||||
| 911 | |||||
| 912 | $nowarnonempty = 0 if not defined $nowarnonempty; | ||||
| 913 | |||||
| 914 | # Remove chars that clearly don't denote nucleic or amino acids | ||||
| 915 | $str =~ s/[-.?]//gi; | ||||
| 916 | |||||
| 917 | # Check for sequences without valid letters | ||||
| 918 | my $alphabet; | ||||
| 919 | my $total = CORE::length($str); | ||||
| 920 | if( $total == 0 ) { | ||||
| 921 | if (not $nowarnonempty) { | ||||
| 922 | $self->warn("Got a sequence without letters. Could not guess alphabet"); | ||||
| 923 | } | ||||
| 924 | $alphabet = ''; | ||||
| 925 | } | ||||
| 926 | |||||
| 927 | # Determine alphabet now | ||||
| 928 | if (not defined $alphabet) { | ||||
| 929 | if ($str =~ m/[EFIJLOPQXZ]/i) { | ||||
| 930 | # Start with a safe method to find proteins. | ||||
| 931 | # Unambiguous IUPAC letters for proteins are: E,F,I,J,L,O,P,Q,X,Z | ||||
| 932 | $alphabet = 'protein'; | ||||
| 933 | } else { | ||||
| 934 | # Alphabet is unsure, could still be DNA, RNA or protein | ||||
| 935 | # DNA and RNA contain mostly A, T, U, G, C and N, but the other | ||||
| 936 | # letters they use are also among the 15 valid letters that a | ||||
| 937 | # protein sequence can contain at this stage. Make our best guess | ||||
| 938 | # based on sequence composition. If it contains over 70% of ACGTUN, | ||||
| 939 | # it is likely nucleic. | ||||
| 940 | if( ($str =~ tr/ATUGCNWSKMatugcnwskm//) / $total > 0.7 ) { | ||||
| 941 | if ( $str =~ m/U/i ) { | ||||
| 942 | $alphabet = 'rna'; | ||||
| 943 | } else { | ||||
| 944 | $alphabet = 'dna'; | ||||
| 945 | } | ||||
| 946 | } else { | ||||
| 947 | $alphabet = 'protein'; | ||||
| 948 | } | ||||
| 949 | } | ||||
| 950 | } | ||||
| 951 | |||||
| 952 | return $alphabet; | ||||
| 953 | } | ||||
| 954 | |||||
| 955 | |||||
| 956 | ############################################################################ | ||||
| 957 | # aliases due to name changes or to compensate for our lack of consistency # | ||||
| 958 | ############################################################################ | ||||
| 959 | |||||
| 960 | sub accession { | ||||
| 961 | my $self = shift; | ||||
| 962 | |||||
| 963 | $self->warn(ref($self)."::accession is deprecated, ". | ||||
| 964 | "use accession_number() instead"); | ||||
| 965 | return $self->accession_number(@_); | ||||
| 966 | } | ||||
| 967 | |||||
| 968 | 1 | 10µs | 1; |