| Filename | /Users/ap13/perl5/lib/perl5/Bio/Seq.pm |
| Statements | Executed 12 statements in 1.41ms |
| Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 12µs | 24µs | Bio::Seq::BEGIN@459 |
| 1 | 1 | 1 | 10µs | 10µs | Bio::Seq::BEGIN@462 |
| 1 | 1 | 1 | 8µs | 1.65ms | Bio::Seq::BEGIN@464 |
| 1 | 1 | 1 | 5µs | 5µs | Bio::Seq::BEGIN@461 |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::DESTROY |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::accession |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::accession_number |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::add_Annotation |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::add_SeqFeature |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::all_SeqFeatures |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::alphabet |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::annotation |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::authority |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::can_call_new |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::desc |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::description |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::display_id |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::display_name |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::feature_count |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::get_Annotations |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::get_SeqFeatures |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::get_num_of_annotations |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::id |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::is_circular |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::length |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::namespace |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::new |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::num_Annotations |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::object_id |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::primary_id |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::primary_seq |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::remove_Annotations |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::remove_SeqFeatures |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::seq |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::species |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::subseq |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::validate_seq |
| 0 | 0 | 0 | 0s | 0s | Bio::Seq::version |
| Line | Statements | Time on line | Calls | Time in subs | Code |
|---|---|---|---|---|---|
| 1 | # | ||||
| 2 | # BioPerl module for Bio::Seq | ||||
| 3 | # | ||||
| 4 | # Please direct questions and support issues to <bioperl-l@bioperl.org> | ||||
| 5 | # | ||||
| 6 | # Cared for by Ewan Birney <birney@ebi.ac.uk> | ||||
| 7 | # | ||||
| 8 | # Copyright Ewan Birney | ||||
| 9 | # | ||||
| 10 | # You may distribute this module under the same terms as perl itself | ||||
| 11 | |||||
| 12 | # POD documentation - main docs before the code | ||||
| 13 | |||||
| 14 | =head1 NAME | ||||
| 15 | |||||
| 16 | Bio::Seq - Sequence object, with features | ||||
| 17 | |||||
| 18 | =head1 SYNOPSIS | ||||
| 19 | |||||
| 20 | # This is the main sequence object in Bioperl | ||||
| 21 | |||||
| 22 | # gets a sequence from a file | ||||
| 23 | $seqio = Bio::SeqIO->new( '-format' => 'embl' , -file => 'myfile.dat'); | ||||
| 24 | $seqobj = $seqio->next_seq(); | ||||
| 25 | |||||
| 26 | # SeqIO can both read and write sequences; see Bio::SeqIO | ||||
| 27 | # for more information and examples | ||||
| 28 | |||||
| 29 | # get from database | ||||
| 30 | $db = Bio::DB::GenBank->new(); | ||||
| 31 | $seqobj = $db->get_Seq_by_acc('X78121'); | ||||
| 32 | |||||
| 33 | # make from strings in script | ||||
| 34 | $seqobj = Bio::Seq->new( -display_id => 'my_id', | ||||
| 35 | -seq => $sequence_as_string); | ||||
| 36 | |||||
| 37 | # gets sequence as a string from sequence object | ||||
| 38 | $seqstr = $seqobj->seq(); # actual sequence as a string | ||||
| 39 | $seqstr = $seqobj->subseq(10,50); # slice in biological coordinates | ||||
| 40 | |||||
| 41 | # retrieves information from the sequence | ||||
| 42 | # features must implement Bio::SeqFeatureI interface | ||||
| 43 | |||||
| 44 | @features = $seqobj->get_SeqFeatures(); # just top level | ||||
| 45 | foreach my $feat ( @features ) { | ||||
| 46 | print "Feature ",$feat->primary_tag," starts ",$feat->start," ends ", | ||||
| 47 | $feat->end," strand ",$feat->strand,"\n"; | ||||
| 48 | |||||
| 49 | # features retain link to underlying sequence object | ||||
| 50 | print "Feature sequence is ",$feat->seq->seq(),"\n" | ||||
| 51 | } | ||||
| 52 | |||||
| 53 | # sequences may have a species | ||||
| 54 | |||||
| 55 | if( defined( my $species = $seqobj->species ) ) { | ||||
| 56 | print "Sequence is from ",$species->binomial," [",$species->common_name,"]\n"; | ||||
| 57 | } | ||||
| 58 | |||||
| 59 | # annotation objects are Bio::AnnotationCollectionI's | ||||
| 60 | $ann = $seqobj->annotation(); # annotation object | ||||
| 61 | |||||
| 62 | # references is one type of annotations to get. Also get | ||||
| 63 | # comment and dblink. Look at Bio::AnnotationCollection for | ||||
| 64 | # more information | ||||
| 65 | |||||
| 66 | foreach my $ref ( $ann->get_Annotations('reference') ) { | ||||
| 67 | print "Reference ",$ref->title,"\n"; | ||||
| 68 | } | ||||
| 69 | |||||
| 70 | # you can get truncations, translations and reverse complements, these | ||||
| 71 | # all give back Bio::Seq objects themselves, though currently with no | ||||
| 72 | # features transferred | ||||
| 73 | |||||
| 74 | my $trunc = $seqobj->trunc(100,200); | ||||
| 75 | my $rev = $seqobj->revcom(); | ||||
| 76 | |||||
| 77 | # there are many options to translate - check out the docs | ||||
| 78 | my $trans = $seqobj->translate(); | ||||
| 79 | |||||
| 80 | # these functions can be chained together | ||||
| 81 | |||||
| 82 | my $trans_trunc_rev = $seqobj->trunc(100,200)->revcom->translate(); | ||||
| 83 | |||||
| - - | |||||
| 86 | =head1 DESCRIPTION | ||||
| 87 | |||||
| 88 | A Seq object is a sequence with sequence features placed on it. The | ||||
| 89 | Seq object contains a PrimarySeq object for the actual sequence and | ||||
| 90 | also implements its interface. | ||||
| 91 | |||||
| 92 | In Bioperl we have 3 main players that people are going to use frequently | ||||
| 93 | |||||
| 94 | Bio::PrimarySeq - just the sequence and its names, nothing else. | ||||
| 95 | Bio::SeqFeatureI - a feature on a sequence, potentially with a sequence | ||||
| 96 | and a location and annotation. | ||||
| 97 | Bio::Seq - A sequence and a collection of sequence features | ||||
| 98 | (an aggregate) with its own annotation. | ||||
| 99 | |||||
| 100 | Although Bioperl is not tied heavily to file formats these distinctions do | ||||
| 101 | map to file formats sensibly and for some bioinformaticians this might help | ||||
| 102 | |||||
| 103 | Bio::PrimarySeq - Fasta file of a sequence | ||||
| 104 | Bio::SeqFeatureI - A single entry in an EMBL/GenBank/DDBJ feature table | ||||
| 105 | Bio::Seq - A single EMBL/GenBank/DDBJ entry | ||||
| 106 | |||||
| 107 | By having this split we avoid a lot of nasty circular references | ||||
| 108 | (sequence features can hold a reference to a sequence without the sequence | ||||
| 109 | holding a reference to the sequence feature). See L<Bio::PrimarySeq> and | ||||
| 110 | L<Bio::SeqFeatureI> for more information. | ||||
| 111 | |||||
| 112 | Ian Korf really helped in the design of the Seq and SeqFeature system. | ||||
| 113 | |||||
| 114 | =head2 Examples | ||||
| 115 | |||||
| 116 | A simple and fundamental block of code: | ||||
| 117 | |||||
| 118 | use Bio::SeqIO; | ||||
| 119 | |||||
| 120 | my $seqIOobj = Bio::SeqIO->new(-file=>"1.fa"); # create a SeqIO object | ||||
| 121 | my $seqobj = $seqIOobj->next_seq; # get a Seq object | ||||
| 122 | |||||
| 123 | With the Seq object in hand one has access to a powerful set of Bioperl | ||||
| 124 | methods and related Bioperl objects. This next script will take a file of sequences | ||||
| 125 | in EMBL format and create a file of the reverse-complemented sequences | ||||
| 126 | in Fasta format using Seq objects. It also prints out details about the | ||||
| 127 | exons it finds as sequence features in General Feature Format (GFF). | ||||
| 128 | |||||
| 129 | use Bio::Seq; | ||||
| 130 | use Bio::SeqIO; | ||||
| 131 | |||||
| 132 | $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat'); | ||||
| 133 | $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa'); | ||||
| 134 | |||||
| 135 | while((my $seqobj = $seqin->next_seq())) { | ||||
| 136 | print "Seen sequence ",$seqobj->display_id,", start of seq ", | ||||
| 137 | substr($seqobj->seq,1,10),"\n"; | ||||
| 138 | if( $seqobj->alphabet eq 'dna') { | ||||
| 139 | $rev = $seqobj->revcom; | ||||
| 140 | $id = $seqobj->display_id(); | ||||
| 141 | $id = "$id.rev"; | ||||
| 142 | $rev->display_id($id); | ||||
| 143 | $seqout->write_seq($rev); | ||||
| 144 | } | ||||
| 145 | |||||
| 146 | foreach $feat ( $seqobj->get_SeqFeatures() ) { | ||||
| 147 | if( $feat->primary_tag eq 'exon' ) { | ||||
| 148 | print STDOUT "Location ",$feat->start,":", | ||||
| 149 | $feat->end," GFF[",$feat->gff_string,"]\n"; | ||||
| 150 | } | ||||
| 151 | } | ||||
| 152 | } | ||||
| 153 | |||||
| 154 | Let's examine the script. The lines below import the Bioperl modules. | ||||
| 155 | Seq is the main Bioperl sequence object and SeqIO is the Bioperl support | ||||
| 156 | for reading sequences from files and to files | ||||
| 157 | |||||
| 158 | use Bio::Seq; | ||||
| 159 | use Bio::SeqIO; | ||||
| 160 | |||||
| 161 | These two lines create two SeqIO streams: one for reading in sequences | ||||
| 162 | and one for outputting sequences: | ||||
| 163 | |||||
| 164 | $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat'); | ||||
| 165 | $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa'); | ||||
| 166 | |||||
| 167 | Notice that in the "$seqout" case there is a greater-than sign, | ||||
| 168 | indicating the file is being opened for writing. | ||||
| 169 | |||||
| 170 | Using the | ||||
| 171 | |||||
| 172 | '-argument' => value | ||||
| 173 | |||||
| 174 | syntax is common in Bioperl. The file argument is like an argument | ||||
| 175 | to open() . You can also pass in filehandles or FileHandle objects by | ||||
| 176 | using the -fh argument (see L<Bio::SeqIO> documentation for details). | ||||
| 177 | Many formats in Bioperl are handled, including Fasta, EMBL, GenBank, | ||||
| 178 | Swissprot (swiss), PIR, and GCG. | ||||
| 179 | |||||
| 180 | $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat'); | ||||
| 181 | $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa'); | ||||
| 182 | |||||
| 183 | This is the main loop which will loop progressively through sequences | ||||
| 184 | in a file, and each call to $seqin-E<gt>next_seq() provides a new Seq | ||||
| 185 | object from the file: | ||||
| 186 | |||||
| 187 | while((my $seqobj = $seqin->next_seq())) { | ||||
| 188 | |||||
| 189 | This print line below accesses fields in the Seq object directly. The | ||||
| 190 | $seqobj-E<gt>display_id is the way to access the display_id attribute | ||||
| 191 | of the Seq object. The $seqobj-E<gt>seq method gets the actual | ||||
| 192 | sequence out as string. Then you can do manipulation of this if | ||||
| 193 | you want to (there are however easy ways of doing truncation, | ||||
| 194 | reverse-complement and translation). | ||||
| 195 | |||||
| 196 | print "Seen sequence ",$seqobj->display_id,", start of seq ", | ||||
| 197 | substr($seqobj->seq,1,10),"\n"; | ||||
| 198 | |||||
| 199 | Bioperl has to guess the alphabet of the sequence, being either 'dna', | ||||
| 200 | 'rna', or 'protein'. The alphabet attribute is one of these three | ||||
| 201 | possibilities. | ||||
| 202 | |||||
| 203 | if( $seqobj->alphabet eq 'dna') { | ||||
| 204 | |||||
| 205 | The $seqobj-E<gt>revcom method provides the reverse complement of the Seq | ||||
| 206 | object as another Seq object. Thus, the $rev variable is a reference to | ||||
| 207 | another Seq object. For example, one could repeat the above print line | ||||
| 208 | for this Seq object (putting $rev in place of $seqobj). In this | ||||
| 209 | case we are going to output the object into the file stream we built | ||||
| 210 | earlier on. | ||||
| 211 | |||||
| 212 | $rev = $seqobj->revcom; | ||||
| 213 | |||||
| 214 | When we output it, we want the id of the outputted object | ||||
| 215 | to be changed to "$id.rev", ie, with .rev on the end of the name. The | ||||
| 216 | following lines retrieve the id of the sequence object, add .rev | ||||
| 217 | to this and then set the display_id of the rev sequence object to | ||||
| 218 | this. Notice that to set the display_id attribute you just need to | ||||
| 219 | call the same method, display_id(), with the new value as an argument. | ||||
| 220 | Getting and setting values with the same method is common in Bioperl. | ||||
| 221 | |||||
| 222 | $id = $seqobj->display_id(); | ||||
| 223 | $id = "$id.rev"; | ||||
| 224 | $rev->display_id($id); | ||||
| 225 | |||||
| 226 | The write_seq method on the SeqIO output object, $seqout, writes the | ||||
| 227 | $rev object to the filestream we built at the top of the script. | ||||
| 228 | The filestream knows that it is outputting in fasta format, and | ||||
| 229 | so it provides fasta output. | ||||
| 230 | |||||
| 231 | $seqout->write_seq($rev); | ||||
| 232 | |||||
| 233 | This block of code loops over sequence features in the sequence | ||||
| 234 | object, trying to find ones that have been tagged as 'exon'. | ||||
| 235 | Features have start and end attributes and can be outputted | ||||
| 236 | in General Feature Format, GFF, a standardized format for sequence | ||||
| 237 | features. | ||||
| 238 | |||||
| 239 | foreach $feat ( $seqobj->get_SeqFeatures() ) { | ||||
| 240 | if( $feat->primary_tag eq 'exon' ) { | ||||
| 241 | print STDOUT "Location ",$feat->start,":", | ||||
| 242 | $feat->end," GFF[",$feat->gff_string,"]\n"; | ||||
| 243 | } | ||||
| 244 | } | ||||
| 245 | |||||
| 246 | The code above shows how a few Bio::Seq methods suffice to read, parse, | ||||
| 247 | reformat and analyze sequences from a file. A full list of methods | ||||
| 248 | available to Bio::Seq objects is shown below. Bear in mind that some of | ||||
| 249 | these methods come from PrimarySeq objects, which are simpler | ||||
| 250 | than Seq objects, stripped of features (see L<Bio::PrimarySeq> for | ||||
| 251 | more information). | ||||
| 252 | |||||
| 253 | # these methods return strings, and accept strings in some cases: | ||||
| 254 | |||||
| 255 | $seqobj->seq(); # string of sequence | ||||
| 256 | $seqobj->subseq(5,10); # part of the sequence as a string | ||||
| 257 | $seqobj->accession_number(); # when there, the accession number | ||||
| 258 | $seqobj->alphabet(); # one of 'dna','rna',or 'protein' | ||||
| 259 | $seqobj->version() # when there, the version | ||||
| 260 | $seqobj->keywords(); # when there, the Keywords line | ||||
| 261 | $seqobj->length() # length | ||||
| 262 | $seqobj->desc(); # description | ||||
| 263 | $seqobj->primary_id(); # a unique id for this sequence regardless | ||||
| 264 | # of its display_id or accession number | ||||
| 265 | $seqobj->display_id(); # the human readable id of the sequence | ||||
| 266 | |||||
| 267 | Some of these values map to fields in common formats. For example, the | ||||
| 268 | display_id() method returns the LOCUS name of a Genbank entry, | ||||
| 269 | the (\S+) following the E<gt> character in a Fasta file, the ID from | ||||
| 270 | a SwissProt file, and so on. The desc() method will return the DEFINITION | ||||
| 271 | line of a Genbank file, the description following the display_id in a | ||||
| 272 | Fasta file, and the DE field in a SwissProt file. | ||||
| 273 | |||||
| 274 | # the following methods return new Seq objects, but | ||||
| 275 | # do not transfer features across to the new object: | ||||
| 276 | |||||
| 277 | $seqobj->trunc(5,10) # truncation from 5 to 10 as new object | ||||
| 278 | $seqobj->revcom # reverse complements sequence | ||||
| 279 | $seqobj->translate # translation of the sequence | ||||
| 280 | |||||
| 281 | # if new() can be called this method returns 1, else 0 | ||||
| 282 | |||||
| 283 | $seqobj->can_call_new | ||||
| 284 | |||||
| 285 | # the following method determines if the given string will be accepted | ||||
| 286 | # by the seq() method - if the string is acceptable then validate_seq() | ||||
| 287 | # returns 1, or 0 if not | ||||
| 288 | |||||
| 289 | $seqobj->validate_seq($string) | ||||
| 290 | |||||
| 291 | # the following method returns or accepts a Species object: | ||||
| 292 | |||||
| 293 | $seqobj->species(); | ||||
| 294 | |||||
| 295 | Please see L<Bio::Species> for more information on this object. | ||||
| 296 | |||||
| 297 | # the following method returns or accepts an Annotation object | ||||
| 298 | # which in turn allows access to Annotation::Reference | ||||
| 299 | # and Annotation::Comment objects: | ||||
| 300 | |||||
| 301 | $seqobj->annotation(); | ||||
| 302 | |||||
| 303 | These annotations typically refer to entire sequences, unlike | ||||
| 304 | features. See L<Bio::AnnotationCollectionI>, | ||||
| 305 | L<Bio::Annotation::Collection>, L<Bio::Annotation::Reference>, and | ||||
| 306 | L<Bio::Annotation::Comment> for details. | ||||
| 307 | |||||
| 308 | It is also important to be able to describe defined portions of a | ||||
| 309 | sequence. The combination of some description and the corresponding | ||||
| 310 | sub-sequence is called a feature - an exon and its coordinates within | ||||
| 311 | a gene is an example of a feature, or a domain within a protein. | ||||
| 312 | |||||
| 313 | # the following methods return an array of SeqFeatureI objects: | ||||
| 314 | |||||
| 315 | $seqobj->get_SeqFeatures # The 'top level' sequence features | ||||
| 316 | $seqobj->get_all_SeqFeatures # All sequence features, including sub-seq | ||||
| 317 | # features, such as features in an exon | ||||
| 318 | |||||
| 319 | # to find out the number of features use: | ||||
| 320 | |||||
| 321 | $seqobj->feature_count | ||||
| 322 | |||||
| 323 | Here are just some of the methods available to SeqFeatureI objects: | ||||
| 324 | |||||
| 325 | # these methods return numbers: | ||||
| 326 | |||||
| 327 | $feat->start # start position (1 is the first base) | ||||
| 328 | $feat->end # end position (2 is the second base) | ||||
| 329 | $feat->strand # 1 means forward, -1 reverse, 0 not relevant | ||||
| 330 | |||||
| 331 | # these methods return or accept strings: | ||||
| 332 | |||||
| 333 | $feat->primary_tag # the name of the sequence feature, eg | ||||
| 334 | # 'exon', 'glycosylation site', 'TM domain' | ||||
| 335 | $feat->source_tag # where the feature comes from, eg, 'EMBL_GenBank', | ||||
| 336 | # or 'BLAST' | ||||
| 337 | |||||
| 338 | # this method returns the more austere PrimarySeq object, not a | ||||
| 339 | # Seq object - the main difference is that PrimarySeq objects do not | ||||
| 340 | # themselves contain sequence features | ||||
| 341 | |||||
| 342 | $feat->seq # the sequence between start,end on the | ||||
| 343 | # correct strand of the sequence | ||||
| 344 | |||||
| 345 | See L<Bio::PrimarySeq> for more details on PrimarySeq objects. | ||||
| 346 | |||||
| 347 | # useful methods for feature comparisons, for start/end points | ||||
| 348 | |||||
| 349 | $feat->overlaps($other) # do $feat and $other overlap? | ||||
| 350 | $feat->contains($other) # is $other completely within $feat? | ||||
| 351 | $feat->equals($other) # do $feat and $other completely agree? | ||||
| 352 | |||||
| 353 | # one can also add features | ||||
| 354 | |||||
| 355 | $seqobj->add_SeqFeature($feat) # returns 1 if successful | ||||
| 356 | |||||
| 357 | # sub features. For complex join() statements, the feature | ||||
| 358 | # is one sequence feature with many sub SeqFeatures | ||||
| 359 | |||||
| 360 | $feat->sub_SeqFeature # returns array of sub seq features | ||||
| 361 | |||||
| 362 | Please see L<Bio::SeqFeatureI> and L<Bio::SeqFeature::Generic>, | ||||
| 363 | for more information on sequence features. | ||||
| 364 | |||||
| 365 | It is worth mentioning that one can also retrieve the start and end | ||||
| 366 | positions of a feature using a Bio::LocationI object: | ||||
| 367 | |||||
| 368 | $location = $feat->location # $location is a Bio::LocationI object | ||||
| 369 | $location->start; # start position | ||||
| 370 | $location->end; # end position | ||||
| 371 | |||||
| 372 | This is useful because one needs a Bio::Location::SplitLocationI object | ||||
| 373 | in order to retrieve the coordinates inside the Genbank or EMBL join() | ||||
| 374 | statements (e.g. "CDS join(51..142,273..495,1346..1474)"): | ||||
| 375 | |||||
| 376 | if ( $feat->location->isa('Bio::Location::SplitLocationI') && | ||||
| 377 | $feat->primary_tag eq 'CDS' ) { | ||||
| 378 | foreach $loc ( $feat->location->sub_Location ) { | ||||
| 379 | print $loc->start . ".." . $loc->end . "\n"; | ||||
| 380 | } | ||||
| 381 | } | ||||
| 382 | |||||
| 383 | See L<Bio::LocationI> and L<Bio::Location::SplitLocationI> for more | ||||
| 384 | information. | ||||
| 385 | |||||
| 386 | =head1 Implemented Interfaces | ||||
| 387 | |||||
| 388 | This class implements the following interfaces. | ||||
| 389 | |||||
| 390 | =over 4 | ||||
| 391 | |||||
| 392 | =item Bio::SeqI | ||||
| 393 | |||||
| 394 | Note that this includes implementing Bio::PrimarySeqI. | ||||
| 395 | |||||
| 396 | =item Bio::IdentifiableI | ||||
| 397 | |||||
| 398 | =item Bio::DescribableI | ||||
| 399 | |||||
| 400 | =item Bio::AnnotatableI | ||||
| 401 | |||||
| 402 | =item Bio::FeatureHolderI | ||||
| 403 | |||||
| 404 | =back | ||||
| 405 | |||||
| 406 | =head1 FEEDBACK | ||||
| 407 | |||||
| 408 | |||||
| 409 | =head2 Mailing Lists | ||||
| 410 | |||||
| 411 | User feedback is an integral part of the evolution of this and other | ||||
| 412 | Bioperl modules. Send your comments and suggestions preferably to one | ||||
| 413 | of the Bioperl mailing lists. Your participation is much appreciated. | ||||
| 414 | |||||
| 415 | bioperl-l@bioperl.org - General discussion | ||||
| 416 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | ||||
| 417 | |||||
| 418 | =head2 Support | ||||
| 419 | |||||
| 420 | Please direct usage questions or support issues to the mailing list: | ||||
| 421 | |||||
| 422 | I<bioperl-l@bioperl.org> | ||||
| 423 | |||||
| 424 | rather than to the module maintainer directly. Many experienced and | ||||
| 425 | responsive experts will be able to look at the problem and quickly | ||||
| 426 | address it. Please include a thorough description of the problem | ||||
| 427 | with code and data examples if at all possible. | ||||
| 428 | |||||
| 429 | =head2 Reporting Bugs | ||||
| 430 | |||||
| 431 | Report bugs to the Bioperl bug tracking system to help us keep track | ||||
| 432 | of the bugs and their resolution. Bug reports can be submitted via the | ||||
| 433 | web: | ||||
| 434 | |||||
| 435 | https://github.com/bioperl/bioperl-live/issues | ||||
| 436 | |||||
| 437 | =head1 AUTHOR - Ewan Birney, inspired by Ian Korf objects | ||||
| 438 | |||||
| 439 | Email birney@ebi.ac.uk | ||||
| 440 | |||||
| 441 | =head1 CONTRIBUTORS | ||||
| 442 | |||||
| 443 | Jason Stajich E<lt>jason@bioperl.orgE<gt> | ||||
| 444 | Mark A. Jensen maj -at- fortinbras -dot- us | ||||
| 445 | |||||
| 446 | =head1 APPENDIX | ||||
| 447 | |||||
| 448 | |||||
| 449 | The rest of the documentation details each of the object | ||||
| 450 | methods. Internal methods are usually preceded with a "_". | ||||
| 451 | |||||
| 452 | =cut | ||||
| 453 | |||||
| 454 | #' | ||||
| 455 | # Let the code begin... | ||||
| 456 | |||||
| 457 | |||||
| 458 | package Bio::Seq; | ||||
| 459 | 2 | 22µs | 2 | 36µs | # spent 24µs (12+12) within Bio::Seq::BEGIN@459 which was called:
# once (12µs+12µs) by Bio::DB::InMemoryCache::BEGIN@75 at line 459 # spent 24µs making 1 call to Bio::Seq::BEGIN@459
# spent 12µs making 1 call to strict::import |
| 460 | |||||
| 461 | 2 | 20µs | 1 | 5µs | # spent 5µs within Bio::Seq::BEGIN@461 which was called:
# once (5µs+0s) by Bio::DB::InMemoryCache::BEGIN@75 at line 461 # spent 5µs making 1 call to Bio::Seq::BEGIN@461 |
| 462 | 2 | 31µs | 1 | 10µs | # spent 10µs within Bio::Seq::BEGIN@462 which was called:
# once (10µs+0s) by Bio::DB::InMemoryCache::BEGIN@75 at line 462 # spent 10µs making 1 call to Bio::Seq::BEGIN@462 |
| 463 | |||||
| 464 | 2 | 1.32ms | 2 | 1.65ms | # spent 1.65ms (8µs+1.64) within Bio::Seq::BEGIN@464 which was called:
# once (8µs+1.64ms) by Bio::DB::InMemoryCache::BEGIN@75 at line 464 # spent 1.65ms making 1 call to Bio::Seq::BEGIN@464
# spent 1.64ms making 1 call to base::import, recursion: max depth 1, sum of overlapping time 1.64ms |
| 465 | |||||
| 466 | =head2 new | ||||
| 467 | |||||
| 468 | Title : new | ||||
| 469 | Usage : $seq = Bio::Seq->new( -seq => 'ATGGGGGTGGTGGTACCCT', | ||||
| 470 | -id => 'human_id', | ||||
| 471 | -accession_number => 'AL000012', | ||||
| 472 | ); | ||||
| 473 | |||||
| 474 | Function: Returns a new Seq object from | ||||
| 475 | basic constructors, being a string for the sequence | ||||
| 476 | and strings for id and accession_number | ||||
| 477 | Returns : a new Bio::Seq object | ||||
| 478 | |||||
| 479 | =cut | ||||
| 480 | |||||
| 481 | sub new { | ||||
| 482 | my($caller,@args) = @_; | ||||
| 483 | |||||
| 484 | if( $caller ne 'Bio::Seq') { | ||||
| 485 | $caller = ref($caller) if ref($caller); | ||||
| 486 | } | ||||
| 487 | |||||
| 488 | # we know our inheritance hierarchy | ||||
| 489 | my $self = Bio::Root::Root->new(@args); | ||||
| 490 | bless $self,$caller; | ||||
| 491 | |||||
| 492 | # this is way too sneaky probably. We delegate the construction of | ||||
| 493 | # the Seq object onto PrimarySeq and then pop primary_seq into | ||||
| 494 | # our primary_seq slot | ||||
| 495 | |||||
| 496 | my $pseq = Bio::PrimarySeq->new(@args); | ||||
| 497 | |||||
| 498 | # as we have just made this, we know it is ok to set hash directly | ||||
| 499 | # rather than going through the method | ||||
| 500 | |||||
| 501 | $self->{'primary_seq'} = $pseq; | ||||
| 502 | |||||
| 503 | # setting this array is now delayed until the final | ||||
| 504 | # moment, again speed ups for non feature containing things | ||||
| 505 | # $self->{'_as_feat'} = []; | ||||
| 506 | |||||
| 507 | |||||
| 508 | my ($ann, $pid,$feat,$species) = &Bio::Root::RootI::_rearrange($self,[qw(ANNOTATION PRIMARY_ID FEATURES SPECIES)], @args); | ||||
| 509 | |||||
| 510 | # for a number of cases - reading fasta files - these are never set. This | ||||
| 511 | # gives a quick optimisation around testing things later on | ||||
| 512 | |||||
| 513 | if( defined $ann || defined $pid || defined $feat || defined $species ) { | ||||
| 514 | $pid && $self->primary_id($pid); | ||||
| 515 | $species && $self->species($species); | ||||
| 516 | $ann && $self->annotation($ann); | ||||
| 517 | |||||
| 518 | if( defined $feat ) { | ||||
| 519 | if( ref($feat) !~ /ARRAY/i ) { | ||||
| 520 | if( ref($feat) && $feat->isa('Bio::SeqFeatureI') ) { | ||||
| 521 | $self->add_SeqFeature($feat); | ||||
| 522 | } else { | ||||
| 523 | $self->warn("Must specify a valid Bio::SeqFeatureI or ArrayRef of Bio::SeqFeatureI's with the -features init parameter for ".ref($self)); | ||||
| 524 | } | ||||
| 525 | } else { | ||||
| 526 | foreach my $feature ( @$feat ) { | ||||
| 527 | $self->add_SeqFeature($feature); | ||||
| 528 | } | ||||
| 529 | } | ||||
| 530 | } | ||||
| 531 | } | ||||
| 532 | |||||
| 533 | return $self; | ||||
| 534 | } | ||||
| 535 | |||||
| 536 | |||||
| 537 | =head1 PrimarySeq interface | ||||
| 538 | |||||
| 539 | |||||
| 540 | The PrimarySeq interface provides the basic sequence getting | ||||
| 541 | and setting methods for all sequences. | ||||
| 542 | |||||
| 543 | These methods implement the Bio::PrimarySeq interface by delegating | ||||
| 544 | to the primary_seq inside the object. This means that you | ||||
| 545 | can use a Seq object wherever there is a PrimarySeq, and | ||||
| 546 | of course, you are free to use these functions anyway. | ||||
| 547 | |||||
| 548 | =cut | ||||
| 549 | |||||
| 550 | =head2 seq | ||||
| 551 | |||||
| 552 | Title : seq | ||||
| 553 | Usage : $string = $obj->seq() | ||||
| 554 | Function: Get/Set the sequence as a string of letters. The | ||||
| 555 | case of the letters is left up to the implementer. | ||||
| 556 | Suggested cases are upper case for proteins and lower case for | ||||
| 557 | DNA sequence (IUPAC standard), | ||||
| 558 | but implementations are suggested to keep an open mind about | ||||
| 559 | case (some users... want mixed case!) | ||||
| 560 | Returns : A scalar | ||||
| 561 | Args : Optionally on set the new value (a string). An optional second | ||||
| 562 | argument presets the alphabet (otherwise it will be guessed). | ||||
| 563 | Both parameters may also be given in named parameter style | ||||
| 564 | with -seq and -alphabet being the names. | ||||
| 565 | |||||
| 566 | =cut | ||||
| 567 | |||||
| 568 | sub seq { | ||||
| 569 | return shift->primary_seq()->seq(@_); | ||||
| 570 | } | ||||
| 571 | |||||
| 572 | |||||
| 573 | =head2 validate_seq | ||||
| 574 | |||||
| 575 | Title : validate_seq | ||||
| 576 | Usage : if(! $seqobj->validate_seq($seq_str) ) { | ||||
| 577 | print "sequence $seq_str is not valid for an object of | ||||
| 578 | alphabet ",$seqobj->alphabet, "\n"; | ||||
| 579 | } | ||||
| 580 | Function: Test that the given sequence is valid, i.e. contains only valid | ||||
| 581 | characters. The allowed characters are all letters (A-Z) and '-','.', | ||||
| 582 | '*','?','=' and '~'. Spaces are not valid. Note that this | ||||
| 583 | implementation does not take alphabet() into account. | ||||
| 584 | Returns : 1 if the supplied sequence string is valid, 0 otherwise. | ||||
| 585 | Args : - Sequence string to be validated | ||||
| 586 | - Boolean to throw an error if the sequence is invalid | ||||
| 587 | |||||
| 588 | =cut | ||||
| 589 | |||||
| 590 | sub validate_seq { | ||||
| 591 | return shift->primary_seq()->validate_seq(@_); | ||||
| 592 | } | ||||
| 593 | |||||
| 594 | |||||
| 595 | =head2 length | ||||
| 596 | |||||
| 597 | Title : length | ||||
| 598 | Usage : $len = $seq->length() | ||||
| 599 | Function: | ||||
| 600 | Example : | ||||
| 601 | Returns : Integer representing the length of the sequence. | ||||
| 602 | Args : None | ||||
| 603 | |||||
| 604 | =cut | ||||
| 605 | |||||
| 606 | sub length { | ||||
| 607 | return shift->primary_seq()->length(@_); | ||||
| 608 | } | ||||
| 609 | |||||
| 610 | |||||
| 611 | =head1 Methods from the Bio::PrimarySeqI interface | ||||
| 612 | |||||
| 613 | =head2 subseq | ||||
| 614 | |||||
| 615 | Title : subseq | ||||
| 616 | Usage : $substring = $obj->subseq(10,40); | ||||
| 617 | Function: Returns the subseq from start to end, where the first base | ||||
| 618 | is 1 and the number is inclusive, ie 1-2 are the first two | ||||
| 619 | bases of the sequence | ||||
| 620 | |||||
| 621 | Start cannot be larger than end but can be equal | ||||
| 622 | |||||
| 623 | Returns : A string | ||||
| 624 | Args : 2 integers | ||||
| 625 | |||||
| 626 | |||||
| 627 | =cut | ||||
| 628 | |||||
| 629 | sub subseq { | ||||
| 630 | return shift->primary_seq()->subseq(@_); | ||||
| 631 | } | ||||
| 632 | |||||
| 633 | |||||
| 634 | =head2 display_id | ||||
| 635 | |||||
| 636 | Title : display_id | ||||
| 637 | Usage : $id = $obj->display_id or $obj->display_id($newid); | ||||
| 638 | Function: Gets or sets the display id, also known as the common name of | ||||
| 639 | the Seq object. | ||||
| 640 | |||||
| 641 | The semantics of this is that it is the most likely string | ||||
| 642 | to be used as an identifier of the sequence, and likely to | ||||
| 643 | have "human" readability. The id is equivalent to the LOCUS | ||||
| 644 | field of the GenBank/EMBL databanks and the ID field of the | ||||
| 645 | Swissprot/sptrembl database. In fasta format, the >(\S+) is | ||||
| 646 | presumed to be the id, though some people overload the id | ||||
| 647 | to embed other information. Bioperl does not use any | ||||
| 648 | embedded information in the ID field, and people are | ||||
| 649 | encouraged to use other mechanisms (accession field for | ||||
| 650 | example, or extending the sequence object) to solve this. | ||||
| 651 | |||||
| 652 | Notice that $seq->id() maps to this function, mainly for | ||||
| 653 | legacy/convenience issues. | ||||
| 654 | Returns : A string | ||||
| 655 | Args : None or a new id | ||||
| 656 | |||||
| 657 | =cut | ||||
| 658 | |||||
| 659 | sub display_id { | ||||
| 660 | return shift->primary_seq->display_id(@_); | ||||
| 661 | } | ||||
| 662 | |||||
| 663 | |||||
| 664 | =head2 accession_number | ||||
| 665 | |||||
| 666 | Title : accession_number | ||||
| 667 | Usage : $unique_biological_key = $obj->accession_number; | ||||
| 668 | Function: Returns the unique biological id for a sequence, commonly | ||||
| 669 | called the accession_number. For sequences from established | ||||
| 670 | databases, the implementors should try to use the correct | ||||
| 671 | accession number. Notice that primary_id() provides the | ||||
| 672 | unique id for the implementation, allowing multiple objects | ||||
| 673 | to have the same accession number in a particular implementation. | ||||
| 674 | |||||
| 675 | For sequences with no accession number, this method should return | ||||
| 676 | "unknown". | ||||
| 677 | |||||
| 678 | Can also be used to set the accession number. | ||||
| 679 | Example : $key = $seq->accession_number or $seq->accession_number($key) | ||||
| 680 | Returns : A string | ||||
| 681 | Args : None or an accession number | ||||
| 682 | |||||
| 683 | =cut | ||||
| 684 | |||||
| 685 | sub accession_number { | ||||
| 686 | return shift->primary_seq->accession_number(@_); | ||||
| 687 | } | ||||
| 688 | |||||
| 689 | |||||
| 690 | =head2 desc | ||||
| 691 | |||||
| 692 | Title : desc | ||||
| 693 | Usage : $seqobj->desc($string) or $seqobj->desc() | ||||
| 694 | Function: Sets or gets the description of the sequence | ||||
| 695 | Example : | ||||
| 696 | Returns : The description | ||||
| 697 | Args : The description or none | ||||
| 698 | |||||
| 699 | =cut | ||||
| 700 | |||||
| 701 | sub desc { | ||||
| 702 | return shift->primary_seq->desc(@_); | ||||
| 703 | } | ||||
| 704 | |||||
| 705 | |||||
| 706 | =head2 primary_id | ||||
| 707 | |||||
| 708 | Title : primary_id | ||||
| 709 | Usage : $unique_implementation_key = $obj->primary_id; | ||||
| 710 | Function: Returns the unique id for this object in this | ||||
| 711 | implementation. This allows implementations to manage | ||||
| 712 | their own object ids in a way the implementation can control. | ||||
| 713 | Clients can expect one id to map to one object. | ||||
| 714 | |||||
| 715 | For sequences with no natural id, this method should return | ||||
| 716 | a stringified memory location. | ||||
| 717 | |||||
| 718 | Can also be used to set the primary_id (or unset to undef). | ||||
| 719 | |||||
| 720 | [Note this method name is likely to change in 1.3] | ||||
| 721 | |||||
| 722 | Example : $id = $seq->primary_id or $seq->primary_id($id) | ||||
| 723 | Returns : A string | ||||
| 724 | Args : None or an id, or undef to unset the primary id. | ||||
| 725 | |||||
| 726 | =cut | ||||
| 727 | |||||
| 728 | sub primary_id { | ||||
| 729 | # Note: this used to not delegate to the primary seq. This is | ||||
| 730 | # really bad in very subtle ways. E.g., if you created the object | ||||
| 731 | # with a primary id given to the constructor and then later you | ||||
| 732 | # change the primary id, if this method wouldn't delegate you'd | ||||
| 733 | # have different values for primary id in the PrimarySeq object | ||||
| 734 | # compared to this instance. Not good. | ||||
| 735 | |||||
| 736 | # I can't remember why not delegating was ever deemed | ||||
| 737 | # advantageous, but I hereby claim that its problems far outweigh | ||||
| 738 | # its advantages, if there are any. Convince me otherwise if you | ||||
| 739 | # disagree. HL 2004/08/05 | ||||
| 740 | |||||
| 741 | return shift->primary_seq->primary_id(@_); | ||||
| 742 | } | ||||
| 743 | |||||
| 744 | |||||
| 745 | =head2 can_call_new | ||||
| 746 | |||||
| 747 | Title : can_call_new | ||||
| 748 | Usage : if ( $obj->can_call_new ) { | ||||
| 749 | $newobj = $obj->new( %param ); | ||||
| 750 | } | ||||
| 751 | Function: can_call_new returns 1 or 0 depending | ||||
| 752 | on whether an implementation allows new | ||||
| 753 | constructor to be called. If a new constructor | ||||
| 754 | is allowed, then it should take the following hashed | ||||
| 755 | constructor list. | ||||
| 756 | |||||
| 757 | $myobject->new( -seq => $sequence_as_string, | ||||
| 758 | -display_id => $id, | ||||
| 759 | -accession_number => $accession, | ||||
| 760 | -alphabet => 'dna', | ||||
| 761 | ); | ||||
| 762 | Example : | ||||
| 763 | Returns : 1 or 0 | ||||
| 764 | Args : None | ||||
| 765 | |||||
| 766 | =cut | ||||
| 767 | |||||
| 768 | sub can_call_new { | ||||
| 769 | return 1; | ||||
| 770 | } | ||||
| 771 | |||||
| 772 | |||||
| 773 | =head2 alphabet | ||||
| 774 | |||||
| 775 | Title : alphabet | ||||
| 776 | Usage : if ( $obj->alphabet eq 'dna' ) { /Do Something/ } | ||||
| 777 | Function: Get/Set the type of sequence being one of | ||||
| 778 | 'dna', 'rna' or 'protein'. This is case sensitive. | ||||
| 779 | |||||
| 780 | This is not called <type> because this would cause | ||||
| 781 | upgrade problems from the 0.5 and earlier Seq objects. | ||||
| 782 | |||||
| 783 | Returns : A string either 'dna','rna','protein'. NB - the object must | ||||
| 784 | make a call of the type - if there is no type specified it | ||||
| 785 | has to guess. | ||||
| 786 | Args : optional string to set : 'dna' | 'rna' | 'protein' | ||||
| 787 | |||||
| 788 | =cut | ||||
| 789 | |||||
| 790 | sub alphabet { | ||||
| 791 | my $self = shift; | ||||
| 792 | return $self->primary_seq->alphabet(@_) if @_ && defined $_[0]; | ||||
| 793 | return $self->primary_seq->alphabet(); | ||||
| 794 | } | ||||
| 795 | |||||
| 796 | |||||
| 797 | =head2 is_circular | ||||
| 798 | |||||
| 799 | Title : is_circular | ||||
| 800 | Usage : if( $obj->is_circular) { /Do Something/ } | ||||
| 801 | Function: Returns true if the molecule is circular | ||||
| 802 | Returns : Boolean value | ||||
| 803 | Args : none | ||||
| 804 | |||||
| 805 | =cut | ||||
| 806 | |||||
| 807 | sub is_circular { | ||||
| 808 | return shift->primary_seq()->is_circular(@_); | ||||
| 809 | } | ||||
| 810 | |||||
| 811 | |||||
| 812 | =head1 Methods for Bio::IdentifiableI compliance | ||||
| 813 | |||||
| 814 | =head2 object_id | ||||
| 815 | |||||
| 816 | Title : object_id | ||||
| 817 | Usage : $string = $obj->object_id() | ||||
| 818 | Function: a string which represents the stable primary identifier | ||||
| 819 | in this namespace of this object. For DNA sequences this | ||||
| 820 | is its accession_number, similarly for protein sequences | ||||
| 821 | |||||
| 822 | This is aliased to accession_number(). | ||||
| 823 | Returns : A scalar | ||||
| 824 | |||||
| 825 | =cut | ||||
| 826 | |||||
| 827 | sub object_id { | ||||
| 828 | return shift->accession_number(@_); | ||||
| 829 | } | ||||
| 830 | |||||
| 831 | |||||
| 832 | =head2 version | ||||
| 833 | |||||
| 834 | Title : version | ||||
| 835 | Usage : $version = $obj->version() | ||||
| 836 | Function: a number which differentiates between versions of | ||||
| 837 | the same object. Higher numbers are considered to be | ||||
| 838 | later and more relevant, but a single object described | ||||
| 839 | by the same identifier should represent the same concept | ||||
| 840 | |||||
| 841 | Returns : A number | ||||
| 842 | |||||
| 843 | =cut | ||||
| 844 | |||||
| 845 | sub version{ | ||||
| 846 | return shift->primary_seq->version(@_); | ||||
| 847 | } | ||||
| 848 | |||||
| 849 | |||||
| 850 | =head2 authority | ||||
| 851 | |||||
| 852 | Title : authority | ||||
| 853 | Usage : $authority = $obj->authority() | ||||
| 854 | Function: a string which represents the organisation which | ||||
| 855 | granted the namespace, written as the DNS name for | ||||
| 856 | organisation (eg, wormbase.org) | ||||
| 857 | |||||
| 858 | Returns : A scalar | ||||
| 859 | |||||
| 860 | =cut | ||||
| 861 | |||||
| 862 | sub authority { | ||||
| 863 | return shift->primary_seq()->authority(@_); | ||||
| 864 | } | ||||
| 865 | |||||
| 866 | |||||
| 867 | =head2 namespace | ||||
| 868 | |||||
| 869 | Title : namespace | ||||
| 870 | Usage : $string = $obj->namespace() | ||||
| 871 | Function: A string representing the name space this identifier | ||||
| 872 | is valid in, often the database name or the name | ||||
| 873 | describing the collection | ||||
| 874 | |||||
| 875 | Returns : A scalar | ||||
| 876 | |||||
| 877 | =cut | ||||
| 878 | |||||
| 879 | sub namespace{ | ||||
| 880 | return shift->primary_seq()->namespace(@_); | ||||
| 881 | } | ||||
| 882 | |||||
| 883 | |||||
| 884 | =head1 Methods for Bio::DescribableI compliance | ||||
| 885 | |||||
| 886 | =head2 display_name | ||||
| 887 | |||||
| 888 | Title : display_name | ||||
| 889 | Usage : $string = $obj->display_name() | ||||
| 890 | Function: A string which is what should be displayed to the user | ||||
| 891 | the string should have no spaces (ideally, though a cautious | ||||
| 892 | user of this interface would not assume this) and should be | ||||
| 893 | less than thirty characters (though again, double checking | ||||
| 894 | this is a good idea) | ||||
| 895 | |||||
| 896 | This is aliased to display_id(). | ||||
| 897 | Returns : A scalar | ||||
| 898 | |||||
| 899 | =cut | ||||
| 900 | |||||
| 901 | sub display_name { | ||||
| 902 | return shift->display_id(@_); | ||||
| 903 | } | ||||
| 904 | |||||
| 905 | =head2 description | ||||
| 906 | |||||
| 907 | Title : description | ||||
| 908 | Usage : $string = $obj->description() | ||||
| 909 | Function: A text string suitable for displaying to the user a | ||||
| 910 | description. This string is likely to have spaces, but | ||||
| 911 | should not have any newlines or formatting - just plain | ||||
| 912 | text. The string should not be greater than 255 characters | ||||
| 913 | and clients can feel justified at truncating strings at 255 | ||||
| 914 | characters for the purposes of display | ||||
| 915 | |||||
| 916 | This is aliased to desc(). | ||||
| 917 | Returns : A scalar | ||||
| 918 | |||||
| 919 | =cut | ||||
| 920 | |||||
| 921 | sub description { | ||||
| 922 | return shift->desc(@_); | ||||
| 923 | } | ||||
| 924 | |||||
| 925 | |||||
| 926 | =head1 Methods for implementing Bio::AnnotatableI | ||||
| 927 | |||||
| 928 | =head2 annotation | ||||
| 929 | |||||
| 930 | Title : annotation | ||||
| 931 | Usage : $ann = $seq->annotation or | ||||
| 932 | $seq->annotation($ann) | ||||
| 933 | Function: Gets or sets the annotation | ||||
| 934 | Returns : Bio::AnnotationCollectionI object | ||||
| 935 | Args : None or Bio::AnnotationCollectionI object | ||||
| 936 | |||||
| 937 | See L<Bio::AnnotationCollectionI> and L<Bio::Annotation::Collection> | ||||
| 938 | for more information | ||||
| 939 | |||||
| 940 | =cut | ||||
| 941 | |||||
| 942 | sub annotation { | ||||
| 943 | my ($obj,$value) = @_; | ||||
| 944 | if( defined $value ) { | ||||
| 945 | $obj->throw("object of class ".ref($value)." does not implement ". | ||||
| 946 | "Bio::AnnotationCollectionI. Too bad.") | ||||
| 947 | unless $value->isa("Bio::AnnotationCollectionI"); | ||||
| 948 | $obj->{'_annotation'} = $value; | ||||
| 949 | } elsif( ! defined $obj->{'_annotation'}) { | ||||
| 950 | $obj->{'_annotation'} = Bio::Annotation::Collection->new(); | ||||
| 951 | } | ||||
| 952 | return $obj->{'_annotation'}; | ||||
| 953 | } | ||||
| 954 | |||||
| 955 | |||||
| 956 | =head1 Methods for delegating Bio::AnnotationCollectionI | ||||
| 957 | |||||
| 958 | =head2 get_Annotations() | ||||
| 959 | |||||
| 960 | Usage : my @annotations = $seq->get_Annotations('key') | ||||
| 961 | Function: Retrieves all the Bio::AnnotationI objects for a specific key | ||||
| 962 | for this object | ||||
| 963 | Returns : list of Bio::AnnotationI - empty if no objects stored for a key | ||||
| 964 | Args : string which is key for annotations | ||||
| 965 | |||||
| 966 | =cut | ||||
| 967 | |||||
| 968 | sub get_Annotations { shift->annotation->get_Annotations(@_); } | ||||
| 969 | |||||
| 970 | |||||
| 971 | =head2 add_Annotation() | ||||
| 972 | |||||
| 973 | Usage : $seq->add_Annotation('reference',$object); | ||||
| 974 | $seq->add_Annotation($object,'Bio::MyInterface::DiseaseI'); | ||||
| 975 | $seq->add_Annotation($object); | ||||
| 976 | $seq->add_Annotation('disease',$object,'Bio::MyInterface::DiseaseI'); | ||||
| 977 | Function: Adds an annotation for a specific key for this sequence object. | ||||
| 978 | |||||
| 979 | If the key is omitted, the object to be added must provide a value | ||||
| 980 | via its tagname(). | ||||
| 981 | |||||
| 982 | If the archetype is provided, this and future objects added under | ||||
| 983 | that tag have to comply with the archetype and will be rejected | ||||
| 984 | otherwise. | ||||
| 985 | |||||
| 986 | Returns : none | ||||
| 987 | Args : annotation key ('disease', 'dblink', ...) | ||||
| 988 | object to store (must be Bio::AnnotationI compliant) | ||||
| 989 | [optional] object archetype to map future storage of object | ||||
| 990 | of these types to | ||||
| 991 | |||||
| 992 | =cut | ||||
| 993 | |||||
| 994 | sub add_Annotation { shift->annotation->add_Annotation(@_) } | ||||
| 995 | |||||
| 996 | |||||
| 997 | =head2 remove_Annotations() | ||||
| 998 | |||||
| 999 | Usage : $seq->remove_Annotations() | ||||
| 1000 | Function: Remove the annotations for the specified key from this sequence | ||||
| 1001 | object | ||||
| 1002 | Returns : a list of Bio::AnnotationI compliant objects which were stored | ||||
| 1003 | under the given key(s) for this sequence object | ||||
| 1004 | Args : the key(s) (tag name(s), one or more strings) for which to | ||||
| 1005 | remove annotations (optional; if none given, flushes all | ||||
| 1006 | annotations) | ||||
| 1007 | |||||
| 1008 | =cut | ||||
| 1009 | |||||
| 1010 | sub remove_Annotations { shift->annotation->remove_Annotations(@_) } | ||||
| 1011 | |||||
| 1012 | |||||
| 1013 | =head2 get_num_of_annotations() | ||||
| 1014 | |||||
| 1015 | Usage : my $count = $seq->get_num_of_annotations() | ||||
| 1016 | Alias : num_Annotations | ||||
| 1017 | Function: Returns the count of all annotations stored for this sequence | ||||
| 1018 | object | ||||
| 1019 | Returns : integer | ||||
| 1020 | Args : none | ||||
| 1021 | |||||
| 1022 | =cut | ||||
| 1023 | |||||
| 1024 | sub get_num_of_annotations { shift->annotation->get_num_of_annotations(@_) } | ||||
| 1025 | sub num_Annotations { shift->get_num_of_annotations }; #DWYM | ||||
| 1026 | |||||
| 1027 | |||||
| 1028 | =head1 Methods to implement Bio::FeatureHolderI | ||||
| 1029 | |||||
| 1030 | This includes methods for retrieving, adding, and removing features. | ||||
| 1031 | |||||
| 1032 | =cut | ||||
| 1033 | |||||
| 1034 | =head2 get_SeqFeatures | ||||
| 1035 | |||||
| 1036 | Title : get_SeqFeatures | ||||
| 1037 | Usage : | ||||
| 1038 | Function: Get the feature objects held by this feature holder. | ||||
| 1039 | |||||
| 1040 | Features which are not top-level are subfeatures of one or | ||||
| 1041 | more of the returned feature objects, which means that you | ||||
| 1042 | must traverse the subfeature arrays of each top-level | ||||
| 1043 | feature object in order to traverse all features associated | ||||
| 1044 | with this sequence. | ||||
| 1045 | |||||
| 1046 | Top-level features can be obtained by tag, specified in | ||||
| 1047 | the argument. | ||||
| 1048 | |||||
| 1049 | Use get_all_SeqFeatures() if you want the feature tree | ||||
| 1050 | flattened into one single array. | ||||
| 1051 | |||||
| 1052 | Example : | ||||
| 1053 | Returns : an array of Bio::SeqFeatureI implementing objects | ||||
| 1054 | Args : [optional] scalar string (feature tag) | ||||
| 1055 | |||||
| 1056 | =cut | ||||
| 1057 | |||||
| 1058 | sub get_SeqFeatures{ | ||||
| 1059 | my $self = shift; | ||||
| 1060 | my $tag = shift; | ||||
| 1061 | |||||
| 1062 | if( !defined $self->{'_as_feat'} ) { | ||||
| 1063 | $self->{'_as_feat'} = []; | ||||
| 1064 | } | ||||
| 1065 | if ($tag) { | ||||
| 1066 | return map { $_->primary_tag eq $tag ? $_ : () } @{$self->{'_as_feat'}}; | ||||
| 1067 | } | ||||
| 1068 | else { | ||||
| 1069 | return @{$self->{'_as_feat'}}; | ||||
| 1070 | } | ||||
| 1071 | } | ||||
| 1072 | |||||
| 1073 | |||||
| 1074 | =head2 get_all_SeqFeatures | ||||
| 1075 | |||||
| 1076 | Title : get_all_SeqFeatures | ||||
| 1077 | Usage : @feat_ary = $seq->get_all_SeqFeatures(); | ||||
| 1078 | Function: Returns the tree of feature objects attached to this | ||||
| 1079 | sequence object flattened into one single array. Top-level | ||||
| 1080 | features will still contain their subfeature-arrays, which | ||||
| 1081 | means that you will encounter subfeatures twice if you | ||||
| 1082 | traverse the subfeature tree of the returned objects. | ||||
| 1083 | |||||
| 1084 | Use get_SeqFeatures() if you want the array to contain only | ||||
| 1085 | the top-level features. | ||||
| 1086 | |||||
| 1087 | Returns : An array of Bio::SeqFeatureI implementing objects. | ||||
| 1088 | Args : None | ||||
| 1089 | |||||
| 1090 | =cut | ||||
| 1091 | |||||
| 1092 | # this implementation is inherited from FeatureHolderI | ||||
| 1093 | |||||
| 1094 | =head2 feature_count | ||||
| 1095 | |||||
| 1096 | Title : feature_count | ||||
| 1097 | Usage : $seq->feature_count() | ||||
| 1098 | Function: Return the number of SeqFeatures attached to a sequence | ||||
| 1099 | Returns : integer representing the number of SeqFeatures | ||||
| 1100 | Args : None | ||||
| 1101 | |||||
| 1102 | =cut | ||||
| 1103 | |||||
| 1104 | sub feature_count { | ||||
| 1105 | my ($self) = @_; | ||||
| 1106 | |||||
| 1107 | if (defined($self->{'_as_feat'})) { | ||||
| 1108 | return ($#{$self->{'_as_feat'}} + 1); | ||||
| 1109 | } else { | ||||
| 1110 | return 0; | ||||
| 1111 | } | ||||
| 1112 | } | ||||
| 1113 | |||||
| 1114 | |||||
| 1115 | =head2 add_SeqFeature | ||||
| 1116 | |||||
| 1117 | Title : add_SeqFeature | ||||
| 1118 | Usage : $seq->add_SeqFeature($feat); | ||||
| 1119 | Function: Adds the given feature object to the feature array of this | ||||
| 1120 | sequence. The object passed is required to implement the | ||||
| 1121 | Bio::SeqFeatureI interface. | ||||
| 1122 | The 'EXPAND' qualifier (see L<Bio::FeatureHolderI>) is supported, but | ||||
| 1123 | has no effect. | ||||
| 1124 | Returns : 1 on success | ||||
| 1125 | Args : A Bio::SeqFeatureI implementing object. | ||||
| 1126 | |||||
| 1127 | =cut | ||||
| 1128 | |||||
| 1129 | sub add_SeqFeature { | ||||
| 1130 | my ($self, @feat) = @_; | ||||
| 1131 | |||||
| 1132 | $self->{'_as_feat'} = [] unless $self->{'_as_feat'}; | ||||
| 1133 | |||||
| 1134 | if (scalar @feat > 1) { | ||||
| 1135 | $self->deprecated( | ||||
| 1136 | -message => 'Providing an array of features to Bio::Seq add_SeqFeature()'. | ||||
| 1137 | ' is deprecated and will be removed in a future version. '. | ||||
| 1138 | 'Add a single feature at a time instead.', | ||||
| 1139 | -warn_version => 1.007, | ||||
| 1140 | -throw_version => 1.009, | ||||
| 1141 | ); | ||||
| 1142 | } | ||||
| 1143 | |||||
| 1144 | for my $feat ( @feat ) { | ||||
| 1145 | |||||
| 1146 | next if $feat eq 'EXPAND'; # Need to support it for FeatureHolderI compliance | ||||
| 1147 | |||||
| 1148 | if( !$feat->isa("Bio::SeqFeatureI") ) { | ||||
| 1149 | $self->throw("Expected a Bio::SeqFeatureI object, but got a $feat."); | ||||
| 1150 | } | ||||
| 1151 | |||||
| 1152 | # make sure we attach ourselves to the feature if the feature wants it | ||||
| 1153 | my $aseq = $self->primary_seq; | ||||
| 1154 | $feat->attach_seq($aseq) if $aseq; | ||||
| 1155 | |||||
| 1156 | push(@{$self->{'_as_feat'}},$feat); | ||||
| 1157 | } | ||||
| 1158 | return 1; | ||||
| 1159 | } | ||||
| 1160 | |||||
| 1161 | |||||
| 1162 | =head2 remove_SeqFeatures | ||||
| 1163 | |||||
| 1164 | Title : remove_SeqFeatures | ||||
| 1165 | Usage : $seq->remove_SeqFeatures(); | ||||
| 1166 | Function: Flushes all attached SeqFeatureI objects. | ||||
| 1167 | |||||
| 1168 | To remove individual feature objects, delete those from the returned | ||||
| 1169 | array and re-add the rest. | ||||
| 1170 | Example : | ||||
| 1171 | Returns : The array of Bio::SeqFeatureI objects removed from this seq. | ||||
| 1172 | Args : None | ||||
| 1173 | |||||
| 1174 | =cut | ||||
| 1175 | |||||
| 1176 | sub remove_SeqFeatures { | ||||
| 1177 | my $self = shift; | ||||
| 1178 | |||||
| 1179 | return () unless $self->{'_as_feat'}; | ||||
| 1180 | my @feats = @{$self->{'_as_feat'}}; | ||||
| 1181 | $self->{'_as_feat'} = []; | ||||
| 1182 | return @feats; | ||||
| 1183 | } | ||||
| 1184 | |||||
| 1185 | |||||
| 1186 | =head1 Methods provided in the Bio::PrimarySeqI interface | ||||
| 1187 | |||||
| 1188 | These methods are inherited from the PrimarySeq interface | ||||
| 1189 | and work as one expects, building new Bio::Seq objects | ||||
| 1190 | or other information as expected. See L<Bio::PrimarySeq> | ||||
| 1191 | for more information. | ||||
| 1192 | |||||
| 1193 | Sequence Features are B<not> transferred to the new objects. | ||||
| 1194 | This is possibly a mistake. Anyone who feels the urge in | ||||
| 1195 | dealing with this is welcome to give it a go. | ||||
| 1196 | |||||
| 1197 | =head2 revcom | ||||
| 1198 | |||||
| 1199 | Title : revcom | ||||
| 1200 | Usage : $rev = $seq->revcom() | ||||
| 1201 | Function: Produces a new Bio::Seq object which | ||||
| 1202 | is the reversed complement of the sequence. For protein | ||||
| 1203 | sequences this throws an exception of "Sequence is a protein. | ||||
| 1204 | Cannot revcom" | ||||
| 1205 | |||||
| 1206 | The id is the same id as the original sequence, and the | ||||
| 1207 | accession number is also identical. If someone wants to track | ||||
| 1208 | that this sequence has been reversed, it needs to define its own | ||||
| 1209 | extensions | ||||
| 1210 | |||||
| 1211 | To do an in-place edit of an object you can go: | ||||
| 1212 | |||||
| 1213 | $seq = $seq->revcom(); | ||||
| 1214 | |||||
| 1215 | This of course, causes Perl to handle the garbage collection of | ||||
| 1216 | the old object, but it is roughly speaking as efficient as an | ||||
| 1217 | in-place edit. | ||||
| 1218 | |||||
| 1219 | Returns : A new (fresh) Bio::Seq object | ||||
| 1220 | Args : None | ||||
| 1221 | |||||
| 1222 | =head2 trunc | ||||
| 1223 | |||||
| 1224 | Title : trunc | ||||
| 1225 | Usage : $subseq = $myseq->trunc(10,100); | ||||
| 1226 | Function: Provides a truncation of a sequence | ||||
| 1227 | |||||
| 1228 | Example : | ||||
| 1229 | Returns : A fresh Seq object | ||||
| 1230 | Args : Two integers (start and end positions) | ||||
| 1231 | |||||
| 1232 | =head2 id | ||||
| 1233 | |||||
| 1234 | Title : id | ||||
| 1235 | Usage : $id = $seq->id() | ||||
| 1236 | Function: This is mapped on display_id | ||||
| 1237 | Returns : value of display_id() | ||||
| 1238 | Args : [optional] value to update display_id | ||||
| 1239 | |||||
| 1240 | =cut | ||||
| 1241 | |||||
| 1242 | sub id { | ||||
| 1243 | return shift->display_id(@_); | ||||
| 1244 | } | ||||
| 1245 | |||||
| 1246 | |||||
| 1247 | =head1 Seq only methods | ||||
| 1248 | |||||
| 1249 | These methods are specific to the Bio::Seq object, and not | ||||
| 1250 | found on the Bio::PrimarySeq object | ||||
| 1251 | |||||
| 1252 | =head2 primary_seq | ||||
| 1253 | |||||
| 1254 | Title : primary_seq | ||||
| 1255 | Usage : $seq->primary_seq or $seq->primary_seq($newval) | ||||
| 1256 | Function: Get or set a PrimarySeq object | ||||
| 1257 | Example : | ||||
| 1258 | Returns : PrimarySeq object | ||||
| 1259 | Args : None or PrimarySeq object | ||||
| 1260 | |||||
| 1261 | =cut | ||||
| 1262 | |||||
| 1263 | sub primary_seq { | ||||
| 1264 | my ($obj,$value) = @_; | ||||
| 1265 | |||||
| 1266 | if( defined $value) { | ||||
| 1267 | if( ! ref $value || ! $value->isa('Bio::PrimarySeqI') ) { | ||||
| 1268 | $obj->throw("$value is not a Bio::PrimarySeq compliant object"); | ||||
| 1269 | } | ||||
| 1270 | |||||
| 1271 | $obj->{'primary_seq'} = $value; | ||||
| 1272 | # descend down over all seqfeature objects, seeing whether they | ||||
| 1273 | # want an attached seq. | ||||
| 1274 | |||||
| 1275 | foreach my $sf ( $obj->get_SeqFeatures() ) { | ||||
| 1276 | $sf->attach_seq($value); | ||||
| 1277 | } | ||||
| 1278 | |||||
| 1279 | } | ||||
| 1280 | return $obj->{'primary_seq'}; | ||||
| 1281 | |||||
| 1282 | } | ||||
| 1283 | |||||
| 1284 | |||||
| 1285 | =head2 species | ||||
| 1286 | |||||
| 1287 | Title : species | ||||
| 1288 | Usage : $species = $seq->species() or $seq->species($species) | ||||
| 1289 | Function: Gets or sets the species | ||||
| 1290 | Returns : L<Bio::Species> object | ||||
| 1291 | Args : None or L<Bio::Species> object | ||||
| 1292 | |||||
| 1293 | See L<Bio::Species> for more information | ||||
| 1294 | |||||
| 1295 | =cut | ||||
| 1296 | |||||
| 1297 | sub species { | ||||
| 1298 | my ($self, $species) = @_; | ||||
| 1299 | if ($species) { | ||||
| 1300 | $self->{'species'} = $species; | ||||
| 1301 | } else { | ||||
| 1302 | return $self->{'species'}; | ||||
| 1303 | } | ||||
| 1304 | } | ||||
| 1305 | |||||
| 1306 | |||||
| 1307 | # Internal methods follow... | ||||
| 1308 | |||||
| 1309 | # keep AUTOLOAD happy | ||||
| 1310 | sub DESTROY { } | ||||
| 1311 | |||||
| 1312 | ############################################################################ | ||||
| 1313 | # aliases due to name changes or to compensate for our lack of consistency # | ||||
| 1314 | ############################################################################ | ||||
| 1315 | |||||
| 1316 | # in all other modules we use the object in the singular -- | ||||
| 1317 | # lack of consistency sucks | ||||
| 1318 | 1 | 2µs | *flush_SeqFeature = \&remove_SeqFeatures; | ||
| 1319 | 1 | 200ns | *flush_SeqFeatures = \&remove_SeqFeatures; | ||
| 1320 | |||||
| 1321 | # this is now get_SeqFeatures() (from FeatureHolderI) | ||||
| 1322 | 1 | 200ns | *top_SeqFeatures = \&get_SeqFeatures; | ||
| 1323 | |||||
| 1324 | # this is now get_all_SeqFeatures() in FeatureHolderI | ||||
| 1325 | sub all_SeqFeatures{ | ||||
| 1326 | return shift->get_all_SeqFeatures(@_); | ||||
| 1327 | } | ||||
| 1328 | |||||
| 1329 | sub accession { | ||||
| 1330 | my $self = shift; | ||||
| 1331 | $self->warn(ref($self)."::accession is deprecated, ". | ||||
| 1332 | "use accession_number() instead"); | ||||
| 1333 | return $self->accession_number(@_); | ||||
| 1334 | } | ||||
| 1335 | |||||
| 1336 | 1 | 7µs | 1; |