| Filename | /Users/ap13/perl5/lib/perl5/Bio/Tools/IUPAC.pm |
| Statements | Executed 14 statements in 1.67ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 64µs | 64µs | Bio::Tools::IUPAC::BEGIN@172 |
| 1 | 1 | 1 | 15µs | 29µs | Bio::Tools::IUPAC::BEGIN@168 |
| 1 | 1 | 1 | 9µs | 83µs | Bio::Tools::IUPAC::BEGIN@169 |
| 1 | 1 | 1 | 9µs | 95µs | Bio::Tools::IUPAC::BEGIN@170 |
| 1 | 1 | 1 | 8µs | 8µs | Bio::Tools::IUPAC::iupac_iup |
| 1 | 1 | 1 | 5µs | 5µs | Bio::Tools::IUPAC::iupac_iub |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::IUPAC::AUTOLOAD |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::IUPAC::_initialize |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::IUPAC::count |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::IUPAC::iupac |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::IUPAC::iupac_amb |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::IUPAC::iupac_iub_amb |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::IUPAC::iupac_iup_amb |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::IUPAC::iupac_rev_iub |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::IUPAC::new |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::IUPAC::next_seq |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::IUPAC::regexp |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | # | ||||
| 2 | # BioPerl module for IUPAC | ||||
| 3 | # | ||||
| 4 | # Please direct questions and support issues to <bioperl-l@bioperl.org> | ||||
| 5 | # | ||||
| 6 | # Cared for by Aaron Mackey <amackey@virginia.edu> | ||||
| 7 | # | ||||
| 8 | # Copyright Aaron Mackey | ||||
| 9 | # | ||||
| 10 | # You may distribute this module under the same terms as perl itself | ||||
| 11 | |||||
| 12 | # POD documentation - main docs before the code | ||||
| 13 | |||||
| 14 | =head1 NAME | ||||
| 15 | |||||
| 16 | Bio::Tools::IUPAC - Generates unique sequence objects or regular expressions from | ||||
| 17 | an ambiguous IUPAC sequence | ||||
| 18 | |||||
| 19 | =head1 SYNOPSIS | ||||
| 20 | |||||
| 21 | use Bio::PrimarySeq; | ||||
| 22 | use Bio::Tools::IUPAC; | ||||
| 23 | |||||
| 24 | # Get the IUPAC code for proteins | ||||
| 25 | my %iupac_prot = Bio::Tools::IUPAC->new->iupac_iup; | ||||
| 26 | |||||
| 27 | # Create a sequence with degenerate residues | ||||
| 28 | my $ambiseq = Bio::PrimarySeq->new(-seq => 'ARTCGUTGN', -alphabet => 'dna'); | ||||
| 29 | |||||
| 30 | # Create all possible non-degenerate sequences | ||||
| 31 | my $iupac = Bio::Tools::IUPAC->new(-seq => $ambiseq); | ||||
| 32 | while ($uniqueseq = $iupac->next_seq()) { | ||||
| 33 | # process the unique Bio::Seq object. | ||||
| 34 | } | ||||
| 35 | |||||
| 36 | # Get a regular expression that matches all possible sequences | ||||
| 37 | my $regexp = $iupac->regexp(); | ||||
| 38 | |||||
| 39 | =head1 DESCRIPTION | ||||
| 40 | |||||
| 41 | Bio::Tools::IUPAC is a tool that manipulates sequences with ambiguous residues | ||||
| 42 | following the IUPAC conventions. Non-standard characters have the meaning | ||||
| 43 | described below: | ||||
| 44 | |||||
| 45 | IUPAC-IUB SYMBOLS FOR NUCLEOTIDE (DNA OR RNA) NOMENCLATURE: | ||||
| 46 | Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030 | ||||
| 47 | |||||
| 48 | --------------------------------------------------------------- | ||||
| 49 | Symbol Meaning Nucleic Acid | ||||
| 50 | --------------------------------------------------------------- | ||||
| 51 | A A Adenine | ||||
| 52 | C C Cytosine | ||||
| 53 | G G Guanine | ||||
| 54 | T T Thymine | ||||
| 55 | U U Uracil | ||||
| 56 | M A or C aMino | ||||
| 57 | R A or G puRine | ||||
| 58 | W A or T Weak | ||||
| 59 | S C or G Strong | ||||
| 60 | Y C or T pYrimidine | ||||
| 61 | K G or T Keto | ||||
| 62 | V A or C or G not T (closest unused char after T) | ||||
| 63 | H A or C or T not G (closest unused char after G) | ||||
| 64 | D A or G or T not C (closest unused char after C) | ||||
| 65 | B C or G or T not A (closest unused char after A) | ||||
| 66 | X G or A or T or C Unknown (very rarely used) | ||||
| 67 | N G or A or T or C Unknown (commonly used) | ||||
| 68 | |||||
| 69 | |||||
| 70 | IUPAC-IUP AMINO ACID SYMBOLS: | ||||
| 71 | Biochem J. 1984 Apr 15; 219(2): 345-373 | ||||
| 72 | Eur J Biochem. 1993 Apr 1; 213(1): 2 | ||||
| 73 | |||||
| 74 | ------------------------------------------ | ||||
| 75 | Symbol Meaning | ||||
| 76 | ------------------------------------------ | ||||
| 77 | A Alanine | ||||
| 78 | B Aspartic Acid, Asparagine | ||||
| 79 | C Cysteine | ||||
| 80 | D Aspartic Acid | ||||
| 81 | E Glutamic Acid | ||||
| 82 | F Phenylalanine | ||||
| 83 | G Glycine | ||||
| 84 | H Histidine | ||||
| 85 | I Isoleucine | ||||
| 86 | J Isoleucine/Leucine | ||||
| 87 | K Lysine | ||||
| 88 | L Leucine | ||||
| 89 | M Methionine | ||||
| 90 | N Asparagine | ||||
| 91 | O Pyrrolysine | ||||
| 92 | P Proline | ||||
| 93 | Q Glutamine | ||||
| 94 | R Arginine | ||||
| 95 | S Serine | ||||
| 96 | T Threonine | ||||
| 97 | U Selenocysteine | ||||
| 98 | V Valine | ||||
| 99 | W Tryptophan | ||||
| 100 | X Unknown | ||||
| 101 | Y Tyrosine | ||||
| 102 | Z Glutamic Acid, Glutamine | ||||
| 103 | * Terminator | ||||
| 104 | |||||
| 105 | There are a few things Bio::Tools::IUPAC can do for you: | ||||
| 106 | |||||
| 107 | =over | ||||
| 108 | |||||
| 109 | =item * | ||||
| 110 | |||||
| 111 | report the IUPAC mapping between ambiguous and non-ambiguous residues | ||||
| 112 | |||||
| 113 | =item * | ||||
| 114 | |||||
| 115 | produce a stream of all possible corresponding unambiguous Bio::Seq objects given | ||||
| 116 | an ambiguous sequence object | ||||
| 117 | |||||
| 118 | =item * | ||||
| 119 | |||||
| 120 | convert an ambiguous sequence object to a corresponding regular expression | ||||
| 121 | |||||
| 122 | =back | ||||
| 123 | |||||
| 124 | =head1 FEEDBACK | ||||
| 125 | |||||
| 126 | =head2 Mailing Lists | ||||
| 127 | |||||
| 128 | User feedback is an integral part of the evolution of this and other | ||||
| 129 | Bioperl modules. Send your comments and suggestions preferably to one | ||||
| 130 | of the Bioperl mailing lists. Your participation is much appreciated. | ||||
| 131 | |||||
| 132 | bioperl-l@bioperl.org - General discussion | ||||
| 133 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | ||||
| 134 | |||||
| 135 | =head2 Support | ||||
| 136 | |||||
| 137 | Please direct usage questions or support issues to the mailing list: | ||||
| 138 | |||||
| 139 | I<bioperl-l@bioperl.org> | ||||
| 140 | |||||
| 141 | rather than to the module maintainer directly. Many experienced and | ||||
| 142 | reponsive experts will be able look at the problem and quickly | ||||
| 143 | address it. Please include a thorough description of the problem | ||||
| 144 | with code and data examples if at all possible. | ||||
| 145 | |||||
| 146 | =head2 Reporting Bugs | ||||
| 147 | |||||
| 148 | Report bugs to the Bioperl bug tracking system to help us keep track | ||||
| 149 | the bugs and their resolution. Bug reports can be submitted via the | ||||
| 150 | web: | ||||
| 151 | |||||
| 152 | https://github.com/bioperl/bioperl-live/issues | ||||
| 153 | |||||
| 154 | =head1 AUTHOR - Aaron Mackey | ||||
| 155 | |||||
| 156 | Email amackey-at-virginia.edu | ||||
| 157 | |||||
| 158 | =head1 APPENDIX | ||||
| 159 | |||||
| 160 | The rest of the documentation details each of the object | ||||
| 161 | methods. Internal methods are usually preceded with a _ | ||||
| 162 | |||||
| 163 | =cut | ||||
| 164 | |||||
| 165 | |||||
| 166 | package Bio::Tools::IUPAC; | ||||
| 167 | |||||
| 168 | 2 | 30µs | 2 | 44µs | # spent 29µs (15+15) within Bio::Tools::IUPAC::BEGIN@168 which was called:
# once (15µs+15µs) by Bio::Tools::CodonTable::BEGIN@190 at line 168 # spent 29µs making 1 call to Bio::Tools::IUPAC::BEGIN@168
# spent 15µs making 1 call to strict::import |
| 169 | 2 | 33µs | 2 | 83µs | # spent 83µs (9+74) within Bio::Tools::IUPAC::BEGIN@169 which was called:
# once (9µs+74µs) by Bio::Tools::CodonTable::BEGIN@190 at line 169 # spent 83µs making 1 call to Bio::Tools::IUPAC::BEGIN@169
# spent 74µs making 1 call to base::import, recursion: max depth 2, sum of overlapping time 74µs |
| 170 | 2 | 289µs | 2 | 182µs | # spent 95µs (9+87) within Bio::Tools::IUPAC::BEGIN@170 which was called:
# once (9µs+87µs) by Bio::Tools::CodonTable::BEGIN@190 at line 170 # spent 95µs making 1 call to Bio::Tools::IUPAC::BEGIN@170
# spent 87µs making 1 call to vars::import |
| 171 | |||||
| 172 | # spent 64µs within Bio::Tools::IUPAC::BEGIN@172 which was called:
# once (64µs+0s) by Bio::Tools::CodonTable::BEGIN@190 at line 267 | ||||
| 173 | # Ambiguous nucleic residues are matched to unambiguous residues | ||||
| 174 | 1 | 24µs | %IUB = ( | ||
| 175 | A => [qw(A)], | ||||
| 176 | C => [qw(C)], | ||||
| 177 | G => [qw(G)], | ||||
| 178 | T => [qw(T)], | ||||
| 179 | U => [qw(U)], | ||||
| 180 | M => [qw(A C)], | ||||
| 181 | R => [qw(A G)], | ||||
| 182 | S => [qw(C G)], | ||||
| 183 | W => [qw(A T)], | ||||
| 184 | Y => [qw(C T)], | ||||
| 185 | K => [qw(G T)], | ||||
| 186 | V => [qw(A C G)], | ||||
| 187 | H => [qw(A C T)], | ||||
| 188 | D => [qw(A G T)], | ||||
| 189 | B => [qw(C G T)], | ||||
| 190 | N => [qw(A C G T)], | ||||
| 191 | X => [qw(A C G T)], | ||||
| 192 | ); | ||||
| 193 | |||||
| 194 | # Same as %IUB but ambiguous residues are matched to ambiguous residues only | ||||
| 195 | 1 | 10µs | %IUB_AMB = ( | ||
| 196 | M => [qw(M)], | ||||
| 197 | R => [qw(R)], | ||||
| 198 | W => [qw(W)], | ||||
| 199 | S => [qw(S)], | ||||
| 200 | Y => [qw(Y)], | ||||
| 201 | K => [qw(K)], | ||||
| 202 | V => [qw(M R S V)], | ||||
| 203 | H => [qw(H M W Y)], | ||||
| 204 | D => [qw(D K R W)], | ||||
| 205 | B => [qw(B K S Y)], | ||||
| 206 | N => [qw(B D H K M N R S V W Y)], | ||||
| 207 | ); | ||||
| 208 | |||||
| 209 | # The inverse of %IUB | ||||
| 210 | 1 | 8µs | %REV_IUB = ( | ||
| 211 | A => 'A', | ||||
| 212 | T => 'T', | ||||
| 213 | U => 'U', | ||||
| 214 | C => 'C', | ||||
| 215 | G => 'G', | ||||
| 216 | AC => 'M', | ||||
| 217 | AG => 'R', | ||||
| 218 | AT => 'W', | ||||
| 219 | CG => 'S', | ||||
| 220 | CT => 'Y', | ||||
| 221 | GT => 'K', | ||||
| 222 | ACG => 'V', | ||||
| 223 | ACT => 'H', | ||||
| 224 | AGT => 'D', | ||||
| 225 | CGT => 'B', | ||||
| 226 | ACGT => 'N', | ||||
| 227 | N => 'N' | ||||
| 228 | ); | ||||
| 229 | |||||
| 230 | # Same thing with proteins now | ||||
| 231 | 1 | 15µs | %IUP = ( | ||
| 232 | A => [qw(A)], | ||||
| 233 | B => [qw(D N)], | ||||
| 234 | C => [qw(C)], | ||||
| 235 | D => [qw(D)], | ||||
| 236 | E => [qw(E)], | ||||
| 237 | F => [qw(F)], | ||||
| 238 | G => [qw(G)], | ||||
| 239 | H => [qw(H)], | ||||
| 240 | I => [qw(I)], | ||||
| 241 | J => [qw(I L)], | ||||
| 242 | K => [qw(K)], | ||||
| 243 | L => [qw(L)], | ||||
| 244 | M => [qw(M)], | ||||
| 245 | N => [qw(N)], | ||||
| 246 | O => [qw(O)], | ||||
| 247 | P => [qw(P)], | ||||
| 248 | Q => [qw(Q)], | ||||
| 249 | R => [qw(R)], | ||||
| 250 | S => [qw(S)], | ||||
| 251 | T => [qw(T)], | ||||
| 252 | U => [qw(U)], | ||||
| 253 | V => [qw(V)], | ||||
| 254 | W => [qw(W)], | ||||
| 255 | X => [qw(X)], | ||||
| 256 | Y => [qw(Y)], | ||||
| 257 | Z => [qw(E Q)], | ||||
| 258 | '*' => [qw(*)], | ||||
| 259 | ); | ||||
| 260 | |||||
| 261 | 1 | 7µs | %IUP_AMB = ( | ||
| 262 | B => [qw(B)], | ||||
| 263 | J => [qw(J)], | ||||
| 264 | Z => [qw(Z)], | ||||
| 265 | ); | ||||
| 266 | |||||
| 267 | 1 | 1.23ms | 1 | 64µs | } # spent 64µs making 1 call to Bio::Tools::IUPAC::BEGIN@172 |
| 268 | |||||
| 269 | |||||
| 270 | =head2 new | ||||
| 271 | |||||
| 272 | Title : new | ||||
| 273 | Usage : Bio::Tools::IUPAC->new($seq); | ||||
| 274 | Function: Create a new IUPAC object, which acts as a sequence stream (akin to | ||||
| 275 | SeqIO) | ||||
| 276 | Args : an ambiguously coded sequence object that has a specified 'alphabet' | ||||
| 277 | Returns : a Bio::Tools::IUPAC object. | ||||
| 278 | |||||
| 279 | =cut | ||||
| 280 | |||||
| 281 | sub new { | ||||
| 282 | my ($class,@args) = @_; | ||||
| 283 | my $self = $class->SUPER::new(@args); | ||||
| 284 | my ($seq) = $self->_rearrange([qw(SEQ)],@args); | ||||
| 285 | |||||
| 286 | if ( (not defined $seq) && @args && ref($args[0]) ) { | ||||
| 287 | # parameter not passed as named parameter? | ||||
| 288 | $seq = $args[0]; | ||||
| 289 | } | ||||
| 290 | |||||
| 291 | if (defined $seq) { | ||||
| 292 | if (not $seq->isa('Bio::PrimarySeqI')) { | ||||
| 293 | $self->throw('Must supply a sequence object'); | ||||
| 294 | } | ||||
| 295 | if (length $seq->seq == 0) { | ||||
| 296 | $self->throw('Sequence had zero-length'); | ||||
| 297 | } | ||||
| 298 | $self->{'_seq'} = $seq; | ||||
| 299 | } | ||||
| 300 | |||||
| 301 | return $self; | ||||
| 302 | } | ||||
| 303 | |||||
| 304 | |||||
| 305 | sub _initialize { | ||||
| 306 | my ($self) = @_; | ||||
| 307 | my %iupac = $self->iupac; | ||||
| 308 | $self->{'_alpha'} = [ map { $iupac{uc $_} } split('', $self->{'_seq'}->seq) ]; | ||||
| 309 | $self->{'_string'} = [(0) x length($self->{'_seq'}->seq())]; | ||||
| 310 | $self->{'_string'}->[0] = -1; | ||||
| 311 | } | ||||
| 312 | |||||
| 313 | |||||
| 314 | =head2 next_seq | ||||
| 315 | |||||
| 316 | Title : next_seq | ||||
| 317 | Usage : $iupac->next_seq(); | ||||
| 318 | Function: returns the next unique sequence object | ||||
| 319 | Args : none. | ||||
| 320 | Returns : a Bio::Seq object | ||||
| 321 | |||||
| 322 | =cut | ||||
| 323 | |||||
| 324 | sub next_seq { | ||||
| 325 | my ($self) = @_; | ||||
| 326 | |||||
| 327 | if (not exists $self->{'_string'}) { | ||||
| 328 | $self->_initialize(); | ||||
| 329 | } | ||||
| 330 | |||||
| 331 | for my $i ( 0 .. $#{$self->{'_string'}} ) { | ||||
| 332 | next unless $self->{'_string'}->[$i] || @{$self->{'_alpha'}->[$i]} > 1; | ||||
| 333 | if ( $self->{'_string'}->[$i] == $#{$self->{'_alpha'}->[$i]} ) { # rollover | ||||
| 334 | if ( $i == $#{$self->{'_string'}} ) { # end of possibilities | ||||
| 335 | return; | ||||
| 336 | } else { | ||||
| 337 | $self->{'_string'}->[$i] = 0; | ||||
| 338 | next; | ||||
| 339 | } | ||||
| 340 | } else { | ||||
| 341 | $self->{'_string'}->[$i]++; | ||||
| 342 | my $j = -1; | ||||
| 343 | my $seqstr = join('', map { $j++; $self->{'_alpha'}->[$j]->[$_]; } @{$self->{'_string'}}); | ||||
| 344 | my $desc = $self->{'_seq'}->desc() || ''; | ||||
| 345 | $self->{'_num'}++; | ||||
| 346 | 1 while $self->{'_num'} =~ s/(\d)(\d\d\d)(?!\d)/$1,$2/; | ||||
| 347 | $desc =~ s/( \[Bio::Tools::IUPAC-generated\sunique sequence # [^\]]*\])|$/ \[Bio::Tools::IUPAC-generated unique sequence # $self->{'_num'}\]/; | ||||
| 348 | $self->{'_num'} =~ s/,//g; | ||||
| 349 | |||||
| 350 | # Return a fresh sequence object | ||||
| 351 | return Bio::PrimarySeq->new(-seq => $seqstr, -desc => $desc); | ||||
| 352 | } | ||||
| 353 | } | ||||
| 354 | } | ||||
| 355 | |||||
| 356 | |||||
| 357 | =head2 iupac | ||||
| 358 | |||||
| 359 | Title : iupac | ||||
| 360 | Usage : my %symbols = $iupac->iupac; | ||||
| 361 | Function: Returns a hash of symbols -> symbol components of the right type | ||||
| 362 | for the given sequence, i.e. it is the same as iupac_iup() if | ||||
| 363 | Bio::Tools::IUPAC was given a proteic sequence, or iupac_iub() if the | ||||
| 364 | sequence was nucleic. For example, the key 'M' has the value ['A', 'C']. | ||||
| 365 | Args : none | ||||
| 366 | Returns : Hash | ||||
| 367 | |||||
| 368 | =cut | ||||
| 369 | |||||
| 370 | sub iupac { | ||||
| 371 | my ($self) = @_; | ||||
| 372 | my $alphabet = lc( $self->{'_seq'}->alphabet() ); | ||||
| 373 | if ( ($alphabet eq 'dna') or ($alphabet eq 'rna') ) { | ||||
| 374 | return %IUB; # nucleic | ||||
| 375 | } elsif ( $alphabet eq 'protein' ) { | ||||
| 376 | return %IUP; # proteic | ||||
| 377 | } else { | ||||
| 378 | $self->throw("The input sequence had the unknown alphabet '$alphabet'\n"); | ||||
| 379 | } | ||||
| 380 | } | ||||
| 381 | |||||
| - - | |||||
| 384 | =head2 iupac_amb | ||||
| 385 | |||||
| 386 | Title : iupac_amb | ||||
| 387 | Usage : my %symbols = $iupac->iupac_amb; | ||||
| 388 | Function: Same as iupac() but only contains a mapping between ambiguous residues | ||||
| 389 | and the ambiguous residues they map to. For example, the key 'N' has | ||||
| 390 | the value ['R', 'Y', 'K', 'M', 'S', 'W', 'B', 'D', 'H', 'V', 'N'], | ||||
| 391 | i.e. it matches all other ambiguous residues. | ||||
| 392 | Args : none | ||||
| 393 | Returns : Hash | ||||
| 394 | |||||
| 395 | =cut | ||||
| 396 | |||||
| 397 | sub iupac_amb { | ||||
| 398 | my ($self) = @_; | ||||
| 399 | my $alphabet = lc( $self->{'_seq'}->alphabet() ); | ||||
| 400 | if ( ($alphabet eq 'dna') or ($alphabet eq 'rna') ) { | ||||
| 401 | return %IUB_AMB; # nucleic | ||||
| 402 | } elsif ( $alphabet eq 'protein' ) { | ||||
| 403 | return %IUP_AMB; # proteic | ||||
| 404 | } else { | ||||
| 405 | $self->throw("The input sequence had the unknown alphabet '$alphabet'\n"); | ||||
| 406 | } | ||||
| 407 | } | ||||
| 408 | |||||
| 409 | |||||
| 410 | =head2 iupac_iup | ||||
| 411 | |||||
| 412 | Title : iupac_iup | ||||
| 413 | Usage : my %aasymbols = $iupac->iupac_iup; | ||||
| 414 | Function: Returns a hash of PROTEIN symbols -> non-ambiguous symbol components | ||||
| 415 | Args : none | ||||
| 416 | Returns : Hash | ||||
| 417 | |||||
| 418 | =cut | ||||
| 419 | |||||
| 420 | # spent 8µs within Bio::Tools::IUPAC::iupac_iup which was called:
# once (8µs+0s) by Bio::Tools::CodonTable::BEGIN@198 at line 294 of Bio/Tools/CodonTable.pm | ||||
| 421 | 1 | 12µs | return %IUP; | ||
| 422 | } | ||||
| 423 | |||||
| 424 | |||||
| 425 | =head2 iupac_iup_amb | ||||
| 426 | |||||
| 427 | Title : iupac_iup_amb | ||||
| 428 | Usage : my %aasymbols = $iupac->iupac_iup_amb; | ||||
| 429 | Function: Returns a hash of PROTEIN symbols -> ambiguous symbol components | ||||
| 430 | Args : none | ||||
| 431 | Returns : Hash | ||||
| 432 | |||||
| 433 | =cut | ||||
| 434 | |||||
| 435 | sub iupac_iup_amb { | ||||
| 436 | return %IUP_AMB; | ||||
| 437 | } | ||||
| 438 | |||||
| 439 | |||||
| 440 | =head2 iupac_iub | ||||
| 441 | |||||
| 442 | Title : iupac_iub | ||||
| 443 | Usage : my %dnasymbols = $iupac->iupac_iub; | ||||
| 444 | Function: Returns a hash of DNA symbols -> non-ambiguous symbol components | ||||
| 445 | Args : none | ||||
| 446 | Returns : Hash | ||||
| 447 | |||||
| 448 | =cut | ||||
| 449 | |||||
| 450 | # spent 5µs within Bio::Tools::IUPAC::iupac_iub which was called:
# once (5µs+0s) by Bio::Tools::CodonTable::BEGIN@198 at line 293 of Bio/Tools/CodonTable.pm | ||||
| 451 | 1 | 9µs | return %IUB; | ||
| 452 | } | ||||
| 453 | |||||
| 454 | |||||
| 455 | =head2 iupac_iub_amb | ||||
| 456 | |||||
| 457 | Title : iupac_iub_amb | ||||
| 458 | Usage : my %dnasymbols = $iupac->iupac_iub; | ||||
| 459 | Function: Returns a hash of DNA symbols -> ambiguous symbol components | ||||
| 460 | Args : none | ||||
| 461 | Returns : Hash | ||||
| 462 | |||||
| 463 | =cut | ||||
| 464 | |||||
| 465 | sub iupac_iub_amb { | ||||
| 466 | return %IUB_AMB; | ||||
| 467 | } | ||||
| 468 | |||||
| 469 | |||||
| 470 | =head2 iupac_rev_iub | ||||
| 471 | |||||
| 472 | Title : iupac_rev_iub | ||||
| 473 | Usage : my %dnasymbols = $iupac->iupac_rev_iub; | ||||
| 474 | Function: Returns a hash of nucleotide combinations -> IUPAC code | ||||
| 475 | (a reverse of the iupac_iub hash). | ||||
| 476 | Args : none | ||||
| 477 | Returns : Hash | ||||
| 478 | |||||
| 479 | =cut | ||||
| 480 | |||||
| 481 | sub iupac_rev_iub { | ||||
| 482 | return %REV_IUB; | ||||
| 483 | } | ||||
| 484 | |||||
| 485 | |||||
| 486 | =head2 count | ||||
| 487 | |||||
| 488 | Title : count | ||||
| 489 | Usage : my $total = $iupac->count(); | ||||
| 490 | Function: Calculates the number of unique, unambiguous sequences that | ||||
| 491 | this ambiguous sequence could generate | ||||
| 492 | Args : none | ||||
| 493 | Return : int | ||||
| 494 | |||||
| 495 | =cut | ||||
| 496 | |||||
| 497 | sub count { | ||||
| 498 | my ($self) = @_; | ||||
| 499 | if (not exists $self->{'_string'}) { | ||||
| 500 | $self->_initialize(); | ||||
| 501 | } | ||||
| 502 | my $count = 1; | ||||
| 503 | $count *= scalar(@$_) for (@{$self->{'_alpha'}}); | ||||
| 504 | return $count; | ||||
| 505 | } | ||||
| 506 | |||||
| 507 | |||||
| 508 | =head2 regexp | ||||
| 509 | |||||
| 510 | Title : regexp | ||||
| 511 | Usage : my $re = $iupac->regexp(); | ||||
| 512 | Function: Converts the ambiguous sequence into a regular expression that | ||||
| 513 | matches all of the corresponding ambiguous and non-ambiguous sequences. | ||||
| 514 | You can further manipulate the resulting regular expression with the | ||||
| 515 | Bio::Tools::SeqPattern module. After you are done building your | ||||
| 516 | regular expression, you might want to compile it and make it case- | ||||
| 517 | insensitive: | ||||
| 518 | $re = qr/$re/i; | ||||
| 519 | Args : 1 to match RNA: T and U characters will match interchangeably | ||||
| 520 | Return : regular expression | ||||
| 521 | |||||
| 522 | =cut | ||||
| 523 | |||||
| 524 | sub regexp { | ||||
| 525 | my ($self, $match_rna) = @_; | ||||
| 526 | my $re; | ||||
| 527 | my $seq = $self->{'_seq'}->seq; | ||||
| 528 | my %iupac = $self->iupac; | ||||
| 529 | my %iupac_amb = $self->iupac_amb; | ||||
| 530 | for my $pos (0 .. length($seq)-1) { | ||||
| 531 | my $res = substr $seq, $pos, 1; | ||||
| 532 | my $iupacs = $iupac{$res}; | ||||
| 533 | my $iupacs_amb = $iupac_amb{$res} || []; | ||||
| 534 | if (not defined $iupacs) { | ||||
| 535 | $self->throw("Primer sequence '$seq' is not a valid IUPAC sequence.". | ||||
| 536 | " Offending character was '$res'.\n"); | ||||
| 537 | } | ||||
| 538 | my $part = join '', (@$iupacs, @$iupacs_amb); | ||||
| 539 | if ($match_rna) { | ||||
| 540 | $part =~ s/T/TU/i || $part =~ s/U/TU/i; | ||||
| 541 | } | ||||
| 542 | if (length $part > 1) { | ||||
| 543 | $part = '['.$part.']'; | ||||
| 544 | } | ||||
| 545 | $re .= $part; | ||||
| 546 | } | ||||
| 547 | return $re; | ||||
| 548 | } | ||||
| 549 | |||||
| 550 | |||||
| 551 | sub AUTOLOAD { | ||||
| 552 | my $self = shift @_; | ||||
| 553 | my $method = $AUTOLOAD; | ||||
| 554 | $method =~ s/.*:://; | ||||
| 555 | return $self->{'_seq'}->$method(@_) | ||||
| 556 | unless $method eq 'DESTROY'; | ||||
| 557 | } | ||||
| 558 | |||||
| 559 | 1 | 3µs | 1; | ||
| 560 |