| Filename | /Users/ap13/perl5/lib/perl5/Bio/Tools/CodonTable.pm |
| Statements | Executed 303 statements in 2.75ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 5.35ms | 6.63ms | Bio::Tools::CodonTable::BEGIN@191 |
| 1 | 1 | 1 | 1.76ms | 2.03ms | Bio::Tools::CodonTable::BEGIN@190 |
| 1 | 1 | 1 | 215µs | 361µs | Bio::Tools::CodonTable::BEGIN@198 |
| 1 | 1 | 1 | 15µs | 212µs | Bio::Tools::CodonTable::BEGIN@185 |
| 1 | 1 | 1 | 12µs | 86µs | Bio::Tools::CodonTable::BEGIN@193 |
| 1 | 1 | 1 | 10µs | 64µs | Bio::Tools::CodonTable::BEGIN@199 |
| 1 | 1 | 1 | 10µs | 25µs | Bio::Tools::CodonTable::BEGIN@187 |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::_codon_is |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::_make_iupac_string |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::_translate_ambiguous_codon |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::_unambiquous_codons |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::add_table |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::id |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::is_start_codon |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::is_ter_codon |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::is_unknown_codon |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::name |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::new |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::reverse_translate_all |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::reverse_translate_best |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::revtranslate |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::tables |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::translate |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::translate_strict |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::CodonTable::unambiguous_codons |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | # | ||||
| 2 | # bioperl module for Bio::Tools::CodonTable | ||||
| 3 | # | ||||
| 4 | # Please direct questions and support issues to <bioperl-l@bioperl.org> | ||||
| 5 | # | ||||
| 6 | # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org> | ||||
| 7 | # | ||||
| 8 | # Copyright Heikki Lehvaslaiho | ||||
| 9 | # | ||||
| 10 | # You may distribute this module under the same terms as perl itself | ||||
| 11 | |||||
| 12 | # POD documentation - main docs before the code | ||||
| 13 | |||||
| 14 | =head1 NAME | ||||
| 15 | |||||
| 16 | Bio::Tools::CodonTable - Codon table object | ||||
| 17 | |||||
| 18 | =head1 SYNOPSIS | ||||
| 19 | |||||
| 20 | # This is a read-only class for all known codon tables. The IDs are | ||||
| 21 | # the ones used by nucleotide sequence databases. All common IUPAC | ||||
| 22 | # ambiguity codes for DNA, RNA and amino acids are recognized. | ||||
| 23 | |||||
| 24 | use Bio::Tools::CodonTable; | ||||
| 25 | |||||
| 26 | # defaults to ID 1 "Standard" | ||||
| 27 | $myCodonTable = Bio::Tools::CodonTable->new(); | ||||
| 28 | $myCodonTable2 = Bio::Tools::CodonTable->new( -id => 3 ); | ||||
| 29 | |||||
| 30 | # change codon table | ||||
| 31 | $myCodonTable->id(5); | ||||
| 32 | |||||
| 33 | # examine codon table | ||||
| 34 | print join (' ', "The name of the codon table no.", $myCodonTable->id(4), | ||||
| 35 | "is:", $myCodonTable->name(), "\n"); | ||||
| 36 | |||||
| 37 | # print possible codon tables | ||||
| 38 | $tables = Bio::Tools::CodonTable->tables; | ||||
| 39 | while ( ($id,$name) = each %{$tables} ) { | ||||
| 40 | print "$id = $name\n"; | ||||
| 41 | } | ||||
| 42 | |||||
| 43 | # translate a codon | ||||
| 44 | $aa = $myCodonTable->translate('ACU'); | ||||
| 45 | $aa = $myCodonTable->translate('act'); | ||||
| 46 | $aa = $myCodonTable->translate('ytr'); | ||||
| 47 | |||||
| 48 | # reverse translate an amino acid | ||||
| 49 | @codons = $myCodonTable->revtranslate('A'); | ||||
| 50 | @codons = $myCodonTable->revtranslate('Ser'); | ||||
| 51 | @codons = $myCodonTable->revtranslate('Glx'); | ||||
| 52 | @codons = $myCodonTable->revtranslate('cYS', 'rna'); | ||||
| 53 | |||||
| 54 | # reverse translate an entire amino acid sequence into a IUPAC | ||||
| 55 | # nucleotide string | ||||
| 56 | |||||
| 57 | my $seqobj = Bio::PrimarySeq->new(-seq => 'FHGERHEL'); | ||||
| 58 | my $iupac_str = $myCodonTable->reverse_translate_all($seqobj); | ||||
| 59 | |||||
| 60 | # boolean tests | ||||
| 61 | print "Is a start\n" if $myCodonTable->is_start_codon('ATG'); | ||||
| 62 | print "Is a terminator\n" if $myCodonTable->is_ter_codon('tar'); | ||||
| 63 | print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG'); | ||||
| 64 | |||||
| 65 | =head1 DESCRIPTION | ||||
| 66 | |||||
| 67 | Codon tables are also called translation tables or genetic codes | ||||
| 68 | since that is what they represent. A bit more complete picture | ||||
| 69 | of the full complexity of codon usage in various taxonomic groups | ||||
| 70 | is presented at the NCBI Genetic Codes Home page. | ||||
| 71 | |||||
| 72 | CodonTable is a BioPerl class that knows all current translation | ||||
| 73 | tables that are used by primary nucleotide sequence databases | ||||
| 74 | (GenBank, EMBL and DDBJ). It provides methods to output information | ||||
| 75 | about tables and relationships between codons and amino acids. | ||||
| 76 | |||||
| 77 | This class and its methods recognized all common IUPAC ambiguity codes | ||||
| 78 | for DNA, RNA and animo acids. The translation method follows the | ||||
| 79 | conventions in EMBL and TREMBL databases. | ||||
| 80 | |||||
| 81 | It is a nuisance to separate RNA and cDNA representations of nucleic | ||||
| 82 | acid transcripts. The CodonTable object accepts codons of both type as | ||||
| 83 | input and allows the user to set the mode for output when reverse | ||||
| 84 | translating. Its default for output is DNA. | ||||
| 85 | |||||
| 86 | Note: | ||||
| 87 | |||||
| 88 | This class deals primarily with individual codons and amino | ||||
| 89 | acids. However in the interest of speed you can L<translate> | ||||
| 90 | longer sequence, too. The full complexity of protein translation | ||||
| 91 | is tackled by L<Bio::PrimarySeqI::translate>. | ||||
| 92 | |||||
| 93 | |||||
| 94 | The amino acid codes are IUPAC recommendations for common amino acids: | ||||
| 95 | |||||
| 96 | A Ala Alanine | ||||
| 97 | R Arg Arginine | ||||
| 98 | N Asn Asparagine | ||||
| 99 | D Asp Aspartic acid | ||||
| 100 | C Cys Cysteine | ||||
| 101 | Q Gln Glutamine | ||||
| 102 | E Glu Glutamic acid | ||||
| 103 | G Gly Glycine | ||||
| 104 | H His Histidine | ||||
| 105 | I Ile Isoleucine | ||||
| 106 | L Leu Leucine | ||||
| 107 | K Lys Lysine | ||||
| 108 | M Met Methionine | ||||
| 109 | F Phe Phenylalanine | ||||
| 110 | P Pro Proline | ||||
| 111 | O Pyl Pyrrolysine (22nd amino acid) | ||||
| 112 | U Sec Selenocysteine (21st amino acid) | ||||
| 113 | S Ser Serine | ||||
| 114 | T Thr Threonine | ||||
| 115 | W Trp Tryptophan | ||||
| 116 | Y Tyr Tyrosine | ||||
| 117 | V Val Valine | ||||
| 118 | B Asx Aspartic acid or Asparagine | ||||
| 119 | Z Glx Glutamine or Glutamic acid | ||||
| 120 | J Xle Isoleucine or Valine (mass spec ambiguity) | ||||
| 121 | X Xaa Any or unknown amino acid | ||||
| 122 | |||||
| 123 | |||||
| 124 | It is worth noting that, "Bacterial" codon table no. 11 produces an | ||||
| 125 | polypeptide that is, confusingly, identical to the standard one. The | ||||
| 126 | only differences are in available initiator codons. | ||||
| 127 | |||||
| 128 | |||||
| 129 | NCBI Genetic Codes home page: | ||||
| 130 | http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c | ||||
| 131 | |||||
| 132 | EBI Translation Table Viewer: | ||||
| 133 | http://www.ebi.ac.uk/cgi-bin/mutations/trtables.cgi | ||||
| 134 | |||||
| 135 | Amended ASN.1 version with ids 16 and 21 is at: | ||||
| 136 | ftp://ftp.ebi.ac.uk/pub/databases/geneticcode/ | ||||
| 137 | |||||
| 138 | Thanks to Matteo diTomasso for the original Perl implementation | ||||
| 139 | of these tables. | ||||
| 140 | |||||
| 141 | =head1 FEEDBACK | ||||
| 142 | |||||
| 143 | =head2 Mailing Lists | ||||
| 144 | |||||
| 145 | User feedback is an integral part of the evolution of this and other | ||||
| 146 | Bioperl modules. Send your comments and suggestions preferably to the | ||||
| 147 | Bioperl mailing lists Your participation is much appreciated. | ||||
| 148 | |||||
| 149 | bioperl-l@bioperl.org - General discussion | ||||
| 150 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | ||||
| 151 | |||||
| 152 | =head2 Support | ||||
| 153 | |||||
| 154 | Please direct usage questions or support issues to the mailing list: | ||||
| 155 | |||||
| 156 | I<bioperl-l@bioperl.org> | ||||
| 157 | |||||
| 158 | rather than to the module maintainer directly. Many experienced and | ||||
| 159 | reponsive experts will be able look at the problem and quickly | ||||
| 160 | address it. Please include a thorough description of the problem | ||||
| 161 | with code and data examples if at all possible. | ||||
| 162 | |||||
| 163 | =head2 Reporting Bugs | ||||
| 164 | |||||
| 165 | Report bugs to the Bioperl bug tracking system to help us keep track | ||||
| 166 | the bugs and their resolution. Bug reports can be submitted via the | ||||
| 167 | web: | ||||
| 168 | |||||
| 169 | https://github.com/bioperl/bioperl-live/issues | ||||
| 170 | |||||
| 171 | =head1 AUTHOR - Heikki Lehvaslaiho | ||||
| 172 | |||||
| 173 | Email: heikki-at-bioperl-dot-org | ||||
| 174 | |||||
| 175 | =head1 APPENDIX | ||||
| 176 | |||||
| 177 | The rest of the documentation details each of the object | ||||
| 178 | methods. Internal methods are usually preceded with a _ | ||||
| 179 | |||||
| 180 | =cut | ||||
| 181 | |||||
| 182 | # Let the code begin... | ||||
| 183 | |||||
| 184 | package Bio::Tools::CodonTable; | ||||
| 185 | 2 | 7µs | # spent 212µs (15+196) within Bio::Tools::CodonTable::BEGIN@185 which was called:
# once (15µs+196µs) by Bio::PrimarySeqI::BEGIN@124 at line 186 | ||
| 186 | 1 | 29µs | 2 | 408µs | %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR); # spent 212µs making 1 call to Bio::Tools::CodonTable::BEGIN@185
# spent 196µs making 1 call to vars::import |
| 187 | 2 | 28µs | 2 | 40µs | # spent 25µs (10+15) within Bio::Tools::CodonTable::BEGIN@187 which was called:
# once (10µs+15µs) by Bio::PrimarySeqI::BEGIN@124 at line 187 # spent 25µs making 1 call to Bio::Tools::CodonTable::BEGIN@187
# spent 15µs making 1 call to strict::import |
| 188 | |||||
| 189 | # Object preamble - inherits from Bio::Root::Root | ||||
| 190 | 2 | 221µs | 1 | 2.03ms | # spent 2.03ms (1.76+271µs) within Bio::Tools::CodonTable::BEGIN@190 which was called:
# once (1.76ms+271µs) by Bio::PrimarySeqI::BEGIN@124 at line 190 # spent 2.03ms making 1 call to Bio::Tools::CodonTable::BEGIN@190 |
| 191 | 2 | 205µs | 1 | 6.63ms | # spent 6.63ms (5.35+1.28) within Bio::Tools::CodonTable::BEGIN@191 which was called:
# once (5.35ms+1.28ms) by Bio::PrimarySeqI::BEGIN@124 at line 191 # spent 6.63ms making 1 call to Bio::Tools::CodonTable::BEGIN@191 |
| 192 | |||||
| 193 | 2 | 33µs | 2 | 86µs | # spent 86µs (12+73) within Bio::Tools::CodonTable::BEGIN@193 which was called:
# once (12µs+73µs) by Bio::PrimarySeqI::BEGIN@124 at line 193 # spent 86µs making 1 call to Bio::Tools::CodonTable::BEGIN@193
# spent 73µs making 1 call to base::import, recursion: max depth 2, sum of overlapping time 73µs |
| 194 | |||||
| 195 | |||||
| 196 | # first set internal values for all translation tables | ||||
| 197 | |||||
| 198 | # spent 361µs (215+146) within Bio::Tools::CodonTable::BEGIN@198 which was called:
# once (215µs+146µs) by Bio::PrimarySeqI::BEGIN@124 at line 298 | ||||
| 199 | 2 | 264µs | 2 | 116µs | # spent 64µs (10+53) within Bio::Tools::CodonTable::BEGIN@199 which was called:
# once (10µs+53µs) by Bio::PrimarySeqI::BEGIN@124 at line 199 # spent 64µs making 1 call to Bio::Tools::CodonTable::BEGIN@199
# spent 53µs making 1 call to constant::import |
| 200 | 14 | 55µs | $GAP = '-'; | ||
| 201 | $CODONGAP = $GAP x CODONSIZE; | ||||
| 202 | |||||
| 203 | @NAMES = #id | ||||
| 204 | ( | ||||
| 205 | 'Standard', #1 | ||||
| 206 | 'Vertebrate Mitochondrial',#2 | ||||
| 207 | 'Yeast Mitochondrial',# 3 | ||||
| 208 | 'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma',#4 | ||||
| 209 | 'Invertebrate Mitochondrial',#5 | ||||
| 210 | 'Ciliate, Dasycladacean and Hexamita Nuclear',# 6 | ||||
| 211 | '', '', | ||||
| 212 | 'Echinoderm Mitochondrial',#9 | ||||
| 213 | 'Euplotid Nuclear',#10 | ||||
| 214 | '"Bacterial"',# 11 | ||||
| 215 | 'Alternative Yeast Nuclear',# 12 | ||||
| 216 | 'Ascidian Mitochondrial',# 13 | ||||
| 217 | 'Flatworm Mitochondrial',# 14 | ||||
| 218 | 'Blepharisma Nuclear',# 15 | ||||
| 219 | 'Chlorophycean Mitochondrial',# 16 | ||||
| 220 | '', '', '', '', | ||||
| 221 | 'Trematode Mitochondrial',# 21 | ||||
| 222 | 'Scenedesmus obliquus Mitochondrial', #22 | ||||
| 223 | 'Thraustochytrium Mitochondrial', #23 | ||||
| 224 | 'Strict', #24, option for only ATG start | ||||
| 225 | ); | ||||
| 226 | |||||
| 227 | @TABLES = | ||||
| 228 | qw( | ||||
| 229 | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
| 230 | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG | ||||
| 231 | FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
| 232 | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
| 233 | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG | ||||
| 234 | FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
| 235 | '' '' | ||||
| 236 | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG | ||||
| 237 | FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
| 238 | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
| 239 | FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
| 240 | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG | ||||
| 241 | FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG | ||||
| 242 | FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
| 243 | FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
| 244 | '' '' '' '' | ||||
| 245 | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG | ||||
| 246 | FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
| 247 | FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
| 248 | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ||||
| 249 | ); | ||||
| 250 | |||||
| 251 | # (bases used for these tables, for reference) | ||||
| 252 | # 1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG | ||||
| 253 | # 2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG | ||||
| 254 | # 3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG | ||||
| 255 | |||||
| 256 | @STARTS = | ||||
| 257 | qw( | ||||
| 258 | ---M---------------M---------------M---------------------------- | ||||
| 259 | --------------------------------MMMM---------------M------------ | ||||
| 260 | ----------------------------------MM---------------------------- | ||||
| 261 | --MM---------------M------------MMMM---------------M------------ | ||||
| 262 | ---M----------------------------MMMM---------------M------------ | ||||
| 263 | -----------------------------------M---------------------------- | ||||
| 264 | '' '' | ||||
| 265 | -----------------------------------M---------------------------- | ||||
| 266 | -----------------------------------M---------------------------- | ||||
| 267 | ---M---------------M------------MMMM---------------M------------ | ||||
| 268 | -------------------M---------------M---------------------------- | ||||
| 269 | -----------------------------------M---------------------------- | ||||
| 270 | -----------------------------------M---------------------------- | ||||
| 271 | -----------------------------------M---------------------------- | ||||
| 272 | -----------------------------------M---------------------------- | ||||
| 273 | '' '' '' '' | ||||
| 274 | -----------------------------------M---------------M------------ | ||||
| 275 | -----------------------------------M---------------------------- | ||||
| 276 | --------------------------------M--M---------------M------------ | ||||
| 277 | -----------------------------------M---------------------------- | ||||
| 278 | ); | ||||
| 279 | |||||
| 280 | my @nucs = qw(t c a g); | ||||
| 281 | my $x = 0; | ||||
| 282 | ($CODONS, $TRCOL) = ({}, {}); | ||||
| 283 | for my $i (@nucs) { | ||||
| 284 | 4 | 1µs | for my $j (@nucs) { | ||
| 285 | 16 | 6µs | for my $k (@nucs) { | ||
| 286 | 256 | 103µs | my $codon = "$i$j$k"; | ||
| 287 | $CODONS->{$codon} = $x; | ||||
| 288 | $TRCOL->{$x} = $codon; | ||||
| 289 | $x++; | ||||
| 290 | } | ||||
| 291 | } | ||||
| 292 | } | ||||
| 293 | 1 | 5µs | %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub(); # spent 5µs making 1 call to Bio::Tools::IUPAC::iupac_iub | ||
| 294 | 1 | 8µs | %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup(); # spent 8µs making 1 call to Bio::Tools::IUPAC::iupac_iup | ||
| 295 | 1 | 51µs | %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2); # spent 51µs making 1 call to Bio::SeqUtils::valid_aa | ||
| 296 | 1 | 82µs | $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']'; # spent 82µs making 1 call to Bio::SeqUtils::valid_aa | ||
| 297 | $TERMINATOR = '*'; | ||||
| 298 | 1 | 1.80ms | 1 | 361µs | } # spent 361µs making 1 call to Bio::Tools::CodonTable::BEGIN@198 |
| 299 | |||||
| 300 | sub new { | ||||
| 301 | my($class,@args) = @_; | ||||
| 302 | my $self = $class->SUPER::new(@args); | ||||
| 303 | |||||
| 304 | my($id) = | ||||
| 305 | $self->_rearrange([qw(ID | ||||
| 306 | )], | ||||
| 307 | @args); | ||||
| 308 | |||||
| 309 | $id = 1 if ( ! $id ); | ||||
| 310 | $id && $self->id($id); | ||||
| 311 | return $self; # success - we hope! | ||||
| 312 | } | ||||
| 313 | |||||
| 314 | =head2 id | ||||
| 315 | |||||
| 316 | Title : id | ||||
| 317 | Usage : $obj->id(3); $id_integer = $obj->id(); | ||||
| 318 | Function: Sets or returns the id of the translation table. IDs are | ||||
| 319 | integers from 1 to 15, excluding 7 and 8 which have been | ||||
| 320 | removed as redundant. If an invalid ID is given the method | ||||
| 321 | returns 0, false. | ||||
| 322 | Example : | ||||
| 323 | Returns : value of id, a scalar, 0 if not a valid | ||||
| 324 | Args : newvalue (optional) | ||||
| 325 | |||||
| 326 | =cut | ||||
| 327 | |||||
| 328 | sub id{ | ||||
| 329 | my ($self,$value) = @_; | ||||
| 330 | if( defined $value) { | ||||
| 331 | if ( !(defined $TABLES[$value-1]) or $TABLES[$value-1] eq '') { | ||||
| 332 | $self->warn("Not a valid codon table ID [$value] "); | ||||
| 333 | $value = 0; | ||||
| 334 | } | ||||
| 335 | $self->{'id'} = $value; | ||||
| 336 | } | ||||
| 337 | return $self->{'id'}; | ||||
| 338 | } | ||||
| 339 | |||||
| 340 | =head2 name | ||||
| 341 | |||||
| 342 | Title : name | ||||
| 343 | Usage : $obj->name() | ||||
| 344 | Function: returns the descriptive name of the translation table | ||||
| 345 | Example : | ||||
| 346 | Returns : A string | ||||
| 347 | Args : None | ||||
| 348 | |||||
| 349 | |||||
| 350 | =cut | ||||
| 351 | |||||
| 352 | sub name{ | ||||
| 353 | my ($self) = @_; | ||||
| 354 | |||||
| 355 | my ($id) = $self->{'id'}; | ||||
| 356 | return $NAMES[$id-1]; | ||||
| 357 | } | ||||
| 358 | |||||
| 359 | =head2 tables | ||||
| 360 | |||||
| 361 | Title : tables | ||||
| 362 | Usage : $obj->tables() or Bio::Tools::CodonTable->tables() | ||||
| 363 | Function: returns a hash reference where each key is a valid codon | ||||
| 364 | table id() number, and each value is the corresponding | ||||
| 365 | codon table name() string | ||||
| 366 | Example : | ||||
| 367 | Returns : A hashref | ||||
| 368 | Args : None | ||||
| 369 | |||||
| 370 | |||||
| 371 | =cut | ||||
| 372 | |||||
| 373 | sub tables{ | ||||
| 374 | my %tables; | ||||
| 375 | for my $id (1 .. @NAMES) { | ||||
| 376 | my $name = $NAMES[$id-1]; | ||||
| 377 | $tables{$id} = $name if $name; | ||||
| 378 | } | ||||
| 379 | return \%tables; | ||||
| 380 | } | ||||
| 381 | |||||
| 382 | =head2 translate | ||||
| 383 | |||||
| 384 | Title : translate | ||||
| 385 | Usage : $obj->translate('YTR') | ||||
| 386 | Function: Returns a string of one letter amino acid codes from | ||||
| 387 | nucleotide sequence input. The imput can be of any length. | ||||
| 388 | |||||
| 389 | Returns 'X' for unknown codons and codons that code for | ||||
| 390 | more than one amino acid. Returns an empty string if input | ||||
| 391 | is not three characters long. Exceptions for these are: | ||||
| 392 | |||||
| 393 | - IUPAC amino acid code B for Aspartic Acid and | ||||
| 394 | Asparagine, is used. | ||||
| 395 | - IUPAC amino acid code Z for Glutamic Acid, Glutamine is | ||||
| 396 | used. | ||||
| 397 | - if the codon is two nucleotides long and if by adding | ||||
| 398 | an a third character 'N', it codes for a single amino | ||||
| 399 | acid (with exceptions above), return that, otherwise | ||||
| 400 | return empty string. | ||||
| 401 | |||||
| 402 | Returns empty string for other input strings that are not | ||||
| 403 | three characters long. | ||||
| 404 | |||||
| 405 | Example : | ||||
| 406 | Returns : a string of one letter ambiguous IUPAC amino acid codes | ||||
| 407 | Args : ambiguous IUPAC nucleotide string | ||||
| 408 | |||||
| 409 | |||||
| 410 | =cut | ||||
| 411 | |||||
| 412 | sub translate { | ||||
| 413 | my ($self, $seq, $complete_codon) = @_; | ||||
| 414 | $self->throw("Calling translate without a seq argument!") unless defined $seq; | ||||
| 415 | return '' unless $seq; | ||||
| 416 | |||||
| 417 | my $id = $self->id; | ||||
| 418 | my ($partial) = 0; | ||||
| 419 | $partial = 2 if length($seq) % CODONSIZE == 2; | ||||
| 420 | |||||
| 421 | $seq = lc $seq; | ||||
| 422 | $seq =~ tr/u/t/; | ||||
| 423 | my $protein = ""; | ||||
| 424 | if ($seq =~ /[^actg]/ ) { #ambiguous chars | ||||
| 425 | for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) { | ||||
| 426 | my $triplet = substr($seq, $i, CODONSIZE); | ||||
| 427 | if( $triplet eq $CODONGAP ) { | ||||
| 428 | $protein .= $GAP; | ||||
| 429 | } elsif (exists $CODONS->{$triplet}) { | ||||
| 430 | $protein .= substr($TABLES[$id-1], | ||||
| 431 | $CODONS->{$triplet},1); | ||||
| 432 | } else { | ||||
| 433 | $protein .= $self->_translate_ambiguous_codon($triplet); | ||||
| 434 | } | ||||
| 435 | } | ||||
| 436 | } else { # simple, strict translation | ||||
| 437 | for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) { | ||||
| 438 | my $triplet = substr($seq, $i, CODONSIZE); | ||||
| 439 | if( $triplet eq $CODONGAP ) { | ||||
| 440 | $protein .= $GAP; | ||||
| 441 | } if (exists $CODONS->{$triplet}) { | ||||
| 442 | $protein .= substr($TABLES[$id-1], $CODONS->{$triplet}, 1); | ||||
| 443 | } else { | ||||
| 444 | $protein .= 'X'; | ||||
| 445 | } | ||||
| 446 | } | ||||
| 447 | } | ||||
| 448 | if ($partial == 2 && $complete_codon) { # 2 overhanging nucleotides | ||||
| 449 | my $triplet = substr($seq, ($partial -4)). "n"; | ||||
| 450 | if( $triplet eq $CODONGAP ) { | ||||
| 451 | $protein .= $GAP; | ||||
| 452 | } elsif (exists $CODONS->{$triplet}) { | ||||
| 453 | my $aa = substr($TABLES[$id-1], $CODONS->{$triplet},1); | ||||
| 454 | $protein .= $aa; | ||||
| 455 | } else { | ||||
| 456 | $protein .= $self->_translate_ambiguous_codon($triplet, $partial); | ||||
| 457 | } | ||||
| 458 | } | ||||
| 459 | return $protein; | ||||
| 460 | } | ||||
| 461 | |||||
| 462 | sub _translate_ambiguous_codon { | ||||
| 463 | my ($self, $triplet, $partial) = @_; | ||||
| 464 | $partial ||= 0; | ||||
| 465 | my $id = $self->id; | ||||
| 466 | my $aa; | ||||
| 467 | my @codons = $self->unambiguous_codons($triplet); | ||||
| 468 | my %aas =(); | ||||
| 469 | foreach my $codon (@codons) { | ||||
| 470 | $aas{substr($TABLES[$id-1],$CODONS->{$codon},1)} = 1; | ||||
| 471 | } | ||||
| 472 | my $count = scalar keys %aas; | ||||
| 473 | if ( $count == 1 ) { | ||||
| 474 | $aa = (keys %aas)[0]; | ||||
| 475 | } | ||||
| 476 | elsif ( $count == 2 ) { | ||||
| 477 | if ($aas{'D'} and $aas{'N'}) { | ||||
| 478 | $aa = 'B'; | ||||
| 479 | } | ||||
| 480 | elsif ($aas{'E'} and $aas{'Q'}) { | ||||
| 481 | $aa = 'Z'; | ||||
| 482 | } else { | ||||
| 483 | $partial ? ($aa = '') : ($aa = 'X'); | ||||
| 484 | } | ||||
| 485 | } else { | ||||
| 486 | $partial ? ($aa = '') : ($aa = 'X'); | ||||
| 487 | } | ||||
| 488 | return $aa; | ||||
| 489 | } | ||||
| 490 | |||||
| 491 | =head2 translate_strict | ||||
| 492 | |||||
| 493 | Title : translate_strict | ||||
| 494 | Usage : $obj->translate_strict('ACT') | ||||
| 495 | Function: returns one letter amino acid code for a codon input | ||||
| 496 | |||||
| 497 | Fast and simple translation. User is responsible to resolve | ||||
| 498 | ambiguous nucleotide codes before calling this | ||||
| 499 | method. Returns 'X' for unknown codons and an empty string | ||||
| 500 | for input strings that are not three characters long. | ||||
| 501 | |||||
| 502 | It is not recommended to use this method in a production | ||||
| 503 | environment. Use method translate, instead. | ||||
| 504 | |||||
| 505 | Example : | ||||
| 506 | Returns : A string | ||||
| 507 | Args : a codon = a three nucleotide character string | ||||
| 508 | |||||
| 509 | |||||
| 510 | =cut | ||||
| 511 | |||||
| 512 | sub translate_strict{ | ||||
| 513 | my ($self, $value) = @_; | ||||
| 514 | my $id = $self->{'id'}; | ||||
| 515 | |||||
| 516 | $value = lc $value; | ||||
| 517 | $value =~ tr/u/t/; | ||||
| 518 | |||||
| 519 | return '' unless length $value == 3; | ||||
| 520 | |||||
| 521 | return 'X' unless defined $CODONS->{$value}; | ||||
| 522 | |||||
| 523 | return substr( $TABLES[$id-1], $CODONS->{$value}, 1 ); | ||||
| 524 | } | ||||
| 525 | |||||
| 526 | =head2 revtranslate | ||||
| 527 | |||||
| 528 | Title : revtranslate | ||||
| 529 | Usage : $obj->revtranslate('G') | ||||
| 530 | Function: returns codons for an amino acid | ||||
| 531 | |||||
| 532 | Returns an empty string for unknown amino acid | ||||
| 533 | codes. Ambiguous IUPAC codes Asx,B, (Asp,D; Asn,N) and | ||||
| 534 | Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three | ||||
| 535 | letter amino acid codes are accepted. '*' and 'Ter' are | ||||
| 536 | used for terminator. | ||||
| 537 | |||||
| 538 | By default, the output codons are shown in DNA. If the | ||||
| 539 | output is needed in RNA (tr/t/u/), add a second argument | ||||
| 540 | 'RNA'. | ||||
| 541 | |||||
| 542 | Example : $obj->revtranslate('Gly', 'RNA') | ||||
| 543 | Returns : An array of three lower case letter strings i.e. codons | ||||
| 544 | Args : amino acid, 'RNA' | ||||
| 545 | |||||
| 546 | =cut | ||||
| 547 | |||||
| 548 | sub revtranslate { | ||||
| 549 | my ($self, $value, $coding) = @_; | ||||
| 550 | my @codons; | ||||
| 551 | |||||
| 552 | if (length($value) == 3 ) { | ||||
| 553 | $value = lc $value; | ||||
| 554 | $value = ucfirst $value; | ||||
| 555 | $value = $THREELETTERSYMBOLS{$value}; | ||||
| 556 | } | ||||
| 557 | if ( defined $value and $value =~ /$VALID_PROTEIN/ | ||||
| 558 | and length($value) == 1 ) { | ||||
| 559 | my $id = $self->{'id'}; | ||||
| 560 | |||||
| 561 | $value = uc $value; | ||||
| 562 | my @aas = @{$IUPAC_AA{$value}}; | ||||
| 563 | foreach my $aa (@aas) { | ||||
| 564 | #print $aa, " -2\n"; | ||||
| 565 | $aa = '\*' if $aa eq '*'; | ||||
| 566 | while ($TABLES[$id-1] =~ m/$aa/g) { | ||||
| 567 | my $p = pos $TABLES[$id-1]; | ||||
| 568 | push (@codons, $TRCOL->{--$p}); | ||||
| 569 | } | ||||
| 570 | } | ||||
| 571 | } | ||||
| 572 | |||||
| 573 | if ($coding and uc ($coding) eq 'RNA') { | ||||
| 574 | for my $i (0..$#codons) { | ||||
| 575 | $codons[$i] =~ tr/t/u/; | ||||
| 576 | } | ||||
| 577 | } | ||||
| 578 | |||||
| 579 | return @codons; | ||||
| 580 | } | ||||
| 581 | |||||
| 582 | =head2 reverse_translate_all | ||||
| 583 | |||||
| 584 | Title : reverse_translate_all | ||||
| 585 | Usage : my $iup_str = $cttable->reverse_translate_all($seq_object) | ||||
| 586 | my $iup_str = $cttable->reverse_translate_all($seq_object, | ||||
| 587 | $cutable, | ||||
| 588 | 15); | ||||
| 589 | Function: reverse translates a protein sequence into IUPAC nucleotide | ||||
| 590 | sequence. An 'X' in the protein sequence is converted to 'NNN' | ||||
| 591 | in the nucleotide sequence. | ||||
| 592 | Returns : a string | ||||
| 593 | Args : a Bio::PrimarySeqI compatible object (mandatory) | ||||
| 594 | a Bio::CodonUsage::Table object and a threshold if only | ||||
| 595 | codons with a relative frequency above the threshold are | ||||
| 596 | to be considered. | ||||
| 597 | =cut | ||||
| 598 | |||||
| 599 | sub reverse_translate_all { | ||||
| 600 | |||||
| 601 | my ($self, $obj, $cut, $threshold) = @_; | ||||
| 602 | |||||
| 603 | ## check args are OK | ||||
| 604 | |||||
| 605 | if (!$obj || !$obj->isa('Bio::PrimarySeqI')){ | ||||
| 606 | $self->throw(" I need a Bio::PrimarySeqI object, not a [". | ||||
| 607 | ref($obj) . "]"); | ||||
| 608 | } | ||||
| 609 | if($obj->alphabet ne 'protein') { | ||||
| 610 | $self->throw("Cannot reverse translate, need an amino acid sequence .". | ||||
| 611 | "This sequence is of type [" . $obj->alphabet ."]"); | ||||
| 612 | } | ||||
| 613 | my @data; | ||||
| 614 | my @seq = split '', $obj->seq; | ||||
| 615 | |||||
| 616 | ## if we're not supplying a codon usage table... | ||||
| 617 | if( !$cut && !$threshold) { | ||||
| 618 | ## get lists of possible codons for each aa. | ||||
| 619 | for my $aa (@seq) { | ||||
| 620 | if ($aa =~ /x/i) { | ||||
| 621 | push @data, (['NNN']); | ||||
| 622 | }else { | ||||
| 623 | my @cods = $self->revtranslate($aa); | ||||
| 624 | push @data, \@cods; | ||||
| 625 | } | ||||
| 626 | } | ||||
| 627 | }else{ | ||||
| 628 | #else we are supplying a codon usage table, we just want common codons | ||||
| 629 | #check args first. | ||||
| 630 | if(!$cut->isa('Bio::CodonUsage::Table')) { | ||||
| 631 | $self->throw("I need a Bio::CodonUsage::Table object, not a [". | ||||
| 632 | ref($cut). "]."); | ||||
| 633 | } | ||||
| 634 | my $cod_ref = $cut->probable_codons($threshold); | ||||
| 635 | for my $aa (@seq) { | ||||
| 636 | if ($aa =~ /x/i) { | ||||
| 637 | push @data, (['NNN']); | ||||
| 638 | next; | ||||
| 639 | } | ||||
| 640 | push @data, $cod_ref->{$aa}; | ||||
| 641 | } | ||||
| 642 | } | ||||
| 643 | |||||
| 644 | return $self->_make_iupac_string(\@data); | ||||
| 645 | |||||
| 646 | } | ||||
| 647 | |||||
| 648 | =head2 reverse_translate_best | ||||
| 649 | |||||
| 650 | Title : reverse_translate_best | ||||
| 651 | Usage : my $str = $cttable->reverse_translate_best($seq_object,$cutable); | ||||
| 652 | Function: Reverse translates a protein sequence into plain nucleotide | ||||
| 653 | sequence (GATC), uses the most common codon for each amino acid | ||||
| 654 | Returns : A string | ||||
| 655 | Args : A Bio::PrimarySeqI compatible object and a Bio::CodonUsage::Table object | ||||
| 656 | |||||
| 657 | =cut | ||||
| 658 | |||||
| 659 | sub reverse_translate_best { | ||||
| 660 | |||||
| 661 | my ($self, $obj, $cut) = @_; | ||||
| 662 | |||||
| 663 | if (!$obj || !$obj->isa('Bio::PrimarySeqI')){ | ||||
| 664 | $self->throw(" I need a Bio::PrimarySeqI object, not a [". | ||||
| 665 | ref($obj) . "]"); | ||||
| 666 | } | ||||
| 667 | if ($obj->alphabet ne 'protein') { | ||||
| 668 | $self->throw("Cannot reverse translate, need an amino acid sequence .". | ||||
| 669 | "This sequence is of type [" . $obj->alphabet ."]"); | ||||
| 670 | } | ||||
| 671 | if ( !$cut | !$cut->isa('Bio::CodonUsage::Table')) { | ||||
| 672 | $self->throw("I need a Bio::CodonUsage::Table object, not a [". | ||||
| 673 | ref($cut). "]."); | ||||
| 674 | } | ||||
| 675 | |||||
| 676 | my $str = ''; | ||||
| 677 | my @seq = split '', $obj->seq; | ||||
| 678 | |||||
| 679 | my $cod_ref = $cut->most_common_codons(); | ||||
| 680 | |||||
| 681 | for my $aa ( @seq ) { | ||||
| 682 | if ($aa =~ /x/i) { | ||||
| 683 | $str .= 'NNN'; | ||||
| 684 | next; | ||||
| 685 | } | ||||
| 686 | if ( defined $cod_ref->{$aa} ) { | ||||
| 687 | $str .= $cod_ref->{$aa}; | ||||
| 688 | } else { | ||||
| 689 | $self->throw("Input sequence contains invalid character: $aa"); | ||||
| 690 | } | ||||
| 691 | } | ||||
| 692 | $str; | ||||
| 693 | } | ||||
| 694 | |||||
| 695 | =head2 is_start_codon | ||||
| 696 | |||||
| 697 | Title : is_start_codon | ||||
| 698 | Usage : $obj->is_start_codon('ATG') | ||||
| 699 | Function: returns true (1) for all codons that can be used as a | ||||
| 700 | translation start, false (0) for others. | ||||
| 701 | Example : $myCodonTable->is_start_codon('ATG') | ||||
| 702 | Returns : boolean | ||||
| 703 | Args : codon | ||||
| 704 | |||||
| 705 | =cut | ||||
| 706 | |||||
| 707 | sub is_start_codon{ | ||||
| 708 | shift->_codon_is( shift, \@STARTS, 'M' ); | ||||
| 709 | } | ||||
| 710 | |||||
| 711 | =head2 is_ter_codon | ||||
| 712 | |||||
| 713 | Title : is_ter_codon | ||||
| 714 | Usage : $obj->is_ter_codon('GAA') | ||||
| 715 | Function: returns true (1) for all codons that can be used as a | ||||
| 716 | translation tarminator, false (0) for others. | ||||
| 717 | Example : $myCodonTable->is_ter_codon('ATG') | ||||
| 718 | Returns : boolean | ||||
| 719 | Args : codon | ||||
| 720 | |||||
| 721 | =cut | ||||
| 722 | |||||
| 723 | sub is_ter_codon{ | ||||
| 724 | shift->_codon_is( shift, \@TABLES, $TERMINATOR ); | ||||
| 725 | } | ||||
| 726 | |||||
| 727 | # desc: compares the passed value with a single entry in the given | ||||
| 728 | # codon table | ||||
| 729 | # args: a value (typically a three-char string like 'atg'), | ||||
| 730 | # a reference to the appropriate set of codon tables, | ||||
| 731 | # a single-character value to check for at the position in the | ||||
| 732 | # given codon table | ||||
| 733 | # ret: boolean, true if the given codon table contains the $key at the | ||||
| 734 | # position corresponding to $value | ||||
| 735 | sub _codon_is { | ||||
| 736 | my ($self, $value, $table, $key ) = @_; | ||||
| 737 | |||||
| 738 | return 0 unless length $value == 3; | ||||
| 739 | |||||
| 740 | $value = lc $value; | ||||
| 741 | $value =~ tr/u/t/; | ||||
| 742 | |||||
| 743 | my $id = $self->{'id'}; | ||||
| 744 | for my $c ( $self->unambiguous_codons($value) ) { | ||||
| 745 | my $m = substr( $table->[$id-1], $CODONS->{$c}, 1 ); | ||||
| 746 | return 0 unless $m eq $key; | ||||
| 747 | } | ||||
| 748 | return 1; | ||||
| 749 | } | ||||
| 750 | |||||
| 751 | =head2 is_unknown_codon | ||||
| 752 | |||||
| 753 | Title : is_unknown_codon | ||||
| 754 | Usage : $obj->is_unknown_codon('GAJ') | ||||
| 755 | Function: returns false (0) for all codons that are valid, | ||||
| 756 | true (1) for others. | ||||
| 757 | Example : $myCodonTable->is_unknown_codon('NTG') | ||||
| 758 | Returns : boolean | ||||
| 759 | Args : codon | ||||
| 760 | |||||
| 761 | |||||
| 762 | =cut | ||||
| 763 | |||||
| 764 | sub is_unknown_codon{ | ||||
| 765 | my ($self, $value) = @_; | ||||
| 766 | $value = lc $value; | ||||
| 767 | $value =~ tr/u/t/; | ||||
| 768 | return 1 unless $self->unambiguous_codons($value); | ||||
| 769 | return 0; | ||||
| 770 | } | ||||
| 771 | |||||
| 772 | =head2 unambiguous_codons | ||||
| 773 | |||||
| 774 | Title : unambiguous_codons | ||||
| 775 | Usage : @codons = $self->unambiguous_codons('ACN') | ||||
| 776 | Returns : array of strings (one-letter unambiguous amino acid codes) | ||||
| 777 | Args : a codon = a three IUPAC nucleotide character string | ||||
| 778 | |||||
| 779 | =cut | ||||
| 780 | |||||
| 781 | sub unambiguous_codons{ | ||||
| 782 | my ($self,$value) = @_; | ||||
| 783 | my @nts = map { $IUPAC_DNA{uc $_} } split(//, $value); | ||||
| 784 | |||||
| 785 | my @codons; | ||||
| 786 | for my $i ( @{$nts[0]} ) { | ||||
| 787 | for my $j ( @{$nts[1]} ) { | ||||
| 788 | for my $k ( @{$nts[2]} ) { | ||||
| 789 | push @codons, lc "$i$j$k"; | ||||
| 790 | }}} | ||||
| 791 | return @codons; | ||||
| 792 | } | ||||
| 793 | |||||
| 794 | =head2 _unambiquous_codons | ||||
| 795 | |||||
| 796 | deprecated, now an alias for unambiguous_codons | ||||
| 797 | |||||
| 798 | =cut | ||||
| 799 | |||||
| 800 | sub _unambiquous_codons { | ||||
| 801 | unambiguous_codons( undef, @_ ); | ||||
| 802 | } | ||||
| 803 | |||||
| 804 | =head2 add_table | ||||
| 805 | |||||
| 806 | Title : add_table | ||||
| 807 | Usage : $newid = $ct->add_table($name, $table, $starts) | ||||
| 808 | Function: Add a custom Codon Table into the object. | ||||
| 809 | Know what you are doing, only the length of | ||||
| 810 | the argument strings is checked! | ||||
| 811 | Returns : the id of the new codon table | ||||
| 812 | Args : name, a string, optional (can be empty) | ||||
| 813 | table, a string of 64 characters | ||||
| 814 | startcodons, a string of 64 characters, defaults to standard | ||||
| 815 | |||||
| 816 | =cut | ||||
| 817 | |||||
| 818 | sub add_table { | ||||
| 819 | my ($self, $name, $table, $starts) = @_; | ||||
| 820 | |||||
| 821 | $name ||= 'Custom'. scalar @NAMES + 1; | ||||
| 822 | $starts ||= $STARTS[0]; | ||||
| 823 | $self->throw('Suspect input!') | ||||
| 824 | unless length($table) == 64 and length($starts) == 64; | ||||
| 825 | |||||
| 826 | push @NAMES, $name; | ||||
| 827 | push @TABLES, $table; | ||||
| 828 | push @STARTS, $starts; | ||||
| 829 | |||||
| 830 | return scalar @NAMES; | ||||
| 831 | |||||
| 832 | } | ||||
| 833 | |||||
| 834 | sub _make_iupac_string { | ||||
| 835 | |||||
| 836 | my ($self, $cod_ref) = @_; | ||||
| 837 | if(ref($cod_ref) ne 'ARRAY') { | ||||
| 838 | $self->throw(" I need a reference to a list of references to codons, ". | ||||
| 839 | " not a [". ref($cod_ref) . "]."); | ||||
| 840 | } | ||||
| 841 | my %iupac_hash = Bio::Tools::IUPAC->iupac_rev_iub(); | ||||
| 842 | my $iupac_string = ''; ## the string to be returned | ||||
| 843 | for my $aa (@$cod_ref) { | ||||
| 844 | |||||
| 845 | ## scan through codon positions, record the differing values, | ||||
| 846 | # then look up in the iub hash | ||||
| 847 | for my $index(0..2) { | ||||
| 848 | my %h; | ||||
| 849 | map { my $k = substr($_,$index,1); | ||||
| 850 | $h{$k} = undef;} @$aa; | ||||
| 851 | my $lookup_key = join '', sort{$a cmp $b}keys %h; | ||||
| 852 | |||||
| 853 | ## extend string | ||||
| 854 | $iupac_string .= $iupac_hash{uc$lookup_key}; | ||||
| 855 | } | ||||
| 856 | } | ||||
| 857 | return $iupac_string; | ||||
| 858 | |||||
| 859 | } | ||||
| 860 | |||||
| 861 | |||||
| 862 | 1 | 3µs | 1; |