| Filename | /Users/ap13/perl5/lib/perl5/Bio/Tools/GuessSeqFormat.pm |
| Statements | Executed 8 statements in 2.47ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 13µs | 26µs | Bio::Tools::GuessSeqFormat::BEGIN@247 |
| 1 | 1 | 1 | 8µs | 13µs | Bio::Tools::GuessSeqFormat::BEGIN@248 |
| 1 | 1 | 1 | 8µs | 70µs | Bio::Tools::GuessSeqFormat::BEGIN@251 |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_ace |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_blast |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_bowtie |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_clustalw |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_codata |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_embl |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_fasta |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_fastq |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_fastxy |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_game |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_gcg |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_gcgblast |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_gcgfasta |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_gde |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_genbank |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_genscan |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_gff |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_hmmer |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_mase |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_mega |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_msf |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_nexus |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_pfam |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_phrap |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_phylip |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_pir |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_prodom |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_raw |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_rsf |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_selex |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_stockholm |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_swiss |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_tab |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::_possibly_vcf |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::fh |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::file |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::guess |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::new |
| 0 | 0 | 0 | 0s | 0s | Bio::Tools::GuessSeqFormat::text |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | #------------------------------------------------------------------ | ||||
| 2 | # | ||||
| 3 | # BioPerl module Bio::Tools::GuessSeqFormat | ||||
| 4 | # | ||||
| 5 | # Please direct questions and support issues to <bioperl-l@bioperl.org> | ||||
| 6 | # | ||||
| 7 | # Cared for by Andreas Kähäri, andreas.kahari@ebi.ac.uk | ||||
| 8 | # | ||||
| 9 | # You may distribute this module under the same terms as perl itself | ||||
| 10 | #------------------------------------------------------------------ | ||||
| 11 | |||||
| 12 | =encoding utf-8 | ||||
| 13 | |||||
| 14 | =head1 NAME | ||||
| 15 | |||||
| 16 | Bio::Tools::GuessSeqFormat - Module for determining the sequence | ||||
| 17 | format of the contents of a file, a string, or through a | ||||
| 18 | filehandle. | ||||
| 19 | |||||
| 20 | =head1 SYNOPSIS | ||||
| 21 | |||||
| 22 | # To guess the format of a flat file, given a filename: | ||||
| 23 | my $guesser = Bio::Tools::GuessSeqFormat->new( -file => $filename ); | ||||
| 24 | my $format = $guesser->guess; | ||||
| 25 | |||||
| 26 | # To guess the format from an already open filehandle: | ||||
| 27 | my $guesser = Bio::Tools::GuessSeqFormat->new( -fh => $filehandle ); | ||||
| 28 | my $format = $guesser->guess; | ||||
| 29 | # The filehandle will be returned to its original position. Note that this | ||||
| 30 | # filehandle can be STDIN. | ||||
| 31 | |||||
| 32 | # To guess the format of one or several lines of text (with | ||||
| 33 | # embedded newlines): | ||||
| 34 | my $guesser = Bio::Tools::GuessSeqFormat->new( -text => $linesoftext ); | ||||
| 35 | my $format = $guesser->guess; | ||||
| 36 | |||||
| 37 | # To create a Bio::Tools::GuessSeqFormat object and set the | ||||
| 38 | # filename, filehandle, or line to parse afterwards: | ||||
| 39 | my $guesser = Bio::Tools::GuessSeqFormat->new(); | ||||
| 40 | $guesser->file($filename); | ||||
| 41 | $guesser->fh($filehandle); | ||||
| 42 | $guesser->text($linesoftext); | ||||
| 43 | |||||
| 44 | # To guess in one go, given e.g. a filename: | ||||
| 45 | my $format = Bio::Tools::GuessSeqFormat->new( -file => $filename )->guess; | ||||
| 46 | |||||
| 47 | =head1 DESCRIPTION | ||||
| 48 | |||||
| 49 | Bio::Tools::GuessSeqFormat tries to guess the format ("swiss", | ||||
| 50 | "pir", "fasta" etc.) of the sequence or MSA in a file, in a | ||||
| 51 | scalar, or through a filehandle. | ||||
| 52 | |||||
| 53 | The guess() method of a Bio::Tools::GuessSeqFormat object will | ||||
| 54 | examine the data, line by line, until it finds a line to which | ||||
| 55 | only one format can be assigned. If no conclusive guess can be | ||||
| 56 | made, undef is returned. | ||||
| 57 | |||||
| 58 | If the Bio::Tools::GuessSeqFormat object is given a filehandle, | ||||
| 59 | e.g. STDIN, it will be restored to its original position on | ||||
| 60 | return from the guess() method. | ||||
| 61 | |||||
| 62 | =head2 Formats | ||||
| 63 | |||||
| 64 | Tests are currently implemented for the following formats: | ||||
| 65 | |||||
| 66 | =over | ||||
| 67 | |||||
| 68 | =item * | ||||
| 69 | |||||
| 70 | ACeDB ("ace") | ||||
| 71 | |||||
| 72 | =item * | ||||
| 73 | |||||
| 74 | Blast ("blast") | ||||
| 75 | |||||
| 76 | =item * | ||||
| 77 | |||||
| 78 | ClustalW ("clustalw") | ||||
| 79 | |||||
| 80 | =item * | ||||
| 81 | |||||
| 82 | Codata ("codata") | ||||
| 83 | |||||
| 84 | =item * | ||||
| 85 | |||||
| 86 | EMBL ("embl") | ||||
| 87 | |||||
| 88 | =item * | ||||
| 89 | |||||
| 90 | FastA sequence ("fasta") | ||||
| 91 | |||||
| 92 | =item * | ||||
| 93 | |||||
| 94 | FastQ sequence ("fastq") | ||||
| 95 | |||||
| 96 | =item * | ||||
| 97 | |||||
| 98 | FastXY/FastA alignment ("fastxy") | ||||
| 99 | |||||
| 100 | =item * | ||||
| 101 | |||||
| 102 | Game XML ("game") | ||||
| 103 | |||||
| 104 | =item * | ||||
| 105 | |||||
| 106 | GCG ("gcg") | ||||
| 107 | |||||
| 108 | =item * | ||||
| 109 | |||||
| 110 | GCG Blast ("gcgblast") | ||||
| 111 | |||||
| 112 | =item * | ||||
| 113 | |||||
| 114 | GCG FastA ("gcgfasta") | ||||
| 115 | |||||
| 116 | =item * | ||||
| 117 | |||||
| 118 | GDE ("gde") | ||||
| 119 | |||||
| 120 | =item * | ||||
| 121 | |||||
| 122 | Genbank ("genbank") | ||||
| 123 | |||||
| 124 | =item * | ||||
| 125 | |||||
| 126 | Genscan ("genscan") | ||||
| 127 | |||||
| 128 | =item * | ||||
| 129 | |||||
| 130 | GFF ("gff") | ||||
| 131 | |||||
| 132 | =item * | ||||
| 133 | |||||
| 134 | HMMER ("hmmer") | ||||
| 135 | |||||
| 136 | =item * | ||||
| 137 | |||||
| 138 | PAUP/NEXUS ("nexus") | ||||
| 139 | |||||
| 140 | =item * | ||||
| 141 | |||||
| 142 | Phrap assembly file ("phrap") | ||||
| 143 | |||||
| 144 | =item * | ||||
| 145 | |||||
| 146 | NBRF/PIR ("pir") | ||||
| 147 | |||||
| 148 | =item * | ||||
| 149 | |||||
| 150 | Mase ("mase") | ||||
| 151 | |||||
| 152 | =item * | ||||
| 153 | |||||
| 154 | Mega ("mega") | ||||
| 155 | |||||
| 156 | =item * | ||||
| 157 | |||||
| 158 | GCG/MSF ("msf") | ||||
| 159 | |||||
| 160 | =item * | ||||
| 161 | |||||
| 162 | Pfam ("pfam") | ||||
| 163 | |||||
| 164 | =item * | ||||
| 165 | |||||
| 166 | Phylip ("phylip") | ||||
| 167 | |||||
| 168 | =item * | ||||
| 169 | |||||
| 170 | Prodom ("prodom") | ||||
| 171 | |||||
| 172 | =item * | ||||
| 173 | |||||
| 174 | Raw ("raw") | ||||
| 175 | |||||
| 176 | =item * | ||||
| 177 | |||||
| 178 | RSF ("rsf") | ||||
| 179 | |||||
| 180 | =item * | ||||
| 181 | |||||
| 182 | Selex ("selex") | ||||
| 183 | |||||
| 184 | =item * | ||||
| 185 | |||||
| 186 | Stockholm ("stockholm") | ||||
| 187 | |||||
| 188 | =item * | ||||
| 189 | |||||
| 190 | Swissprot ("swiss") | ||||
| 191 | |||||
| 192 | =item * | ||||
| 193 | |||||
| 194 | Tab ("tab") | ||||
| 195 | |||||
| 196 | =item * | ||||
| 197 | |||||
| 198 | Variant Call Format ("vcf") | ||||
| 199 | |||||
| 200 | =back | ||||
| 201 | |||||
| 202 | =head1 FEEDBACK | ||||
| 203 | |||||
| 204 | =head2 Mailing Lists | ||||
| 205 | |||||
| 206 | User feedback is an integral part of the evolution of this and | ||||
| 207 | other Bioperl modules. Send your comments and suggestions | ||||
| 208 | preferably to one of the Bioperl mailing lists. Your | ||||
| 209 | participation is much appreciated. | ||||
| 210 | |||||
| 211 | bioperl-l@bioperl.org - General discussion | ||||
| 212 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | ||||
| 213 | |||||
| 214 | =head2 Support | ||||
| 215 | |||||
| 216 | Please direct usage questions or support issues to the mailing list: | ||||
| 217 | |||||
| 218 | I<bioperl-l@bioperl.org> | ||||
| 219 | |||||
| 220 | rather than to the module maintainer directly. Many experienced and | ||||
| 221 | reponsive experts will be able look at the problem and quickly | ||||
| 222 | address it. Please include a thorough description of the problem | ||||
| 223 | with code and data examples if at all possible. | ||||
| 224 | |||||
| 225 | =head2 Reporting Bugs | ||||
| 226 | |||||
| 227 | Report bugs to the Bioperl bug tracking system to help us | ||||
| 228 | keep track the bugs and their resolution. Bug reports can be | ||||
| 229 | submitted via the web: | ||||
| 230 | |||||
| 231 | https://github.com/bioperl/bioperl-live/issues | ||||
| 232 | |||||
| 233 | =head1 AUTHOR | ||||
| 234 | |||||
| 235 | Andreas KE<228>hE<228>ri, andreas.kahari@ebi.ac.uk | ||||
| 236 | |||||
| 237 | =head1 CONTRIBUTORS | ||||
| 238 | |||||
| 239 | Heikki LehvE<228>slaiho, heikki-at-bioperl-dot-org | ||||
| 240 | Mark A. Jensen, maj-at-fortinbras-dot-us | ||||
| 241 | |||||
| 242 | =cut | ||||
| 243 | |||||
| 244 | |||||
| 245 | package Bio::Tools::GuessSeqFormat; | ||||
| 246 | |||||
| 247 | 2 | 24µs | 2 | 39µs | # spent 26µs (13+13) within Bio::Tools::GuessSeqFormat::BEGIN@247 which was called:
# once (13µs+13µs) by Bio::SeqIO::BEGIN@332 at line 247 # spent 26µs making 1 call to Bio::Tools::GuessSeqFormat::BEGIN@247
# spent 13µs making 1 call to strict::import |
| 248 | 2 | 24µs | 2 | 18µs | # spent 13µs (8+5) within Bio::Tools::GuessSeqFormat::BEGIN@248 which was called:
# once (8µs+5µs) by Bio::SeqIO::BEGIN@332 at line 248 # spent 13µs making 1 call to Bio::Tools::GuessSeqFormat::BEGIN@248
# spent 5µs making 1 call to warnings::import |
| 249 | |||||
| 250 | |||||
| 251 | 2 | 2.35ms | 2 | 132µs | # spent 70µs (8+62) within Bio::Tools::GuessSeqFormat::BEGIN@251 which was called:
# once (8µs+62µs) by Bio::SeqIO::BEGIN@332 at line 251 # spent 70µs making 1 call to Bio::Tools::GuessSeqFormat::BEGIN@251
# spent 62µs making 1 call to base::import |
| 252 | |||||
| 253 | =head1 METHODS | ||||
| 254 | |||||
| 255 | Methods available to Bio::Tools::GuessSeqFormat objects | ||||
| 256 | are described below. Methods with names beginning with an | ||||
| 257 | underscore are considered to be internal. | ||||
| 258 | |||||
| 259 | =cut | ||||
| 260 | |||||
| 261 | =head2 new | ||||
| 262 | |||||
| 263 | Title : new | ||||
| 264 | Usage : $guesser = Bio::Tools::GuessSeqFormat->new( ... ); | ||||
| 265 | Function : Creates a new object. | ||||
| 266 | Example : See SYNOPSIS. | ||||
| 267 | Returns : A new object. | ||||
| 268 | Arguments : -file The filename of the file whose format is to | ||||
| 269 | be guessed, e.g. STDIN, or | ||||
| 270 | -fh An already opened filehandle from which a text | ||||
| 271 | stream may be read, or | ||||
| 272 | -text A scalar containing one or several lines of | ||||
| 273 | text with embedded newlines. | ||||
| 274 | |||||
| 275 | If more than one of the above arguments are given, they | ||||
| 276 | are tested in the order -text, -file, -fh, and the first | ||||
| 277 | available argument will be used. | ||||
| 278 | |||||
| 279 | =cut | ||||
| 280 | |||||
| 281 | sub new | ||||
| 282 | { | ||||
| 283 | my $class = shift; | ||||
| 284 | my @args = @_; | ||||
| 285 | |||||
| 286 | my $self = $class->SUPER::new(@args); | ||||
| 287 | |||||
| 288 | my $attr; | ||||
| 289 | my $value; | ||||
| 290 | |||||
| 291 | while (@args) { | ||||
| 292 | $attr = shift @args; | ||||
| 293 | $attr = lc $attr; | ||||
| 294 | $value = shift @args; | ||||
| 295 | $self->{$attr} = $value; | ||||
| 296 | } | ||||
| 297 | |||||
| 298 | return $self; | ||||
| 299 | } | ||||
| 300 | |||||
| 301 | =head2 file | ||||
| 302 | |||||
| 303 | Title : file | ||||
| 304 | Usage : $guesser->file($filename); | ||||
| 305 | $filename = $guesser->file; | ||||
| 306 | Function : Gets or sets the current filename associated with | ||||
| 307 | an object. | ||||
| 308 | Returns : The new filename. | ||||
| 309 | Arguments : The filename of the file whose format is to be | ||||
| 310 | guessed. | ||||
| 311 | |||||
| 312 | A call to this method will clear the current filehandle and | ||||
| 313 | the current lines of text associated with the object. | ||||
| 314 | |||||
| 315 | =cut | ||||
| 316 | |||||
| 317 | sub file | ||||
| 318 | { | ||||
| 319 | # Sets and/or returns the filename to use. | ||||
| 320 | my $self = shift; | ||||
| 321 | my $file = shift; | ||||
| 322 | |||||
| 323 | if (defined $file) { | ||||
| 324 | # Set the active filename, and clear the filehandle and | ||||
| 325 | # text line, if present. | ||||
| 326 | $self->{-file} = $file; | ||||
| 327 | $self->{-fh} = $self->{-text} = undef; | ||||
| 328 | } | ||||
| 329 | |||||
| 330 | return $self->{-file}; | ||||
| 331 | } | ||||
| 332 | |||||
| 333 | =head2 fh | ||||
| 334 | |||||
| 335 | Title : fh | ||||
| 336 | Usage : $guesser->fh($filehandle); | ||||
| 337 | $filehandle = $guesser->fh; | ||||
| 338 | Function : Gets or sets the current filehandle associated with | ||||
| 339 | an object. | ||||
| 340 | Returns : The new filehandle. | ||||
| 341 | Arguments : An already opened filehandle from which a text | ||||
| 342 | stream may be read. | ||||
| 343 | |||||
| 344 | A call to this method will clear the current filename and | ||||
| 345 | the current lines of text associated with the object. | ||||
| 346 | |||||
| 347 | =cut | ||||
| 348 | |||||
| 349 | sub fh | ||||
| 350 | { | ||||
| 351 | # Sets and/or returns the filehandle to use. | ||||
| 352 | my $self = shift; | ||||
| 353 | my $fh = shift; | ||||
| 354 | |||||
| 355 | if (defined $fh) { | ||||
| 356 | # Set the active filehandle, and clear the filename and | ||||
| 357 | # text line, if present. | ||||
| 358 | $self->{-fh} = $fh; | ||||
| 359 | $self->{-file} = $self->{-text} = undef; | ||||
| 360 | } | ||||
| 361 | |||||
| 362 | return $self->{-fh}; | ||||
| 363 | } | ||||
| 364 | |||||
| 365 | |||||
| 366 | =head2 text | ||||
| 367 | |||||
| 368 | Title : text | ||||
| 369 | Usage : $guesser->text($linesoftext); | ||||
| 370 | $linesofext = $guesser->text; | ||||
| 371 | Function : Gets or sets the current text associated with an | ||||
| 372 | object. | ||||
| 373 | Returns : The new lines of texts. | ||||
| 374 | Arguments : A scalar containing one or several lines of text, | ||||
| 375 | including embedded newlines. | ||||
| 376 | |||||
| 377 | A call to this method will clear the current filename and | ||||
| 378 | the current filehandle associated with the object. | ||||
| 379 | |||||
| 380 | =cut | ||||
| 381 | |||||
| 382 | sub text | ||||
| 383 | { | ||||
| 384 | # Sets and/or returns the text lines to use. | ||||
| 385 | my $self = shift; | ||||
| 386 | my $text = shift; | ||||
| 387 | |||||
| 388 | if (defined $text) { | ||||
| 389 | # Set the active text lines, and clear the filehandle | ||||
| 390 | # and filename, if present. | ||||
| 391 | $self->{-text} = $text; | ||||
| 392 | $self->{-fh} = $self->{-file} = undef; | ||||
| 393 | } | ||||
| 394 | |||||
| 395 | return $self->{-text}; | ||||
| 396 | } | ||||
| 397 | |||||
| 398 | =head2 guess | ||||
| 399 | |||||
| 400 | Title : guess | ||||
| 401 | Usage : $format = $guesser->guess; | ||||
| 402 | @format = $guesser->guess; # if given a line of text | ||||
| 403 | Function : Guesses the format of the data accociated with the | ||||
| 404 | object. | ||||
| 405 | Returns : A format string such as "swiss" or "pir". If a | ||||
| 406 | format can not be found, undef is returned. | ||||
| 407 | Arguments : None. | ||||
| 408 | |||||
| 409 | If the object is associated with a filehandle, the position | ||||
| 410 | of the filehandle will be returned to its original position | ||||
| 411 | before the method returns. | ||||
| 412 | |||||
| 413 | =cut | ||||
| 414 | |||||
| 415 | 1 | 26µs | our %formats = ( | ||
| 416 | ace => { test => \&_possibly_ace }, | ||||
| 417 | blast => { test => \&_possibly_blast }, | ||||
| 418 | bowtie => { test => \&_possibly_bowtie }, | ||||
| 419 | clustalw => { test => \&_possibly_clustalw }, | ||||
| 420 | codata => { test => \&_possibly_codata }, | ||||
| 421 | embl => { test => \&_possibly_embl }, | ||||
| 422 | fasta => { test => \&_possibly_fasta }, | ||||
| 423 | fastq => { test => \&_possibly_fastq }, | ||||
| 424 | fastxy => { test => \&_possibly_fastxy }, | ||||
| 425 | game => { test => \&_possibly_game }, | ||||
| 426 | gcg => { test => \&_possibly_gcg }, | ||||
| 427 | gcgblast => { test => \&_possibly_gcgblast }, | ||||
| 428 | gcgfasta => { test => \&_possibly_gcgfasta }, | ||||
| 429 | gde => { test => \&_possibly_gde }, | ||||
| 430 | genbank => { test => \&_possibly_genbank }, | ||||
| 431 | genscan => { test => \&_possibly_genscan }, | ||||
| 432 | gff => { test => \&_possibly_gff }, | ||||
| 433 | hmmer => { test => \&_possibly_hmmer }, | ||||
| 434 | nexus => { test => \&_possibly_nexus }, | ||||
| 435 | mase => { test => \&_possibly_mase }, | ||||
| 436 | mega => { test => \&_possibly_mega }, | ||||
| 437 | msf => { test => \&_possibly_msf }, | ||||
| 438 | pfam => { test => \&_possibly_pfam }, | ||||
| 439 | phrap => { test => \&_possibly_phrap }, | ||||
| 440 | phylip => { test => \&_possibly_phylip }, | ||||
| 441 | pir => { test => \&_possibly_pir }, | ||||
| 442 | prodom => { test => \&_possibly_prodom }, | ||||
| 443 | raw => { test => \&_possibly_raw }, | ||||
| 444 | rsf => { test => \&_possibly_rsf }, | ||||
| 445 | selex => { test => \&_possibly_selex }, | ||||
| 446 | stockholm => { test => \&_possibly_stockholm }, | ||||
| 447 | swiss => { test => \&_possibly_swiss }, | ||||
| 448 | tab => { test => \&_possibly_tab }, | ||||
| 449 | vcf => { test => \&_possibly_vcf }, | ||||
| 450 | ); | ||||
| 451 | |||||
| 452 | sub guess | ||||
| 453 | { | ||||
| 454 | my $self = shift; | ||||
| 455 | |||||
| 456 | while (my ($fmt_key) = each (%formats)) { | ||||
| 457 | $formats{$fmt_key}{fmt_string} = $fmt_key; | ||||
| 458 | } | ||||
| 459 | |||||
| 460 | my $fh; | ||||
| 461 | my $start_pos; | ||||
| 462 | if (defined $self->{-text}) { | ||||
| 463 | # Break the text into separate lines. | ||||
| 464 | my $text = $self->{-text}; | ||||
| 465 | open $fh, '<', \$text or $self->throw("Could not read from string: $!"); | ||||
| 466 | |||||
| 467 | } elsif (defined $self->{-file}) { | ||||
| 468 | # If given a filename, open the file. | ||||
| 469 | my $file = $self->{-file}; | ||||
| 470 | open $fh, '<', $file or $self->throw("Could not read file '$file': $!"); | ||||
| 471 | |||||
| 472 | } elsif (defined $self->{-fh}) { | ||||
| 473 | # If given a filehandle, get the current position in the stream. | ||||
| 474 | $fh = $self->{-fh}; | ||||
| 475 | if (not seek $fh, 0, 1) { # seek to current position to determine seekability | ||||
| 476 | # Work around non-seekable filehandles if IO::Scalar is available | ||||
| 477 | # (adapted from http://www.perlmonks.org/?node_id=33587) | ||||
| 478 | # IO::Mark may be an option for very large streams? | ||||
| 479 | $self->throw("Need IO::Scalar to guess from unseekable filehandles") | ||||
| 480 | if not eval { require IO::Scalar }; | ||||
| 481 | my $data; | ||||
| 482 | { local $/; $data = <$fh>; $.-- }; # copy raw data from fh | ||||
| 483 | tie *$fh, 'IO::Scalar', my $s; # replace fh by scalar-tied fh | ||||
| 484 | print $fh $data; # write raw data to tied fh | ||||
| 485 | seek $fh, 0, 0; # return to start of tied fh | ||||
| 486 | } | ||||
| 487 | $start_pos = tell $fh; | ||||
| 488 | } | ||||
| 489 | |||||
| 490 | my $done = 0; | ||||
| 491 | my $lineno = 0; | ||||
| 492 | my $guess; | ||||
| 493 | while (!$done) { | ||||
| 494 | my $line; # The next line of the file. | ||||
| 495 | my $match = 0; # Number of possible formats of this line. | ||||
| 496 | |||||
| 497 | last if (!defined($line = <$fh>)); | ||||
| 498 | next if ($line =~ /^\s*$/); # Skip white and empty lines. | ||||
| 499 | chomp $line; | ||||
| 500 | $line =~ s/\r$//; # Fix for DOS files on Unix. | ||||
| 501 | ++$lineno; | ||||
| 502 | |||||
| 503 | while (my ($fmt_key, $fmt) = each (%formats)) { | ||||
| 504 | if ($fmt->{test}($line, $lineno)) { | ||||
| 505 | ++$match; | ||||
| 506 | $guess = $fmt->{fmt_string}; | ||||
| 507 | } | ||||
| 508 | } | ||||
| 509 | |||||
| 510 | # We're done if there was only one match. | ||||
| 511 | $done = ($match == 1); | ||||
| 512 | } | ||||
| 513 | |||||
| 514 | if (defined $self->{-fh}) { | ||||
| 515 | # Go back to original position in filehandle | ||||
| 516 | seek $fh, $start_pos, 0 or $self->throw("Could not reset filehandle $fh: $!"); | ||||
| 517 | } else { | ||||
| 518 | # Close the filehandle we opened | ||||
| 519 | close $fh; | ||||
| 520 | } | ||||
| 521 | return ($done ? $guess : undef); | ||||
| 522 | } | ||||
| 523 | |||||
| 524 | =head1 HELPER SUBROUTINES | ||||
| 525 | |||||
| 526 | All helper subroutines will, given a line of text and the line | ||||
| 527 | number of the same line, return 1 if the line possibly is from a | ||||
| 528 | file of the type that they perform a test of. | ||||
| 529 | |||||
| 530 | A zero return value does not mean that the line is not part | ||||
| 531 | of a certain type of file, just that the test did not find any | ||||
| 532 | characteristics of that type of file in the line. | ||||
| 533 | |||||
| 534 | =head2 _possibly_ace | ||||
| 535 | |||||
| 536 | From bioperl test data, and from | ||||
| 537 | "http://www.isrec.isb-sib.ch/DEA/module8/B_Stevenson/Practicals/transcriptome_recon/transcriptome_recon.html". | ||||
| 538 | |||||
| 539 | =cut | ||||
| 540 | |||||
| 541 | sub _possibly_ace | ||||
| 542 | { | ||||
| 543 | my ($line, $lineno) = (shift, shift); | ||||
| 544 | return ($line =~ /^(?:Sequence|Peptide|DNA|Protein) [":]/); | ||||
| 545 | } | ||||
| 546 | |||||
| 547 | =head2 _possibly_blast | ||||
| 548 | |||||
| 549 | From various blast results. | ||||
| 550 | |||||
| 551 | =cut | ||||
| 552 | |||||
| 553 | sub _possibly_blast | ||||
| 554 | { | ||||
| 555 | my ($line, $lineno) = (shift, shift); | ||||
| 556 | return ($lineno == 1 && | ||||
| 557 | $line =~ /^[[:upper:]]*BLAST[[:upper:]]*.*\[.*\]$/); | ||||
| 558 | } | ||||
| 559 | |||||
| 560 | =head2 _possibly_bowtie | ||||
| 561 | |||||
| 562 | Contributed by kortsch. | ||||
| 563 | |||||
| 564 | =cut | ||||
| 565 | |||||
| 566 | sub _possibly_bowtie | ||||
| 567 | { | ||||
| 568 | my ($line, $lineno) = (shift, shift); | ||||
| 569 | return ($line =~ /^[[:graph:]]+\t[-+]\t[[:graph:]]+\t\d+\t([[:alpha:]]+)\t([[:graph:]]+)\t\d+\t[[:graph:]]?/) | ||||
| 570 | && length($1)==length($2); | ||||
| 571 | } | ||||
| 572 | |||||
| 573 | =head2 _possibly_clustalw | ||||
| 574 | |||||
| 575 | From "http://www.ebi.ac.uk/help/formats.html". | ||||
| 576 | |||||
| 577 | =cut | ||||
| 578 | |||||
| 579 | sub _possibly_clustalw | ||||
| 580 | { | ||||
| 581 | my ($line, $lineno) = (shift, shift); | ||||
| 582 | return ($lineno == 1 && $line =~ /CLUSTAL/); | ||||
| 583 | } | ||||
| 584 | |||||
| 585 | =head2 _possibly_codata | ||||
| 586 | |||||
| 587 | From "http://www.ebi.ac.uk/help/formats.html". | ||||
| 588 | |||||
| 589 | =cut | ||||
| 590 | |||||
| 591 | sub _possibly_codata | ||||
| 592 | { | ||||
| 593 | my ($line, $lineno) = (shift, shift); | ||||
| 594 | return (($lineno == 1 && $line =~ /^ENTRY/) || | ||||
| 595 | ($lineno == 2 && $line =~ /^SEQUENCE/) || | ||||
| 596 | $line =~ m{^(?:ENTRY|SEQUENCE|///)}); | ||||
| 597 | } | ||||
| 598 | |||||
| 599 | =head2 _possibly_embl | ||||
| 600 | |||||
| 601 | From | ||||
| 602 | "http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3.3". | ||||
| 603 | |||||
| 604 | =cut | ||||
| 605 | |||||
| 606 | sub _possibly_embl | ||||
| 607 | { | ||||
| 608 | my ($line, $lineno) = (shift, shift); | ||||
| 609 | return ($lineno == 1 && $line =~ /^ID / && $line =~ /BP\.$/); | ||||
| 610 | } | ||||
| 611 | |||||
| 612 | =head2 _possibly_fasta | ||||
| 613 | |||||
| 614 | From "http://www.ebi.ac.uk/help/formats.html". | ||||
| 615 | |||||
| 616 | =cut | ||||
| 617 | |||||
| 618 | sub _possibly_fasta | ||||
| 619 | { | ||||
| 620 | my ($line, $lineno) = (shift, shift); | ||||
| 621 | return (($lineno != 1 && $line =~ /^[A-IK-NP-Z]+$/i) || | ||||
| 622 | $line =~ /^>\s*\w/); | ||||
| 623 | } | ||||
| 624 | |||||
| 625 | =head2 _possibly_fastq | ||||
| 626 | |||||
| 627 | From bioperl test data. | ||||
| 628 | |||||
| 629 | =cut | ||||
| 630 | |||||
| 631 | sub _possibly_fastq | ||||
| 632 | { | ||||
| 633 | my ($line, $lineno) = (shift, shift); | ||||
| 634 | return ( ($lineno == 1 && $line =~ /^@/) || | ||||
| 635 | ($lineno == 3 && $line =~ /^\+/) ); | ||||
| 636 | } | ||||
| 637 | |||||
| 638 | =head2 _possibly_fastxy | ||||
| 639 | |||||
| 640 | From bioperl test data. | ||||
| 641 | |||||
| 642 | =cut | ||||
| 643 | |||||
| 644 | sub _possibly_fastxy | ||||
| 645 | { | ||||
| 646 | my ($line, $lineno) = (shift, shift); | ||||
| 647 | return (($lineno == 1 && $line =~ /^ FAST(?:XY|A)/) || | ||||
| 648 | ($lineno == 2 && $line =~ /^ version \d/)); | ||||
| 649 | } | ||||
| 650 | |||||
| 651 | =head2 _possibly_game | ||||
| 652 | |||||
| 653 | From bioperl testdata. | ||||
| 654 | |||||
| 655 | =cut | ||||
| 656 | |||||
| 657 | sub _possibly_game | ||||
| 658 | { | ||||
| 659 | my ($line, $lineno) = (shift, shift); | ||||
| 660 | return ($line =~ /^<!DOCTYPE game/); | ||||
| 661 | } | ||||
| 662 | |||||
| 663 | =head2 _possibly_gcg | ||||
| 664 | |||||
| 665 | From bioperl, Bio::SeqIO::gcg. | ||||
| 666 | |||||
| 667 | =cut | ||||
| 668 | |||||
| 669 | sub _possibly_gcg | ||||
| 670 | { | ||||
| 671 | my ($line, $lineno) = (shift, shift); | ||||
| 672 | return ($line =~ /Length: .*Type: .*Check: .*\.\.$/); | ||||
| 673 | } | ||||
| 674 | |||||
| 675 | =head2 _possibly_gcgblast | ||||
| 676 | |||||
| 677 | From bioperl testdata. | ||||
| 678 | |||||
| 679 | =cut | ||||
| 680 | |||||
| 681 | sub _possibly_gcgblast | ||||
| 682 | { | ||||
| 683 | my ($line, $lineno) = (shift, shift); | ||||
| 684 | return (($lineno == 1 && $line =~ /^!!SEQUENCE_LIST/) || | ||||
| 685 | ($lineno == 2 && | ||||
| 686 | $line =~ /^[[:upper:]]*BLAST[[:upper:]]*.*\[.*\]$/)); | ||||
| 687 | } | ||||
| 688 | |||||
| 689 | =head2 _possibly_gcgfasta | ||||
| 690 | |||||
| 691 | From bioperl testdata. | ||||
| 692 | |||||
| 693 | =cut | ||||
| 694 | |||||
| 695 | sub _possibly_gcgfasta | ||||
| 696 | { | ||||
| 697 | my ($line, $lineno) = (shift, shift); | ||||
| 698 | return (($lineno == 1 && $line =~ /^!!SEQUENCE_LIST/) || | ||||
| 699 | ($lineno == 2 && $line =~ /FASTA/)); | ||||
| 700 | } | ||||
| 701 | |||||
| 702 | =head2 _possibly_gde | ||||
| 703 | |||||
| 704 | From "http://www.ebi.ac.uk/help/formats.html". | ||||
| 705 | |||||
| 706 | =cut | ||||
| 707 | |||||
| 708 | sub _possibly_gde | ||||
| 709 | { | ||||
| 710 | my ($line, $lineno) = (shift, shift); | ||||
| 711 | return ($line =~ /^[{}]$/ || | ||||
| 712 | $line =~ /^(?:name|longname|sequence-ID| | ||||
| 713 | creation-date|direction|strandedness| | ||||
| 714 | type|offset|group-ID|creator|descrip| | ||||
| 715 | comment|sequence)/x); | ||||
| 716 | } | ||||
| 717 | |||||
| 718 | =head2 _possibly_genbank | ||||
| 719 | |||||
| 720 | From "http://www.ebi.ac.uk/help/formats.html". | ||||
| 721 | Format of [apparantly optional] file header from | ||||
| 722 | "http://www.umdnj.edu/rcompweb/PA/Notes/GenbankFF.htm". (TODO: dead link) | ||||
| 723 | |||||
| 724 | =cut | ||||
| 725 | |||||
| 726 | sub _possibly_genbank | ||||
| 727 | { | ||||
| 728 | my ($line, $lineno) = (shift, shift); | ||||
| 729 | return (($lineno == 1 && $line =~ /GENETIC SEQUENCE DATA BANK/) || | ||||
| 730 | ($lineno == 1 && $line =~ /^LOCUS /) || | ||||
| 731 | ($lineno == 2 && $line =~ /^DEFINITION /) || | ||||
| 732 | ($lineno == 3 && $line =~ /^ACCESSION /)); | ||||
| 733 | } | ||||
| 734 | |||||
| 735 | =head2 _possibly_genscan | ||||
| 736 | |||||
| 737 | From bioperl test data. | ||||
| 738 | |||||
| 739 | =cut | ||||
| 740 | |||||
| 741 | sub _possibly_genscan | ||||
| 742 | { | ||||
| 743 | my ($line, $lineno) = (shift, shift); | ||||
| 744 | return (($lineno == 1 && $line =~ /^GENSCAN.*Date.*Time/) || | ||||
| 745 | ($line =~ /^(?:Sequence\s+\w+|Parameter matrix|Predicted genes)/)); | ||||
| 746 | } | ||||
| 747 | |||||
| 748 | =head2 _possibly_gff | ||||
| 749 | |||||
| 750 | From bioperl test data. | ||||
| 751 | |||||
| 752 | =cut | ||||
| 753 | |||||
| 754 | sub _possibly_gff | ||||
| 755 | { | ||||
| 756 | my ($line, $lineno) = (shift, shift); | ||||
| 757 | return (($lineno == 1 && $line =~ /^##gff-version/) || | ||||
| 758 | ($lineno == 2 && $line =~ /^##date/)); | ||||
| 759 | } | ||||
| 760 | |||||
| 761 | =head2 _possibly_hmmer | ||||
| 762 | |||||
| 763 | From bioperl test data. | ||||
| 764 | |||||
| 765 | =cut | ||||
| 766 | |||||
| 767 | sub _possibly_hmmer | ||||
| 768 | { | ||||
| 769 | my ($line, $lineno) = (shift, shift); | ||||
| 770 | return (($lineno == 2 && $line =~ /^HMMER/) || | ||||
| 771 | ($lineno == 3 && | ||||
| 772 | $line =~ /Washington University School of Medicine/)); | ||||
| 773 | } | ||||
| 774 | |||||
| 775 | =head2 _possibly_nexus | ||||
| 776 | |||||
| 777 | From "http://paup.csit.fsu.edu/nfiles.html". | ||||
| 778 | |||||
| 779 | =cut | ||||
| 780 | |||||
| 781 | sub _possibly_nexus | ||||
| 782 | { | ||||
| 783 | my ($line, $lineno) = (shift, shift); | ||||
| 784 | return ($lineno == 1 && $line =~ /^#NEXUS/); | ||||
| 785 | } | ||||
| 786 | |||||
| 787 | =head2 _possibly_mase | ||||
| 788 | |||||
| 789 | From bioperl test data. | ||||
| 790 | More detail from "http://www.umdnj.edu/rcompweb/PA/Notes/GenbankFF.htm" (TODO: dead link) | ||||
| 791 | |||||
| 792 | =cut | ||||
| 793 | |||||
| 794 | sub _possibly_mase | ||||
| 795 | { | ||||
| 796 | my ($line, $lineno) = (shift, shift); | ||||
| 797 | return (($lineno == 1 && $line =~ /^;;/) || | ||||
| 798 | ($lineno > 1 && $line =~ /^;[^;]?/)); | ||||
| 799 | } | ||||
| 800 | |||||
| 801 | =head2 _possibly_mega | ||||
| 802 | |||||
| 803 | From the ensembl broswer (AlignView data export). | ||||
| 804 | |||||
| 805 | =cut | ||||
| 806 | |||||
| 807 | sub _possibly_mega | ||||
| 808 | { | ||||
| 809 | my ($line, $lineno) = (shift, shift); | ||||
| 810 | return ($lineno == 1 && $line =~ /^#mega$/); | ||||
| 811 | } | ||||
| 812 | |||||
| 813 | |||||
| 814 | =head2 _possibly_msf | ||||
| 815 | |||||
| 816 | From "http://www.ebi.ac.uk/help/formats.html". | ||||
| 817 | |||||
| 818 | =cut | ||||
| 819 | |||||
| 820 | sub _possibly_msf | ||||
| 821 | { | ||||
| 822 | my ($line, $lineno) = (shift, shift); | ||||
| 823 | return ($line =~ m{^//} || | ||||
| 824 | $line =~ /MSF:.*Type:.*Check:|Name:.*Len:/); | ||||
| 825 | } | ||||
| 826 | |||||
| 827 | =head2 _possibly_phrap | ||||
| 828 | |||||
| 829 | From "http://biodata.ccgb.umn.edu/docs/contigimage.html". (TODO: dead link) | ||||
| 830 | From "http://genetics.gene.cwru.edu/gene508/Lec6.htm". (TODO: dead link) | ||||
| 831 | From bioperl test data ("*.ace.1" files). | ||||
| 832 | |||||
| 833 | =cut | ||||
| 834 | |||||
| 835 | sub _possibly_phrap | ||||
| 836 | { | ||||
| 837 | my ($line, $lineno) = (shift, shift); | ||||
| 838 | return ($line =~ /^(?:AS\ |CO\ Contig|BQ|AF\ |BS\ |RD\ | | ||||
| 839 | QA\ |DS\ |RT\{)/x); | ||||
| 840 | } | ||||
| 841 | |||||
| 842 | =head2 _possibly_pir | ||||
| 843 | |||||
| 844 | From "http://www.ebi.ac.uk/help/formats.html". | ||||
| 845 | The ".,()" spotted in bioperl test data. | ||||
| 846 | |||||
| 847 | =cut | ||||
| 848 | |||||
| 849 | sub _possibly_pir # "NBRF/PIR" (?) | ||||
| 850 | { | ||||
| 851 | my ($line, $lineno) = (shift, shift); | ||||
| 852 | return (($lineno != 1 && $line =~ /^[\sA-IK-NP-Z.,()]+\*?$/i) || | ||||
| 853 | $line =~ /^>(?:P1|F1|DL|DC|RL|RC|N3|N1);/); | ||||
| 854 | } | ||||
| 855 | |||||
| 856 | =head2 _possibly_pfam | ||||
| 857 | |||||
| 858 | From bioperl test data. | ||||
| 859 | |||||
| 860 | =cut | ||||
| 861 | |||||
| 862 | sub _possibly_pfam | ||||
| 863 | { | ||||
| 864 | my ($line, $lineno) = (shift, shift); | ||||
| 865 | return ($line =~ m{^\w+/\d+-\d+\s+[A-IK-NP-Z.]+}i); | ||||
| 866 | } | ||||
| 867 | |||||
| 868 | =head2 _possibly_phylip | ||||
| 869 | |||||
| 870 | From "http://www.ebi.ac.uk/help/formats.html". Initial space | ||||
| 871 | allowed on first line (spotted in ensembl AlignView exported | ||||
| 872 | data). | ||||
| 873 | |||||
| 874 | =cut | ||||
| 875 | |||||
| 876 | sub _possibly_phylip | ||||
| 877 | { | ||||
| 878 | my ($line, $lineno) = (shift, shift); | ||||
| 879 | return (($lineno == 1 && $line =~ /^\s*\d+\s\d+/) || | ||||
| 880 | ($lineno == 2 && $line =~ /^\w\s+[A-IK-NP-Z\s]+/) || | ||||
| 881 | ($lineno == 3 && $line =~ /(?:^\w\s+[A-IK-NP-Z\s]+|\s+[A-IK-NP-Z\s]+)/) | ||||
| 882 | ); | ||||
| 883 | } | ||||
| 884 | |||||
| 885 | =head2 _possibly_prodom | ||||
| 886 | |||||
| 887 | From "http://prodom.prabi.fr/prodom/current/documentation/data.php". | ||||
| 888 | |||||
| 889 | =cut | ||||
| 890 | |||||
| 891 | sub _possibly_prodom | ||||
| 892 | { | ||||
| 893 | my ($line, $lineno) = (shift, shift); | ||||
| 894 | return ($lineno == 1 && $line =~ /^ID / && $line =~ /\d+ seq\.$/); | ||||
| 895 | } | ||||
| 896 | |||||
| 897 | =head2 _possibly_raw | ||||
| 898 | |||||
| 899 | From "http://www.ebi.ac.uk/help/formats.html". | ||||
| 900 | |||||
| 901 | =cut | ||||
| 902 | |||||
| 903 | sub _possibly_raw | ||||
| 904 | { | ||||
| 905 | my ($line, $lineno) = (shift, shift); | ||||
| 906 | return ($line =~ /^[A-Za-z\s]+$/); | ||||
| 907 | } | ||||
| 908 | |||||
| 909 | =head2 _possibly_rsf | ||||
| 910 | |||||
| 911 | From "http://www.ebi.ac.uk/help/formats.html". | ||||
| 912 | |||||
| 913 | =cut | ||||
| 914 | |||||
| 915 | sub _possibly_rsf | ||||
| 916 | { | ||||
| 917 | my ($line, $lineno) = (shift, shift); | ||||
| 918 | return (($lineno == 1 && $line =~ /^!!RICH_SEQUENCE/) || | ||||
| 919 | $line =~ /^[{}]$/ || | ||||
| 920 | $line =~ /^(?:name|type|longname| | ||||
| 921 | checksum|creation-date|strand|sequence)/x); | ||||
| 922 | } | ||||
| 923 | |||||
| 924 | =head2 _possibly_selex | ||||
| 925 | |||||
| 926 | From "http://www.ebc.ee/WWW/hmmer2-html/node27.html". | ||||
| 927 | |||||
| 928 | Assuming presence of Selex file header. Data exported by | ||||
| 929 | Bioperl on Pfam and Selex formats are identical, but Pfam file | ||||
| 930 | only holds one alignment. | ||||
| 931 | |||||
| 932 | =cut | ||||
| 933 | |||||
| 934 | sub _possibly_selex | ||||
| 935 | { | ||||
| 936 | my ($line, $lineno) = (shift, shift); | ||||
| 937 | return (($lineno == 1 && $line =~ /^#=ID /) || | ||||
| 938 | ($lineno == 2 && $line =~ /^#=AC /) || | ||||
| 939 | ($line =~ /^#=SQ /)); | ||||
| 940 | } | ||||
| 941 | |||||
| 942 | =head2 _possibly_stockholm | ||||
| 943 | |||||
| 944 | From bioperl test data. | ||||
| 945 | |||||
| 946 | =cut | ||||
| 947 | |||||
| 948 | sub _possibly_stockholm | ||||
| 949 | { | ||||
| 950 | my ($line, $lineno) = (shift, shift); | ||||
| 951 | return (($lineno == 1 && $line =~ /^# STOCKHOLM/) || | ||||
| 952 | $line =~ /^#=(?:GF|GS) /); | ||||
| 953 | } | ||||
| 954 | |||||
| - - | |||||
| 957 | =head2 _possibly_swiss | ||||
| 958 | |||||
| 959 | From "http://ca.expasy.org/sprot/userman.html#entrystruc". | ||||
| 960 | |||||
| 961 | =cut | ||||
| 962 | |||||
| 963 | sub _possibly_swiss | ||||
| 964 | { | ||||
| 965 | my ($line, $lineno) = (shift, shift); | ||||
| 966 | return ($lineno == 1 && $line =~ /^ID / && $line =~ /AA\.$/); | ||||
| 967 | } | ||||
| 968 | |||||
| 969 | =head2 _possibly_tab | ||||
| 970 | |||||
| 971 | Contributed by Heikki. | ||||
| 972 | |||||
| 973 | =cut | ||||
| 974 | |||||
| 975 | sub _possibly_tab | ||||
| 976 | { | ||||
| 977 | my ($line, $lineno) = (shift, shift); | ||||
| 978 | return ($lineno == 1 && $line =~ /^[^\t]+\t[^\t]+/) ; | ||||
| 979 | } | ||||
| 980 | |||||
| 981 | =head2 _possibly_vcf | ||||
| 982 | |||||
| 983 | From "http://www.1000genomes.org/wiki/analysis/vcf4.0". | ||||
| 984 | |||||
| 985 | Assumptions made about sanity - format and date lines are line 1 and 2 | ||||
| 986 | respectively. This is not specified in the format document. | ||||
| 987 | |||||
| 988 | =cut | ||||
| 989 | |||||
| 990 | sub _possibly_vcf | ||||
| 991 | { | ||||
| 992 | my ($line, $lineno) = (shift, shift); | ||||
| 993 | return (($lineno == 1 && $line =~ /##fileformat=VCFv/) || | ||||
| 994 | ($lineno == 2 && $line =~ /##fileDate=/)); | ||||
| 995 | } | ||||
| 996 | |||||
| - - | |||||
| 999 | 1 | 48µs | 1; |