| Filename | /home/ss5/perl5/perlbrew/perls/perl-5.22.0/lib/site_perl/5.22.0/SQL/Tokenizer.pm |
| Statements | Executed 13 statements in 537µs |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 9µs | 12µs | SQL::Tokenizer::BEGIN@3 |
| 1 | 1 | 1 | 6µs | 6µs | SQL::Tokenizer::BEGIN@6 |
| 1 | 1 | 1 | 4µs | 13µs | SQL::Tokenizer::BEGIN@8 |
| 1 | 1 | 1 | 4µs | 5µs | SQL::Tokenizer::BEGIN@4 |
| 1 | 1 | 1 | 2µs | 2µs | SQL::Tokenizer::CORE:qr (opcode) |
| 0 | 0 | 0 | 0s | 0s | SQL::Tokenizer::tokenize |
| 0 | 0 | 0 | 0s | 0s | SQL::Tokenizer::tokenize_sql |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | package SQL::Tokenizer; | ||||
| 2 | |||||
| 3 | 2 | 13µs | 2 | 16µs | # spent 12µs (9+4) within SQL::Tokenizer::BEGIN@3 which was called:
# once (9µs+4µs) by SQL::SplitStatement::BEGIN@14 at line 3 # spent 12µs making 1 call to SQL::Tokenizer::BEGIN@3
# spent 4µs making 1 call to warnings::import |
| 4 | 2 | 10µs | 2 | 6µs | # spent 5µs (4+1000ns) within SQL::Tokenizer::BEGIN@4 which was called:
# once (4µs+1000ns) by SQL::SplitStatement::BEGIN@14 at line 4 # spent 5µs making 1 call to SQL::Tokenizer::BEGIN@4
# spent 1µs making 1 call to strict::import |
| 5 | |||||
| 6 | 2 | 23µs | 1 | 6µs | # spent 6µs within SQL::Tokenizer::BEGIN@6 which was called:
# once (6µs+0s) by SQL::SplitStatement::BEGIN@14 at line 6 # spent 6µs making 1 call to SQL::Tokenizer::BEGIN@6 |
| 7 | |||||
| 8 | 2 | 476µs | 2 | 22µs | # spent 13µs (4+9) within SQL::Tokenizer::BEGIN@8 which was called:
# once (4µs+9µs) by SQL::SplitStatement::BEGIN@14 at line 8 # spent 13µs making 1 call to SQL::Tokenizer::BEGIN@8
# spent 9µs making 1 call to Exporter::import |
| 9 | |||||
our @ISA = qw(Exporter);

# Only the procedural entry point is exportable; tokenize() is a class method.
our @EXPORT_OK= qw(tokenize_sql);

our $VERSION= '0.24';

# Master tokenizing pattern: one capture group whose alternatives each match
# exactly one token.  Alternation order matters: the comment forms are tried
# before the operator list so that "--" scans as a single comment introducer
# rather than two "-" operators, and the quoted-string forms are tried before
# the bare-word form.  The atomic groups (?>...) keep the quoted-string
# alternatives from backtracking pathologically on long inputs.  Doubled
# quotes ('' "" ``) and backslash escapes (\\.) are both consumed inside
# quoted strings; /s lets the \\. branches match a backslash-newline pair.
my $re= qr{
    (
        (?:--|\#)[\ \t\S]*      # single line comments
        |
        (?:<>|<=>|>=|<=|==|=|!=|!|<<|>>|<|>|\|\||\||&&|&|-|\+|\*(?!/)|/(?!\*)|\%|~|\^|\?)
                                # operators and tests
        |
        [\[\]\(\),;.]           # punctuation (parenthesis, comma)
        |
        \'\'(?!\')              # empty single quoted string
        |
        \"\"(?!\"")             # empty double quoted string
        |
        "(?>(?:(?>[^"\\]+)|""|\\.)*)+"
                                # anything inside double quotes, ungreedy
        |
        `(?>(?:(?>[^`\\]+)|``|\\.)*)+`
                                # anything inside backticks quotes, ungreedy
        |
        '(?>(?:(?>[^'\\]+)|''|\\.)*)+'
                                # anything inside single quotes, ungreedy.
        |
        /\*[\ \t\r\n\S]*?\*/    # C style comments
        |
        (?:[\w:@]+(?:\.(?:\w+|\*)?)*)
                                # words, standard named placeholders, db.table.*, db.*
        |
        (?: \$_\$ | \$\d+ | \${1,2} )
                                # dollar expressions - eg $_$ $3 $$
        |
        \n                      # newline
        |
        [\t\ ]+                 # any kind of white spaces
    )
}smx;
| 51 | |||||
# Split a SQL string into its lexical tokens.
#
# Arguments:
#   $sql               - the SQL text to tokenize
#   $strip_whitespace  - optional; when true, tokens consisting only of
#                        whitespace are removed from the result
#
# Returns the token list in list context, or an array reference in
# scalar context.
sub tokenize_sql {
    my ($sql, $strip_whitespace) = @_;

    # Global match against the precompiled master pattern: each successful
    # match contributes its single capture, so this yields every token in
    # input order.
    my @tokens = ($sql =~ m{$re}smxg);

    # Optionally discard whitespace-only tokens (spaces, tabs, newlines).
    @tokens = grep { $_ !~ /^[\s\n\r]*$/ } @tokens if $strip_whitespace;

    return wantarray ? @tokens : \@tokens;
}
| 63 | |||||
# Class-method front end: SQL::Tokenizer->tokenize($sql, $remove_white).
# Discards the invocant and forwards the remaining arguments unchanged to
# tokenize_sql(), so calling context (list vs scalar) propagates through.
sub tokenize {
    my (undef, @args) = @_;
    return tokenize_sql(@args);
}

1;
| 70 | |||||
| 71 | =pod | ||||
| 72 | |||||
| 73 | =head1 NAME | ||||
| 74 | |||||
| 75 | SQL::Tokenizer - A simple SQL tokenizer. | ||||
| 76 | |||||
| 77 | =head1 VERSION | ||||
| 78 | |||||
| 79 | 0.24 | ||||
| 80 | |||||
| 81 | =head1 SYNOPSIS | ||||
| 82 | |||||
| 83 | use SQL::Tokenizer qw(tokenize_sql); | ||||
| 84 | |||||
| 85 | my $query= q{SELECT 1 + 1}; | ||||
| 86 | my @tokens= SQL::Tokenizer->tokenize($query); | ||||
| 87 | |||||
| 88 | # @tokens now contains ('SELECT', ' ', '1', ' ', '+', ' ', '1') | ||||
| 89 | |||||
| 90 | @tokens= tokenize_sql($query); # procedural interface | ||||
| 91 | |||||
| 92 | =head1 DESCRIPTION | ||||
| 93 | |||||
| 94 | SQL::Tokenizer is a simple tokenizer for SQL queries. It does not claim to be | ||||
| 95 | a parser or query verifier. It just creates sane tokens from a valid SQL | ||||
| 96 | query. | ||||
| 97 | |||||
| 98 | It supports SQL with comments like: | ||||
| 99 | |||||
| 100 | -- This query is used to insert a message into | ||||
| 101 | -- logs table | ||||
| 102 | INSERT INTO log (application, message) VALUES (?, ?) | ||||
| 103 | |||||
| 104 | Also supports C<''>, C<""> and C<\'> escaping methods, so tokenizing queries | ||||
| 105 | like the one below should not be a problem: | ||||
| 106 | |||||
| 107 | INSERT INTO log (application, message) | ||||
| 108 | VALUES ('myapp', 'Hey, this is a ''single quoted string''!') | ||||
| 109 | |||||
| 110 | =head1 API | ||||
| 111 | |||||
| 112 | =over 4 | ||||
| 113 | |||||
| 114 | =item tokenize_sql | ||||
| 115 | |||||
| 116 | use SQL::Tokenizer qw(tokenize_sql); | ||||
| 117 | |||||
| 118 | my @tokens= tokenize_sql($query); | ||||
| 119 | my $tokens= tokenize_sql($query); | ||||
| 120 | |||||
| 121 | $tokens= tokenize_sql( $query, $remove_white_tokens ); | ||||
| 122 | |||||
| 123 | C<tokenize_sql> can be imported to current namespace on request. It receives a | ||||
| 124 | SQL query, and returns an array of tokens if called in list context, or an | ||||
| 125 | arrayref if called in scalar context. | ||||
| 126 | |||||
| 127 | =item tokenize | ||||
| 128 | |||||
| 129 | my @tokens= SQL::Tokenizer->tokenize($query); | ||||
| 130 | my $tokens= SQL::Tokenizer->tokenize($query); | ||||
| 131 | |||||
| 132 | $tokens= SQL::Tokenizer->tokenize( $query, $remove_white_tokens ); | ||||
| 133 | |||||
| 134 | This is the only available class method. It receives a SQL query, and returns an | ||||
| 135 | array of tokens if called in list context, or an arrayref if called in scalar | ||||
| 136 | context. | ||||
| 137 | |||||
| 138 | If C<$remove_white_tokens> is true, white spaces only tokens will be removed from | ||||
| 139 | result. | ||||
| 140 | |||||
| 141 | =back | ||||
| 142 | |||||
| 143 | =head1 ACKNOWLEDGEMENTS | ||||
| 144 | |||||
| 145 | =over 4 | ||||
| 146 | |||||
| 147 | =item * | ||||
| 148 | |||||
| 149 | Evan Harris, for implementing Shell comment style and SQL operators. | ||||
| 150 | |||||
| 151 | =item * | ||||
| 152 | |||||
| 153 | Charlie Hills, for spotting a lot of important issues I hadn't thought of. | ||||
| 154 | |||||
| 155 | =item * | ||||
| 156 | |||||
| 157 | Jonas Kramer, for fixing MySQL quoted strings and treating dot as punctuation character correctly. | ||||
| 158 | |||||
| 159 | =item * | ||||
| 160 | |||||
| 161 | Emanuele Zeppieri, for asking to fix SQL::Tokenizer to support dollars as well. | ||||
| 162 | |||||
| 163 | =item * | ||||
| 164 | |||||
| 165 | Nigel Metheringham, for extending the dollar sign support. | ||||
| 166 | |||||
| 167 | =item * | ||||
| 168 | |||||
| 169 | Devin Withers, for making it not choke on CR+LF in comments. | ||||
| 170 | |||||
| 171 | =item * | ||||
| 172 | |||||
| 173 | Luc Lanthier, for simplifying the regex and making it not choke on backslashes. | ||||
| 174 | |||||
| 175 | =back | ||||
| 176 | |||||
| 177 | =head1 AUTHOR | ||||
| 178 | |||||
| 179 | Copyright (c) 2007, 2008, 2009, 2010, 2011 Igor Sutton Lopes "<IZUT@cpan.org>". All rights | ||||
| 180 | reserved. | ||||
| 181 | |||||
| 182 | This module is free software; you can redistribute it and/or modify it under | ||||
| 183 | the same terms as Perl itself. | ||||
| 184 | |||||
| 185 | =cut | ||||
| 186 | |||||
# spent 2µs within SQL::Tokenizer::CORE:qr which was called:
# once (2µs+0s) by SQL::SplitStatement::BEGIN@14 at line 16 |