| Filename | /home/hinrik/perl5/perlbrew/perls/perl-5.13.5/lib/site_perl/5.13.5/Regexp/Common.pm |
| Statements | Executed 1051 statements in 5.63ms |
| Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
|---|---|---|---|---|---|
| 19 | 19 | 18 | 5.86ms | 23.6ms | Regexp::Common::import (recurses: max depth 1, inclusive time 1.50ms) |
| 16 | 16 | 13 | 1.24ms | 1.68ms | Regexp::Common::pattern |
| 183 | 5 | 1 | 225µs | 225µs | Regexp::Common::CORE:match (opcode) |
| 17 | 2 | 1 | 212µs | 212µs | Regexp::Common::get_cache |
| 20 | 2 | 1 | 140µs | 140µs | Regexp::Common::TIEHASH |
| 73 | 3 | 1 | 122µs | 122µs | Regexp::Common::CORE:regcomp (opcode) |
| 1 | 1 | 1 | 54µs | 93µs | Regexp::Common::_decache |
| 33 | 1 | 1 | 45µs | 45µs | Regexp::Common::CORE:subst (opcode) |
| 1 | 1 | 1 | 37µs | 37µs | Regexp::Common::BEGIN@3 |
| 1 | 1 | 1 | 33µs | 125µs | Regexp::Common::Entry::__ANON__[:268] |
| 1 | 1 | 1 | 21µs | 125µs | Regexp::Common::BEGIN@19 |
| 1 | 1 | 1 | 20µs | 70µs | Regexp::Common::Entry::BEGIN@257 |
| 1 | 1 | 1 | 20µs | 77µs | Regexp::Common::BEGIN@163 |
| 1 | 1 | 1 | 15µs | 21µs | Regexp::Common::new |
| 1 | 1 | 1 | 15µs | 36µs | Regexp::Common::BEGIN@60 |
| 1 | 1 | 1 | 12µs | 17µs | Regexp::Common::BEGIN@4 |
| 1 | 1 | 1 | 12µs | 12µs | Regexp::Common::Entry::CORE:subst (opcode) |
| 1 | 1 | 1 | 12µs | 36µs | Regexp::Common::BEGIN@117 |
| 1 | 1 | 1 | 12µs | 33µs | Regexp::Common::FETCH |
| 1 | 1 | 1 | 11µs | 20µs | Regexp::Common::BEGIN@18 |
| 1 | 1 | 1 | 11µs | 11µs | Regexp::Common::Entry::_clone_with |
| 1 | 1 | 1 | 11µs | 34µs | Regexp::Common::BEGIN@128 |
| 1 | 1 | 1 | 11µs | 36µs | Regexp::Common::BEGIN@13 |
| 1 | 1 | 1 | 10µs | 31µs | Regexp::Common::BEGIN@69 |
| 1 | 1 | 1 | 9µs | 9µs | Regexp::Common::CORE:qr (opcode) |
| 1 | 1 | 1 | 6µs | 6µs | Regexp::Common::BEGIN@6 |
| 0 | 0 | 0 | 0s | 0s | Regexp::Common::AUTOLOAD |
| 0 | 0 | 0 | 0s | 0s | Regexp::Common::DESTROY |
| 0 | 0 | 0 | 0s | 0s | Regexp::Common::__ANON__[:14] |
| 0 | 0 | 0 | 0s | 0s | Regexp::Common::__ANON__[:188] |
| 0 | 0 | 0 | 0s | 0s | Regexp::Common::__ANON__[:231] |
| 0 | 0 | 0 | 0s | 0s | Regexp::Common::_carp |
| 0 | 0 | 0 | 0s | 0s | Regexp::Common::_croak |
| 0 | 0 | 0 | 0s | 0s | Regexp::Common::croak_version |
| 0 | 0 | 0 | 0s | 0s | Regexp::Common::generic_match |
| 0 | 0 | 0 | 0s | 0s | Regexp::Common::generic_subs |
| 0 | 0 | 0 | 0s | 0s | Regexp::Common::matches |
| 0 | 0 | 0 | 0s | 0s | Regexp::Common::subs |
| Line | Statements | Time on line | Calls | Time in subs | Code |
|---|---|---|---|---|---|
| 1 | package Regexp::Common; | ||||
| 2 | |||||
| 3 | 2 | 42µs | 1 | 37µs | # spent 37µs within Regexp::Common::BEGIN@3 which was called:
# once (37µs+0s) by Hailo::Tokenizer::Words::BEGIN@7 at line 3 # spent 37µs making 1 call to Regexp::Common::BEGIN@3 |
| 4 | 2 | 48µs | 2 | 22µs | # spent 17µs (12+5) within Regexp::Common::BEGIN@4 which was called:
# once (12µs+5µs) by Hailo::Tokenizer::Words::BEGIN@7 at line 4 # spent 17µs making 1 call to Regexp::Common::BEGIN@4
# spent 5µs making 1 call to strict::import |
| 5 | |||||
| 6 | # spent 6µs within Regexp::Common::BEGIN@6 which was called:
# once (6µs+0s) by Hailo::Tokenizer::Words::BEGIN@7 at line 16 | ||||
| 7 | # This makes sure 'use warnings' doesn't bomb out on 5.005_*; | ||||
| 8 | # warnings won't be enabled on those old versions though. | ||||
| 9 | # Since all other files use this file, we can use 'use warnings' | ||||
| 10 | # elsewhere as well, but *AFTER* 'use Regexp::Common'. | ||||
| 11 | 1 | 7µs | if ($] < 5.006) { | ||
| 12 | $INC {"warnings.pm"} = 1; | ||||
| 13 | 2 | 47µs | 2 | 61µs | # spent 36µs (11+25) within Regexp::Common::BEGIN@13 which was called:
# once (11µs+25µs) by Hailo::Tokenizer::Words::BEGIN@7 at line 13 # spent 36µs making 1 call to Regexp::Common::BEGIN@13
# spent 25µs making 1 call to strict::unimport |
| 14 | *{"warnings::unimport"} = sub {0}; | ||||
| 15 | } | ||||
| 16 | 1 | 19µs | 1 | 6µs | } # spent 6µs making 1 call to Regexp::Common::BEGIN@6 |
| 17 | |||||
| 18 | 2 | 36µs | 2 | 29µs | # spent 20µs (11+9) within Regexp::Common::BEGIN@18 which was called:
# once (11µs+9µs) by Hailo::Tokenizer::Words::BEGIN@7 at line 18 # spent 20µs making 1 call to Regexp::Common::BEGIN@18
# spent 9µs making 1 call to warnings::import |
| 19 | 2 | 193µs | 2 | 229µs | # spent 125µs (21+104) within Regexp::Common::BEGIN@19 which was called:
# once (21µs+104µs) by Hailo::Tokenizer::Words::BEGIN@7 at line 19 # spent 125µs making 1 call to Regexp::Common::BEGIN@19
# spent 104µs making 1 call to vars::import |
| 20 | |||||
| 21 | 1 | 2µs | $VERSION = '2010010201'; | ||
| 22 | |||||
| 23 | |||||
| 24 | sub _croak { | ||||
| 25 | require Carp; | ||||
| 26 | goto &Carp::croak; | ||||
| 27 | } | ||||
| 28 | |||||
| 29 | sub _carp { | ||||
| 30 | require Carp; | ||||
| 31 | goto &Carp::carp; | ||||
| 32 | } | ||||
| 33 | |||||
| 34 | # spent 21µs (15+6) within Regexp::Common::new which was called:
# once (15µs+6µs) by Regexp::Common::FETCH at line 48 | ||||
| 35 | 1 | 2µs | my ($class, @data) = @_; | ||
| 36 | 1 | 1µs | my %self; | ||
| 37 | 1 | 7µs | 1 | 6µs | tie %self, $class, @data; # spent 6µs making 1 call to Regexp::Common::TIEHASH |
| 38 | 1 | 5µs | return \%self; | ||
| 39 | } | ||||
| 40 | |||||
| 41 | sub TIEHASH { | ||||
| 42 | 20 | 34µs | my ($class, @data) = @_; | ||
| 43 | 20 | 140µs | bless \@data, $class; | ||
| 44 | } | ||||
| 45 | |||||
| 46 | # spent 33µs (12+21) within Regexp::Common::FETCH which was called:
# once (12µs+21µs) by Hailo::Tokenizer::Words::make_tokens at line 65 of lib/Hailo/Tokenizer/Words.pm | ||||
| 47 | 1 | 1µs | my ($self, $extra) = @_; | ||
| 48 | 1 | 10µs | 1 | 21µs | return bless ref($self)->new(@$self, $extra), ref($self); # spent 21µs making 1 call to Regexp::Common::new |
| 49 | } | ||||
| 50 | |||||
| 51 | 1 | 21µs | my %imports = map {$_ => "Regexp::Common::$_"} | ||
| 52 | qw /balanced CC comment delimited lingua list | ||||
| 53 | net number profanity SEN URI whitespace | ||||
| 54 | zip/; | ||||
| 55 | |||||
| 56 | # spent 23.6ms (5.86+17.7) within Regexp::Common::import which was called 19 times, avg 1.24ms/call:
# once (4.59ms+19.0ms) by Hailo::Tokenizer::Words::BEGIN@7 at line 7 of lib/Hailo/Tokenizer/Words.pm
# once (71µs+-71µs) by Regexp::Common::URI::RFC1035::BEGIN@3 at line 3 of Regexp/Common/URI/RFC1035.pm
# once (67µs+-67µs) by Regexp::Common::URI::pop::BEGIN@3 at line 3 of Regexp/Common/URI/pop.pm
# once (69µs+-69µs) by Regexp::Common::URI::file::BEGIN@3 at line 3 of Regexp/Common/URI/file.pm
# once (68µs+-68µs) by Regexp::Common::URI::RFC1738::BEGIN@3 at line 3 of Regexp/Common/URI/RFC1738.pm
# once (87µs+-87µs) by Regexp::Common::URI::RFC2396::BEGIN@3 at line 3 of Regexp/Common/URI/RFC2396.pm
# once (87µs+-87µs) by Regexp::Common::URI::BEGIN@3 at line 3 of Regexp/Common/URI.pm
# once (65µs+-65µs) by Regexp::Common::URI::BEGIN@14 at line 14 of Regexp/Common/URI.pm
# once (69µs+-69µs) by Regexp::Common::URI::gopher::BEGIN@3 at line 3 of Regexp/Common/URI/gopher.pm
# once (68µs+-68µs) by Regexp::Common::URI::news::BEGIN@3 at line 3 of Regexp/Common/URI/news.pm
# once (69µs+-69µs) by Regexp::Common::URI::tv::BEGIN@6 at line 6 of Regexp/Common/URI/tv.pm
# once (68µs+-68µs) by Regexp::Common::URI::ftp::BEGIN@3 at line 3 of Regexp/Common/URI/ftp.pm
# once (77µs+-77µs) by Regexp::Common::URI::wais::BEGIN@3 at line 3 of Regexp/Common/URI/wais.pm
# once (66µs+-66µs) by Regexp::Common::URI::fax::BEGIN@3 at line 3 of Regexp/Common/URI/fax.pm
# once (68µs+-68µs) by Regexp::Common::URI::telnet::BEGIN@3 at line 3 of Regexp/Common/URI/telnet.pm
# once (69µs+-69µs) by Regexp::Common::URI::RFC2384::BEGIN@4 at line 4 of Regexp/Common/URI/RFC2384.pm
# once (71µs+-71µs) by Regexp::Common::URI::prospero::BEGIN@3 at line 3 of Regexp/Common/URI/prospero.pm
# once (71µs+-71µs) by Regexp::Common::URI::http::BEGIN@3 at line 3 of Regexp/Common/URI/http.pm
# once (68µs+-68µs) by Regexp::Common::URI::tel::BEGIN@3 at line 3 of Regexp/Common/URI/tel.pm | ||||
| 57 | 19 | 20µs | shift; # Shift off the class. | ||
| 58 | 19 | 112µs | 19 | 134µs | tie %RE, __PACKAGE__; # spent 134µs making 19 calls to Regexp::Common::TIEHASH, avg 7µs/call |
| 59 | { | ||||
| 60 | 21 | 114µs | 2 | 58µs | # spent 36µs (15+22) within Regexp::Common::BEGIN@60 which was called:
# once (15µs+22µs) by Hailo::Tokenizer::Words::BEGIN@7 at line 60 # spent 36µs making 1 call to Regexp::Common::BEGIN@60
# spent 22µs making 1 call to strict::unimport |
| 61 | 19 | 79µs | *{caller() . "::RE"} = \%RE; | ||
| 62 | } | ||||
| 63 | |||||
| 64 | 19 | 19µs | my $saw_import; | ||
| 65 | 19 | 16µs | my $no_defaults; | ||
| 66 | 19 | 22µs | my %exclude; | ||
| 67 | 74 | 427µs | 55 | 68µs | foreach my $entry (grep {!/^RE_/} @_) { # spent 68µs making 55 calls to Regexp::Common::CORE:match, avg 1µs/call |
| 68 | 55 | 57µs | if ($entry eq 'pattern') { | ||
| 69 | 2 | 246µs | 2 | 52µs | # spent 31µs (10+21) within Regexp::Common::BEGIN@69 which was called:
# once (10µs+21µs) by Hailo::Tokenizer::Words::BEGIN@7 at line 69 # spent 31µs making 1 call to Regexp::Common::BEGIN@69
# spent 21µs making 1 call to strict::unimport |
| 70 | 18 | 77µs | *{caller() . "::pattern"} = \&pattern; | ||
| 71 | 18 | 22µs | next; | ||
| 72 | } | ||||
| 73 | # This used to prevent $; from being set. We still recognize it, | ||||
| 74 | # but we won't do anything. | ||||
| 75 | 37 | 32µs | if ($entry eq 'clean') { | ||
| 76 | 18 | 18µs | next; | ||
| 77 | } | ||||
| 78 | 19 | 19µs | if ($entry eq 'no_defaults') { | ||
| 79 | 18 | 18µs | $no_defaults ++; | ||
| 80 | 18 | 17µs | next; | ||
| 81 | } | ||||
| 82 | 1 | 2µs | if (my $module = $imports {$entry}) { | ||
| 83 | 1 | 1µs | $saw_import ++; | ||
| 84 | 1 | 28µs | eval "require $module;"; # spent 156µs executing statements in string eval | ||
| 85 | 1 | 1µs | die $@ if $@; | ||
| 86 | 1 | 2µs | next; | ||
| 87 | } | ||||
| 88 | if ($entry =~ /^!(.*)/ && $imports {$1}) { | ||||
| 89 | $exclude {$1} ++; | ||||
| 90 | next; | ||||
| 91 | } | ||||
| 92 | # As a last resort, try to load the argument. | ||||
| 93 | my $module = $entry =~ /^Regexp::Common/ | ||||
| 94 | ? $entry | ||||
| 95 | : "Regexp::Common::" . $entry; | ||||
| 96 | eval "require $module;"; | ||||
| 97 | die $@ if $@; | ||||
| 98 | } | ||||
| 99 | |||||
| 100 | 19 | 20µs | unless ($saw_import || $no_defaults) { | ||
| 101 | foreach my $module (values %imports) { | ||||
| 102 | next if $exclude {$module}; | ||||
| 103 | eval "require $module;"; | ||||
| 104 | die $@ if $@; | ||||
| 105 | } | ||||
| 106 | } | ||||
| 107 | |||||
| 108 | 19 | 17µs | my %exported; | ||
| 109 | 74 | 437µs | 55 | 62µs | foreach my $entry (grep {/^RE_/} @_) { # spent 62µs making 55 calls to Regexp::Common::CORE:match, avg 1µs/call |
| 110 | if ($entry =~ /^RE_(\w+_)?ALL$/) { | ||||
| 111 | my $m = defined $1 ? $1 : ""; | ||||
| 112 | my $re = qr /^RE_${m}.*$/; | ||||
| 113 | while (my ($sub, $interface) = each %sub_interface) { | ||||
| 114 | next if $exported {$sub}; | ||||
| 115 | next unless $sub =~ /$re/; | ||||
| 116 | { | ||||
| 117 | 2 | 73µs | 2 | 60µs | # spent 36µs (12+24) within Regexp::Common::BEGIN@117 which was called:
# once (12µs+24µs) by Hailo::Tokenizer::Words::BEGIN@7 at line 117 # spent 36µs making 1 call to Regexp::Common::BEGIN@117
# spent 24µs making 1 call to strict::unimport |
| 118 | *{caller() . "::$sub"} = $interface; | ||||
| 119 | } | ||||
| 120 | $exported {$sub} ++; | ||||
| 121 | } | ||||
| 122 | } | ||||
| 123 | else { | ||||
| 124 | next if $exported {$entry}; | ||||
| 125 | _croak "Can't export unknown subroutine &$entry" | ||||
| 126 | unless $sub_interface {$entry}; | ||||
| 127 | { | ||||
| 128 | 2 | 282µs | 2 | 56µs | # spent 34µs (11+22) within Regexp::Common::BEGIN@128 which was called:
# once (11µs+22µs) by Hailo::Tokenizer::Words::BEGIN@7 at line 128 # spent 34µs making 1 call to Regexp::Common::BEGIN@128
# spent 22µs making 1 call to strict::unimport |
| 129 | *{caller() . "::$entry"} = $sub_interface {$entry}; | ||||
| 130 | } | ||||
| 131 | $exported {$entry} ++; | ||||
| 132 | } | ||||
| 133 | } | ||||
| 134 | } | ||||
| 135 | |||||
| 136 | sub AUTOLOAD { _croak "Can't $AUTOLOAD" } | ||||
| 137 | |||||
| 138 | sub DESTROY {} | ||||
| 139 | |||||
| 140 | 1 | 800ns | my %cache; | ||
| 141 | |||||
| 142 | 1 | 24µs | 1 | 9µs | my $fpat = qr/^(-\w+)/; # spent 9µs making 1 call to Regexp::Common::CORE:qr |
| 143 | |||||
| 144 | # spent 93µs (54+39) within Regexp::Common::_decache which was called:
# once (54µs+39µs) by Hailo::Tokenizer::Words::CORE:regcomp at line 65 of lib/Hailo/Tokenizer/Words.pm | ||||
| 145 | 1 | 3µs | my @args = @{tied %{$_[0]}}; | ||
| 146 | 2 | 14µs | 2 | 3µs | my @nonflags = grep {!/$fpat/} @args; # spent 2µs making 1 call to Regexp::Common::CORE:regcomp
# spent 2µs making 1 call to Regexp::Common::CORE:match |
| 147 | 1 | 5µs | 1 | 11µs | my $cache = get_cache(@nonflags); # spent 11µs making 1 call to Regexp::Common::get_cache |
| 148 | 1 | 2µs | _croak "Can't create unknown regex: \$RE{" | ||
| 149 | . join("}{",@args) . "}" | ||||
| 150 | unless exists $cache->{__VAL__}; | ||||
| 151 | 1 | 4µs | _croak "Perl $] does not support the pattern " | ||
| 152 | . "\$RE{" . join("}{",@args) | ||||
| 153 | . "}.\nYou need Perl $cache->{__VAL__}{version} or later" | ||||
| 154 | unless ($cache->{__VAL__}{version}||0) <= $]; | ||||
| 155 | 1 | 26µs | 4 | 14µs | my %flags = ( %{$cache->{__VAL__}{default}}, # spent 11µs making 2 calls to Regexp::Common::CORE:regcomp, avg 5µs/call
# spent 3µs making 2 calls to Regexp::Common::CORE:match, avg 1µs/call |
| 156 | 1 | 5µs | map { /$fpat\Q$;\E(.*)/ ? ($1 => $2) | ||
| 157 | : /$fpat/ ? ($1 => undef) | ||||
| 158 | : () | ||||
| 159 | } @args); | ||||
| 160 | 1 | 11µs | 1 | 11µs | $cache->{__VAL__}->_clone_with(\@args, \%flags); # spent 11µs making 1 call to Regexp::Common::Entry::_clone_with |
| 161 | } | ||||
| 162 | |||||
| 163 | 2 | 693µs | 2 | 134µs | # spent 77µs (20+57) within Regexp::Common::BEGIN@163 which was called:
# once (20µs+57µs) by Hailo::Tokenizer::Words::BEGIN@7 at line 163 # spent 77µs making 1 call to Regexp::Common::BEGIN@163
# spent 57µs making 1 call to overload::import |
| 164 | |||||
| 165 | |||||
| 166 | sub get_cache { | ||||
| 167 | 17 | 22µs | my $cache = \%cache; | ||
| 168 | 17 | 43µs | foreach (@_) { | ||
| 169 | 34 | 96µs | $cache = $cache->{$_} | ||
| 170 | || ($cache->{$_} = {}); | ||||
| 171 | } | ||||
| 172 | 17 | 69µs | return $cache; | ||
| 173 | } | ||||
| 174 | |||||
| 175 | sub croak_version { | ||||
| 176 | my ($entry, @args) = @_; | ||||
| 177 | } | ||||
| 178 | |||||
| 179 | # spent 1.68ms (1.24+446µs) within Regexp::Common::pattern which was called 16 times, avg 105µs/call:
# once (117µs+46µs) by Regexp::Common::import at line 21 of Regexp/Common/URI/fax.pm
# once (86µs+31µs) by Regexp::Common::import at line 25 of Regexp/Common/URI/tel.pm
# once (85µs+32µs) by Regexp::Common::import at line 25 of Regexp/Common/URI/fax.pm
# once (82µs+34µs) by Regexp::Common::import at line 36 of Regexp/Common/URI/ftp.pm
# once (78µs+28µs) by Regexp::Common::import at line 22 of Regexp/Common/URI/tv.pm
# once (81µs+25µs) by Regexp::Common::import at line 21 of Regexp/Common/URI/wais.pm
# once (78µs+26µs) by Regexp::Common::import at line 22 of Regexp/Common/URI/pop.pm
# once (74µs+29µs) by Regexp::Common::import at line 26 of Regexp/Common/URI/http.pm
# once (73µs+28µs) by Regexp::Common::import at line 37 of Regexp/Common/URI/gopher.pm
# once (75µs+25µs) by Regexp::Common::import at line 21 of Regexp/Common/URI/tel.pm
# once (73µs+26µs) by Regexp::Common::import at line 20 of Regexp/Common/URI/file.pm
# once (73µs+26µs) by Regexp::Common::import at line 21 of Regexp/Common/URI/prospero.pm
# once (72µs+26µs) by Regexp::Common::import at line 25 of Regexp/Common/URI/news.pm
# once (71µs+25µs) by Regexp::Common::import at line 19 of Regexp/Common/URI/telnet.pm
# once (70µs+23µs) by Regexp::Common::import at line 29 of Regexp/Common/URI/news.pm
# once (47µs+15µs) by Regexp::Common::import at line 43 of Regexp/Common/URI.pm | ||||
| 180 | 16 | 52µs | my %spec = @_; | ||
| 181 | 16 | 27µs | _croak 'pattern() requires argument: name => [ @list ]' | ||
| 182 | unless $spec{name} && ref $spec{name} eq 'ARRAY'; | ||||
| 183 | 16 | 16µs | _croak 'pattern() requires argument: create => $sub_ref_or_string' | ||
| 184 | unless $spec{create}; | ||||
| 185 | |||||
| 186 | 16 | 31µs | if (ref $spec{create} ne "CODE") { | ||
| 187 | 12 | 21µs | my $fixed_str = "$spec{create}"; | ||
| 188 | $spec{create} = sub { $fixed_str } | ||||
| 189 | 12 | 33µs | } | ||
| 190 | |||||
| 191 | 16 | 16µs | my @nonflags; | ||
| 192 | 16 | 15µs | my %default; | ||
| 193 | 16 | 43µs | foreach ( @{$spec{name}} ) { | ||
| 194 | 37 | 606µs | 140 | 200µs | if (/$fpat=(.*)/) { # spent 109µs making 70 calls to Regexp::Common::CORE:regcomp, avg 2µs/call
# spent 91µs making 70 calls to Regexp::Common::CORE:match, avg 1µs/call |
| 195 | $default{$1} = $2; | ||||
| 196 | } | ||||
| 197 | elsif (/$fpat\s*$/) { | ||||
| 198 | $default{$1} = undef; | ||||
| 199 | } | ||||
| 200 | else { | ||||
| 201 | 33 | 50µs | push @nonflags, $_; | ||
| 202 | } | ||||
| 203 | } | ||||
| 204 | |||||
| 205 | 16 | 60µs | 16 | 201µs | my $entry = get_cache(@nonflags); # spent 201µs making 16 calls to Regexp::Common::get_cache, avg 13µs/call |
| 206 | |||||
| 207 | 16 | 19µs | if ($entry->{__VAL__}) { | ||
| 208 | _carp "Overriding \$RE{" | ||||
| 209 | . join("}{",@nonflags) | ||||
| 210 | . "}"; | ||||
| 211 | } | ||||
| 212 | |||||
| 213 | 16 | 103µs | $entry->{__VAL__} = bless { | ||
| 214 | create => $spec{create}, | ||||
| 215 | match => $spec{match} || \&generic_match, | ||||
| 216 | subs => $spec{subs} || \&generic_subs, | ||||
| 217 | version => $spec{version}, | ||||
| 218 | default => \%default, | ||||
| 219 | }, 'Regexp::Common::Entry'; | ||||
| 220 | |||||
| 221 | 49 | 228µs | 33 | 45µs | foreach (@nonflags) {s/\W/X/g} # spent 45µs making 33 calls to Regexp::Common::CORE:subst, avg 1µs/call |
| 222 | 16 | 29µs | my $subname = "RE_" . join ("_", @nonflags); | ||
| 223 | $sub_interface{$subname} = sub { | ||||
| 224 | push @_ => undef if @_ % 2; | ||||
| 225 | my %flags = @_; | ||||
| 226 | my $pat = $spec{create}->($entry->{__VAL__}, | ||||
| 227 | {%default, %flags}, \@nonflags); | ||||
| 228 | if (exists $flags{-keep}) { $pat =~ s/\Q(?k:/(/g; } | ||||
| 229 | else { $pat =~ s/\Q(?k:/(?:/g; } | ||||
| 230 | return exists $flags {-i} ? qr /(?i:$pat)/ : qr/$pat/; | ||||
| 231 | 16 | 63µs | }; | ||
| 232 | |||||
| 233 | 16 | 71µs | return 1; | ||
| 234 | } | ||||
| 235 | |||||
| 236 | sub generic_match {$_ [1] =~ /$_[0]/} | ||||
| 237 | sub generic_subs {$_ [1] =~ s/$_[0]/$_[2]/} | ||||
| 238 | |||||
| 239 | sub matches { | ||||
| 240 | my ($self, $str) = @_; | ||||
| 241 | my $entry = $self -> _decache; | ||||
| 242 | $entry -> {match} -> ($entry, $str); | ||||
| 243 | } | ||||
| 244 | |||||
| 245 | sub subs { | ||||
| 246 | my ($self, $str, $newstr) = @_; | ||||
| 247 | my $entry = $self -> _decache; | ||||
| 248 | $entry -> {subs} -> ($entry, $str, $newstr); | ||||
| 249 | return $str; | ||||
| 250 | } | ||||
| 251 | |||||
| 252 | |||||
| 253 | package Regexp::Common::Entry; | ||||
| 254 | # use Carp; | ||||
| 255 | |||||
| 256 | use overload | ||||
| 257 | # spent 70µs (20+50) within Regexp::Common::Entry::BEGIN@257 which was called:
# once (20µs+50µs) by Hailo::Tokenizer::Words::BEGIN@7 at line 268
# spent 125µs (33+93) within Regexp::Common::Entry::__ANON__[/home/hinrik/perl5/perlbrew/perls/perl-5.13.5/lib/site_perl/5.13.5/Regexp/Common.pm:268] which was called:
# once (33µs+93µs) by Hailo::Tokenizer::Words::CORE:regcomp at line 65 of lib/Hailo/Tokenizer/Words.pm | ||||
| 258 | 1 | 1µs | my ($self) = @_; | ||
| 259 | 1 | 7µs | 1 | 80µs | my $pat = $self->{create}->($self, $self->{flags}, $self->{args}); # spent 80µs making 1 call to Regexp::Common::URI::__ANON__[Regexp/Common/URI.pm:42] |
| 260 | 1 | 4µs | if (exists $self->{flags}{-keep}) { | ||
| 261 | $pat =~ s/\Q(?k:/(/g; | ||||
| 262 | } | ||||
| 263 | else { | ||||
| 264 | 1 | 20µs | 1 | 12µs | $pat =~ s/\Q(?k:/(?:/g; # spent 12µs making 1 call to Regexp::Common::Entry::CORE:subst |
| 265 | } | ||||
| 266 | 1 | 2µs | if (exists $self->{flags}{-i}) { $pat = "(?i)$pat" } | ||
| 267 | 1 | 11µs | return $pat; | ||
| 268 | 2 | 280µs | 2 | 120µs | }; # spent 70µs making 1 call to Regexp::Common::Entry::BEGIN@257
# spent 50µs making 1 call to overload::import |
| 269 | |||||
| 270 | # spent 11µs within Regexp::Common::Entry::_clone_with which was called:
# once (11µs+0s) by Regexp::Common::_decache at line 160 | ||||
| 271 | 1 | 2µs | my ($self, $args, $flags) = @_; | ||
| 272 | 1 | 11µs | bless { %$self, args=>$args, flags=>$flags }, ref $self; | ||
| 273 | } | ||||
| 274 | |||||
| 275 | |||||
| 276 | =pod | ||||
| 277 | |||||
| 278 | =head1 NAME | ||||
| 279 | |||||
| 280 | Regexp::Common - Provide commonly requested regular expressions | ||||
| 281 | |||||
| 282 | =head1 SYNOPSIS | ||||
| 283 | |||||
| 284 | # STANDARD USAGE | ||||
| 285 | |||||
| 286 | use Regexp::Common; | ||||
| 287 | |||||
| 288 | while (<>) { | ||||
| 289 | /$RE{num}{real}/ and print q{a number}; | ||||
| 290 | /$RE{quoted}/ and print q{a ['"`] quoted string}; | ||||
| 291 | /$RE{delimited}{-delim=>'/'}/ and print q{a /.../ sequence}; | ||||
| 292 | /$RE{balanced}{-parens=>'()'}/ and print q{balanced parentheses}; | ||||
| 293 | /$RE{profanity}/ and print q{a #*@%-ing word}; | ||||
| 294 | } | ||||
| 295 | |||||
| 296 | |||||
| 297 | # SUBROUTINE-BASED INTERFACE | ||||
| 298 | |||||
| 299 | use Regexp::Common 'RE_ALL'; | ||||
| 300 | |||||
| 301 | while (<>) { | ||||
| 302 | $_ =~ RE_num_real() and print q{a number}; | ||||
| 303 | $_ =~ RE_quoted() and print q{a ['"`] quoted string}; | ||||
| 304 | $_ =~ RE_delimited(-delim=>'/') and print q{a /.../ sequence}; | ||||
| 305 | $_ =~ RE_balanced(-parens=>'()') and print q{balanced parentheses}; | ||||
| 306 | $_ =~ RE_profanity() and print q{a #*@%-ing word}; | ||||
| 307 | } | ||||
| 308 | |||||
| 309 | |||||
| 310 | # IN-LINE MATCHING... | ||||
| 311 | |||||
| 312 | if ( $RE{num}{int}->matches($text) ) {...} | ||||
| 313 | |||||
| 314 | |||||
| 315 | # ...AND SUBSTITUTION | ||||
| 316 | |||||
| 317 | my $cropped = $RE{ws}{crop}->subs($uncropped); | ||||
| 318 | |||||
| 319 | |||||
| 320 | # ROLL-YOUR-OWN PATTERNS | ||||
| 321 | |||||
| 322 | use Regexp::Common 'pattern'; | ||||
| 323 | |||||
| 324 | pattern name => ['name', 'mine'], | ||||
| 325 | create => '(?i:J[.]?\s+A[.]?\s+Perl-Hacker)', | ||||
| 326 | ; | ||||
| 327 | |||||
| 328 | my $name_matcher = $RE{name}{mine}; | ||||
| 329 | |||||
| 330 | pattern name => [ 'lineof', '-char=_' ], | ||||
| 331 | create => sub { | ||||
| 332 | my ($self, $flags) = @_; | ||||
| 333 | my $char = quotemeta $flags->{-char}; | ||||
| 334 | return "(?:^$char+\$)"; | ||||
| 335 | }, | ||||
| 336 | match => sub { | ||||
| 337 | my ($self, $str) = @_; | ||||
| 338 | return $str !~ /[^$self->{flags}{-char}]/; | ||||
| 339 | }, | ||||
| 340 | subs => sub { | ||||
| 341 | my ($self, $str, $replacement) = @_; | ||||
| 342 | $_[1] =~ s/^$self->{flags}{-char}+$//g; | ||||
| 343 | }, | ||||
| 344 | ; | ||||
| 345 | |||||
| 346 | my $asterisks = $RE{lineof}{-char=>'*'}; | ||||
| 347 | |||||
| 348 | # DECIDING WHICH PATTERNS TO LOAD. | ||||
| 349 | |||||
| 350 | use Regexp::Common qw /comment number/; # Comment and number patterns. | ||||
| 351 | use Regexp::Common qw /no_defaults/; # Don't load any patterns. | ||||
| 352 | use Regexp::Common qw /!delimited/; # All, but delimited patterns. | ||||
| 353 | |||||
| 354 | |||||
| 355 | =head1 DESCRIPTION | ||||
| 356 | |||||
| 357 | By default, this module exports a single hash (C<%RE>) that stores or generates | ||||
| 358 | commonly needed regular expressions (see L<"List of available patterns">). | ||||
| 359 | |||||
| 360 | There is an alternative, subroutine-based syntax described in | ||||
| 361 | L<"Subroutine-based interface">. | ||||
| 362 | |||||
| 363 | |||||
| 364 | =head2 General syntax for requesting patterns | ||||
| 365 | |||||
| 366 | To access a particular pattern, C<%RE> is treated as a hierarchical hash of | ||||
| 367 | hashes (of hashes...), with each successive key being an identifier. For | ||||
| 368 | example, to access the pattern that matches real numbers, you | ||||
| 369 | specify: | ||||
| 370 | |||||
| 371 | $RE{num}{real} | ||||
| 372 | |||||
| 373 | and to access the pattern that matches integers: | ||||
| 374 | |||||
| 375 | $RE{num}{int} | ||||
| 376 | |||||
| 377 | Deeper layers of the hash are used to specify I<flags>: arguments that | ||||
| 378 | modify the resulting pattern in some way. The keys used to access these | ||||
| 379 | layers are prefixed with a minus sign and may have a value; if a value | ||||
| 380 | is given, it's done by using a multidimensional key. | ||||
| 381 | For example, to access the pattern that | ||||
| 382 | matches base-2 real numbers with embedded commas separating | ||||
| 383 | groups of three digits (e.g. 10,101,110.110101101): | ||||
| 384 | |||||
| 385 | $RE{num}{real}{-base => 2}{-sep => ','}{-group => 3} | ||||
| 386 | |||||
| 387 | Through the magic of Perl, these flag layers may be specified in any order | ||||
| 388 | (and even interspersed through the identifier keys!) | ||||
| 389 | so you could get the same pattern with: | ||||
| 390 | |||||
| 391 | $RE{num}{real}{-sep => ','}{-group => 3}{-base => 2} | ||||
| 392 | |||||
| 393 | or: | ||||
| 394 | |||||
| 395 | $RE{num}{-base => 2}{real}{-group => 3}{-sep => ','} | ||||
| 396 | |||||
| 397 | or even: | ||||
| 398 | |||||
| 399 | $RE{-base => 2}{-group => 3}{-sep => ','}{num}{real} | ||||
| 400 | |||||
| 401 | etc. | ||||
| 402 | |||||
| 403 | Note, however, that the relative order amongst the identifier keys | ||||
| 404 | I<is> significant. That is: | ||||
| 405 | |||||
| 406 | $RE{list}{set} | ||||
| 407 | |||||
| 408 | would not be the same as: | ||||
| 409 | |||||
| 410 | $RE{set}{list} | ||||
| 411 | |||||
| 412 | =head2 Flag syntax | ||||
| 413 | |||||
| 414 | In versions prior to 2.113, flags could also be written as | ||||
| 415 | C<{"-flag=value"}>. This no longer works, although C<{"-flag$;value"}> | ||||
| 416 | still does. However, C<< {-flag => 'value'} >> is the preferred syntax. | ||||
| 417 | |||||
| 418 | =head2 Universal flags | ||||
| 419 | |||||
| 420 | Normally, flags are specific to a single pattern. | ||||
| 421 | However, there are two flags that all patterns may specify. | ||||
| 422 | |||||
| 423 | =over 4 | ||||
| 424 | |||||
| 425 | =item C<-keep> | ||||
| 426 | |||||
| 427 | By default, the patterns provided by C<%RE> contain no capturing | ||||
| 428 | parentheses. However, if the C<-keep> flag is specified (it requires | ||||
| 429 | no value) then any significant substrings that the pattern matches | ||||
| 430 | are captured. For example: | ||||
| 431 | |||||
| 432 | if ($str =~ $RE{num}{real}{-keep}) { | ||||
| 433 | $number = $1; | ||||
| 434 | $whole = $3; | ||||
| 435 | $decimals = $5; | ||||
| 436 | } | ||||
| 437 | |||||
| 438 | Special care is needed if a "kept" pattern is interpolated into a | ||||
| 439 | larger regular expression, as the presence of other capturing | ||||
| 440 | parentheses is likely to change the "number variables" into which significant | ||||
| 441 | substrings are saved. | ||||
| 442 | |||||
| 443 | See also L<"Adding new regular expressions">, which describes how to create | ||||
| 444 | new patterns with "optional" capturing brackets that respond to C<-keep>. | ||||
| 445 | |||||
| 446 | =item C<-i> | ||||
| 447 | |||||
| 448 | Some patterns or subpatterns only match lowercase or uppercase letters. | ||||
| 449 | If one wants to do case insensitive matching, one option is to use | ||||
| 450 | the C</i> regexp modifier, or the special sequence C<(?i)>. But if the | ||||
| 451 | functional interface is used, one does not have this option. The | ||||
| 452 | C<-i> switch solves this problem; by using it, the pattern will do | ||||
| 453 | case insensitive matching. | ||||
| 454 | |||||
| 455 | =back | ||||
| 456 | |||||
| 457 | =head2 OO interface and inline matching/substitution | ||||
| 458 | |||||
| 459 | The patterns returned from C<%RE> are objects, so rather than writing: | ||||
| 460 | |||||
| 461 | if ($str =~ /$RE{some}{pattern}/ ) {...} | ||||
| 462 | |||||
| 463 | you can write: | ||||
| 464 | |||||
| 465 | if ( $RE{some}{pattern}->matches($str) ) {...} | ||||
| 466 | |||||
| 467 | For matching this would seem to have no great advantage apart from readability | ||||
| 468 | (but see below). | ||||
| 469 | |||||
| 470 | For substitutions, it has other significant benefits. Frequently you want to | ||||
| 471 | perform a substitution on a string without changing the original. Most people | ||||
| 472 | use this: | ||||
| 473 | |||||
| 474 | $changed = $original; | ||||
| 475 | $changed =~ s/$RE{some}{pattern}/$replacement/; | ||||
| 476 | |||||
| 477 | The more adept use: | ||||
| 478 | |||||
| 479 | ($changed = $original) =~ s/$RE{some}{pattern}/$replacement/; | ||||
| 480 | |||||
| 481 | Regexp::Common allows you to write this: | ||||
| 482 | |||||
| 483 | $changed = $RE{some}{pattern}->subs($original=>$replacement); | ||||
| 484 | |||||
| 485 | Apart from reducing precedence-angst, this approach has the added | ||||
| 486 | advantages that the substitution behaviour can be optimized from the | ||||
| 487 | regular expression, and the replacement string can be provided by | ||||
| 488 | default (see L<"Adding new regular expressions">). | ||||
| 489 | |||||
| 490 | For example, in the implementation of this substitution: | ||||
| 491 | |||||
| 492 | $cropped = $RE{ws}{crop}->subs($uncropped); | ||||
| 493 | |||||
| 494 | the default empty string is provided automatically, and the substitution is | ||||
| 495 | optimized to use: | ||||
| 496 | |||||
| 497 | $uncropped =~ s/^\s+//; | ||||
| 498 | $uncropped =~ s/\s+$//; | ||||
| 499 | |||||
| 500 | rather than: | ||||
| 501 | |||||
| 502 | $uncropped =~ s/^\s+|\s+$//g; | ||||
| 503 | |||||
| 504 | |||||
| 505 | =head2 Subroutine-based interface | ||||
| 506 | |||||
| 507 | The hash-based interface was chosen because it allows regexes to be | ||||
| 508 | effortlessly interpolated, and because it also allows them to be | ||||
| 509 | "curried". For example: | ||||
| 510 | |||||
| 511 | my $num = $RE{num}{int}; | ||||
| 512 | |||||
| 513 | my $commad = $num->{-sep=>','}{-group=>3}; | ||||
| 514 | my $duodecimal = $num->{-base=>12}; | ||||
| 515 | |||||
| 516 | |||||
| 517 | However, the use of tied hashes does make the access to Regexp::Common | ||||
| 518 | patterns slower than it might otherwise be. In contexts where impatience | ||||
| 519 | overrules laziness, Regexp::Common provides an additional | ||||
| 520 | subroutine-based interface. | ||||
| 521 | |||||
| 522 | For each (sub-)entry in the C<%RE> hash (C<$RE{key1}{key2}{etc}>), there | ||||
| 523 | is a corresponding exportable subroutine: C<RE_key1_key2_etc()>. The name of | ||||
| 524 | each subroutine is the underscore-separated concatenation of the I<non-flag> | ||||
| 525 | keys that locate the same pattern in C<%RE>. Flags are passed to the subroutine | ||||
| 526 | in its argument list. Thus: | ||||
| 527 | |||||
| 528 | use Regexp::Common qw( RE_ws_crop RE_num_real RE_profanity ); | ||||
| 529 | |||||
| 530 | $str =~ RE_ws_crop() and die "Surrounded by whitespace"; | ||||
| 531 | |||||
| 532 | $str =~ RE_num_real(-base=>8, -sep=>" ") or next; | ||||
| 533 | |||||
| 534 | $offensive = RE_profanity(-keep); | ||||
| 535 | $str =~ s/$offensive/$bad{$1}++; "<expletive deleted>"/ge; | ||||
| 536 | |||||
| 537 | Note that, unlike the hash-based interface (which returns objects), these | ||||
| 538 | subroutines return ordinary C<qr>'d regular expressions. Hence they do not | ||||
| 539 | curry, nor do they provide the OO match and substitution inlining described | ||||
| 540 | in the previous section. | ||||
| 541 | |||||
| 542 | It is also possible to export subroutines for all available patterns like so: | ||||
| 543 | |||||
| 544 | use Regexp::Common 'RE_ALL'; | ||||
| 545 | |||||
| 546 | Or you can export all subroutines with a common prefix of keys like so: | ||||
| 547 | |||||
| 548 | use Regexp::Common 'RE_num_ALL'; | ||||
| 549 | |||||
| 550 | which will export C<RE_num_int> and C<RE_num_real> (and if you have | ||||
| 551 | created more patterns whose first key is I<num>, those will be exported | ||||
| 552 | as well). In general, I<RE_key1_..._keyn_ALL> will export all subroutines | ||||
| 553 | whose pattern names have first keys I<key1> ... I<keyn>. | ||||
| 554 | |||||
| 555 | |||||
| 556 | =head2 Adding new regular expressions | ||||
| 557 | |||||
| 558 | You can add your own regular expressions to the C<%RE> hash at run-time, | ||||
| 559 | using the exportable C<pattern> subroutine. It expects a hash-like list of | ||||
| 560 | key/value pairs that specify the behaviour of the pattern. The various | ||||
| 561 | possible argument pairs are: | ||||
| 562 | |||||
| 563 | =over 4 | ||||
| 564 | |||||
| 565 | =item C<name =E<gt> [ @list ]> | ||||
| 566 | |||||
| 567 | A required argument that specifies the name of the pattern, and any | ||||
| 568 | flags it may take, via a reference to a list of strings. For example: | ||||
| 569 | |||||
| 570 | pattern name => [qw( line of -char )], | ||||
| 571 | # other args here | ||||
| 572 | ; | ||||
| 573 | |||||
| 574 | This specifies an entry C<$RE{line}{of}>, which may take a C<-char> flag. | ||||
| 575 | |||||
| 576 | Flags may also be specified with a default value, which is then used whenever | ||||
| 577 | the flag is specified without an explicit value (but not when the flag is | ||||
| 578 | omitted). For example: | ||||
| 579 | |||||
| 580 | pattern name => [qw( line of -char=_ )], | ||||
| 581 | # default char is '_' | ||||
| 582 | # other args here | ||||
| 583 | ; | ||||
| 584 | |||||
| 585 | |||||
| 586 | =item C<create =E<gt> $sub_ref_or_string> | ||||
| 587 | |||||
| 588 | A required argument that specifies either a string that is to be returned | ||||
| 589 | as the pattern: | ||||
| 590 | |||||
| 591 | pattern name => [qw( line of underscores )], | ||||
| 592 | create => q/(?:^_+$)/ | ||||
| 593 | ; | ||||
| 594 | |||||
| 595 | or a reference to a subroutine that will be called to create the pattern: | ||||
| 596 | |||||
| 597 | pattern name => [qw( line of -char=_ )], | ||||
| 598 | create => sub { | ||||
| 599 | my ($self, $flags) = @_; | ||||
| 600 | my $char = quotemeta $flags->{-char}; | ||||
| 601 | return "(?:^$char+\$)"; | ||||
| 602 | }, | ||||
| 603 | ; | ||||
| 604 | |||||
| 605 | If the subroutine version is used, the subroutine will be called with | ||||
| 606 | three arguments: a reference to the pattern object itself, a reference | ||||
| 607 | to a hash containing the flags and their values, | ||||
| 608 | and a reference to an array containing the non-flag keys. | ||||
| 609 | |||||
| 610 | Whatever the subroutine returns is stringified as the pattern. | ||||
| 611 | |||||
| 612 | No matter how the pattern is created, it is immediately postprocessed to | ||||
| 613 | include or exclude capturing parentheses (according to the value of the | ||||
| 614 | C<-keep> flag). To specify such "optional" capturing parentheses within | ||||
| 615 | the regular expression associated with C<create>, use the notation | ||||
| 616 | C<(?k:...)>. Any parentheses of this type will be converted to C<(...)> | ||||
| 617 | when the C<-keep> flag is specified, or C<(?:...)> when it is not. | ||||
| 618 | It is a Regexp::Common convention that the outermost capturing parentheses | ||||
| 619 | always capture the entire pattern, but this is not enforced. | ||||
| 620 | |||||
| 621 | |||||
| 622 | =item C<matches =E<gt> $sub_ref> | ||||
| 623 | |||||
| 624 | An optional argument that specifies a subroutine that is to be called when | ||||
| 625 | the C<$RE{...}-E<gt>matches(...)> method of this pattern is invoked. | ||||
| 626 | |||||
| 627 | The subroutine should expect two arguments: a reference to the pattern object | ||||
| 628 | itself, and the string to be matched against. | ||||
| 629 | |||||
| 630 | It should return the same types of values as a C<m/.../> does. | ||||
| 631 | |||||
| 632 | pattern name => [qw( line of -char )], | ||||
| 633 | create => sub {...}, | ||||
| 634 | matches => sub { | ||||
| 635 | my ($self, $str) = @_; | ||||
| 636 | $str !~ /[^$self->{flags}{-char}]/; | ||||
| 637 | }, | ||||
| 638 | ; | ||||
| 639 | |||||
| 640 | |||||
| 641 | =item C<subs =E<gt> $sub_ref> | ||||
| 642 | |||||
| 643 | An optional argument that specifies a subroutine that is to be called when | ||||
| 644 | the C<$RE{...}-E<gt>subs(...)> method of this pattern is invoked. | ||||
| 645 | |||||
| 646 | The subroutine should expect three arguments: a reference to the pattern object | ||||
| 647 | itself, the string to be changed, and the value to be substituted into it. | ||||
| 648 | The third argument may be C<undef>, indicating the default substitution is | ||||
| 649 | required. | ||||
| 650 | |||||
| 651 | The subroutine should return the same types of values as an C<s/.../.../> does. | ||||
| 652 | |||||
| 653 | For example: | ||||
| 654 | |||||
| 655 | pattern name => [ 'lineof', '-char=_' ], | ||||
| 656 | create => sub {...}, | ||||
| 657 | subs => sub { | ||||
| 658 | my ($self, $str, $ignore_replacement) = @_; | ||||
| 659 | $_[1] =~ s/^$self->{flags}{-char}+$//g; | ||||
| 660 | }, | ||||
| 661 | ; | ||||
| 662 | |||||
| 663 | Note that such a subroutine will almost always need to modify C<$_[1]> directly. | ||||
| 664 | |||||
| 665 | |||||
| 666 | =item C<version =E<gt> $minimum_perl_version> | ||||
| 667 | |||||
| 668 | If this argument is given, it specifies the minimum version of perl required | ||||
| 669 | to use the new pattern. Attempts to use the pattern with earlier versions of | ||||
| 670 | perl will generate a fatal diagnostic. | ||||
| 671 | |||||
| 672 | =back | ||||
| 673 | |||||
| 674 | =head2 Loading specific sets of patterns. | ||||
| 675 | |||||
| 676 | By default, all the sets of patterns listed below are made available. | ||||
| 677 | However, it is possible to indicate which sets of patterns should | ||||
| 678 | be made available - the wanted sets should be given as arguments to | ||||
| 679 | C<use>. Alternatively, it is also possible to indicate which sets of | ||||
| 680 | patterns should not be made available - those sets will be given as | ||||
| 681 | argument to the C<use> statement, but are preceded by an exclamation | ||||
| 682 | mark. The argument I<no_defaults> indicates none of the default patterns | ||||
| 683 | should be made available. This is useful for instance if all you want | ||||
| 684 | is the C<pattern()> subroutine. | ||||
| 685 | |||||
| 686 | Examples: | ||||
| 687 | |||||
| 688 | use Regexp::Common qw /comment number/; # Comment and number patterns. | ||||
| 689 | use Regexp::Common qw /no_defaults/; # Don't load any patterns. | ||||
| 690 | use Regexp::Common qw /!delimited/; # All, but delimited patterns. | ||||
| 691 | |||||
| 692 | It's also possible to load your own set of patterns. If you have a | ||||
| 693 | module C<Regexp::Common::my_patterns> that makes patterns available, | ||||
| 694 | you can have it made available with | ||||
| 695 | |||||
| 696 | use Regexp::Common qw /my_patterns/; | ||||
| 697 | |||||
| 698 | Note that the default patterns will still be made available - only if | ||||
| 699 | you use I<no_defaults>, or mention one of the default sets explicitly, | ||||
| 700 | the non mentioned defaults aren't made available. | ||||
| 701 | |||||
| 702 | =head2 List of available patterns | ||||
| 703 | |||||
| 704 | The patterns listed below are currently available. Each set of patterns | ||||
| 705 | has its own manual page describing the details. For each pattern set | ||||
| 706 | named I<name>, the manual page I<Regexp::Common::name> describes the | ||||
| 707 | details. | ||||
| 708 | |||||
| 709 | Currently available are: | ||||
| 710 | |||||
| 711 | =over 4 | ||||
| 712 | |||||
| 713 | =item Regexp::Common::balanced | ||||
| 714 | |||||
| 715 | Provides regexes for strings with balanced parenthesized delimiters. | ||||
| 716 | |||||
| 717 | =item Regexp::Common::comment | ||||
| 718 | |||||
| 719 | Provides regexes for comments of various languages (43 languages | ||||
| 720 | currently). | ||||
| 721 | |||||
| 722 | =item Regexp::Common::delimited | ||||
| 723 | |||||
| 724 | Provides regexes for delimited strings. | ||||
| 725 | |||||
| 726 | =item Regexp::Common::lingua | ||||
| 727 | |||||
| 728 | Provides regexes for palindromes. | ||||
| 729 | |||||
| 730 | =item Regexp::Common::list | ||||
| 731 | |||||
| 732 | Provides regexes for lists. | ||||
| 733 | |||||
| 734 | =item Regexp::Common::net | ||||
| 735 | |||||
| 736 | Provides regexes for IPv4 addresses and MAC addresses. | ||||
| 737 | |||||
| 738 | =item Regexp::Common::number | ||||
| 739 | |||||
| 740 | Provides regexes for numbers (integers and reals). | ||||
| 741 | |||||
| 742 | =item Regexp::Common::profanity | ||||
| 743 | |||||
| 744 | Provides regexes for profanity. | ||||
| 745 | |||||
| 746 | =item Regexp::Common::whitespace | ||||
| 747 | |||||
| 748 | Provides regexes for leading and trailing whitespace. | ||||
| 749 | |||||
| 750 | =item Regexp::Common::zip | ||||
| 751 | |||||
| 752 | Provides regexes for zip codes. | ||||
| 753 | |||||
| 754 | =back | ||||
| 755 | |||||
| 756 | =head2 Forthcoming patterns and features | ||||
| 757 | |||||
| 758 | Future releases of the module will also provide patterns for the following: | ||||
| 759 | |||||
| 760 | * email addresses | ||||
| 761 | * HTML/XML tags | ||||
| 762 | * more numerical matchers, | ||||
| 763 | * mail headers (including multiline ones), | ||||
| 764 | * more URLS | ||||
| 765 | * telephone numbers of various countries | ||||
| 766 | * currency (universal 3 letter format, Latin-1, currency names) | ||||
| 767 | * dates | ||||
| 768 | * binary formats (e.g. UUencoded, MIMEd) | ||||
| 769 | |||||
| 770 | If you have other patterns or pattern generators that you think would be | ||||
| 771 | generally useful, please send them to the maintainer -- preferably as source | ||||
| 772 | code using the C<pattern> subroutine. Submissions that include a set of | ||||
| 773 | tests will be especially welcome. | ||||
| 774 | |||||
| 775 | |||||
| 776 | =head1 DIAGNOSTICS | ||||
| 777 | |||||
| 778 | =over 4 | ||||
| 779 | |||||
| 780 | =item C<Can't export unknown subroutine %s> | ||||
| 781 | |||||
| 782 | The subroutine-based interface didn't recognize the requested subroutine. | ||||
| 783 | Often caused by a spelling mistake or an incompletely specified name. | ||||
| 784 | |||||
| 785 | |||||
| 786 | =item C<Can't create unknown regex: $RE{...}> | ||||
| 787 | |||||
| 788 | Regexp::Common doesn't have a generator for the requested pattern. | ||||
| 789 | Often indicates a misspelt or missing parameter. | ||||
| 790 | |||||
| 791 | =item | ||||
| 792 | C<Perl %f does not support the pattern $RE{...}. | ||||
| 793 | You need Perl %f or later> | ||||
| 794 | |||||
| 795 | The requested pattern requires advanced regex features (e.g. recursion) | ||||
| 796 | that are not available in your version of Perl. Time to upgrade. | ||||
| 797 | |||||
| 798 | =item C<< pattern() requires argument: name => [ @list ] >> | ||||
| 799 | |||||
| 800 | Every user-defined pattern specification must have a name. | ||||
| 801 | |||||
| 802 | =item C<< pattern() requires argument: create => $sub_ref_or_string >> | ||||
| 803 | |||||
| 804 | Every user-defined pattern specification must provide a pattern creation | ||||
| 805 | mechanism: either a pattern string or a reference to a subroutine that | ||||
| 806 | returns the pattern string. | ||||
| 807 | |||||
| 808 | =item C<Base must be between 1 and 36> | ||||
| 809 | |||||
| 810 | The C<< $RE{num}{real}{-base=>'I<N>'} >> pattern uses the characters [0-9A-Z] | ||||
| 811 | to represent the digits of various bases. Hence it only produces | ||||
| 812 | regular expressions for bases up to hexatrigesimal. | ||||
| 813 | |||||
| 814 | =item C<Must specify delimiter in $RE{delimited}> | ||||
| 815 | |||||
| 816 | The pattern has no default delimiter. | ||||
| 817 | You need to write: C<< $RE{delimited}{-delim=>'I<X>'} >> for some character I<X>. | ||||
| 818 | |||||
| 819 | =back | ||||
| 820 | |||||
| 821 | =head1 ACKNOWLEDGEMENTS | ||||
| 822 | |||||
| 823 | Deepest thanks to the many people who have encouraged and contributed to this | ||||
| 824 | project, especially: Elijah, Jarkko, Tom, Nat, Ed, and Vivek. | ||||
| 825 | |||||
| 826 | Further thanks go to: Alexandr Ciornii, Blair Zajac, Bob Stockdale, | ||||
| 827 | Charles Thomas, Chris Vertonghen, the CPAN Testers, David Hand, | ||||
| 828 | Fany, Geoffrey Leach, Hermann-Marcus Behrens, Jerome Quelin, Jim Cromie, | ||||
| 829 | Lars Wilke, Linda Julien, Mike Arms, Mike Castle, Mikko, Murat Uenalan, | ||||
| 830 | RafaE<235>l Garcia-Suarez, Ron Savage, Sam Vilain, Slaven Rezic, Smylers, | ||||
| 831 | Tim Maher, and all the others I've forgotten. | ||||
| 832 | |||||
| 833 | =head1 AUTHOR | ||||
| 834 | |||||
| 835 | Damian Conway (damian@conway.org) | ||||
| 836 | |||||
| 837 | =head1 MAINTENANCE | ||||
| 838 | |||||
| 839 | This package is maintained by Abigail S<(I<regexp-common@abigail.be>)>. | ||||
| 840 | |||||
| 841 | =head1 BUGS AND IRRITATIONS | ||||
| 842 | |||||
| 843 | Bound to be plenty. | ||||
| 844 | |||||
| 845 | For a start, there are many common regexes missing. | ||||
| 846 | Send them in to I<regexp-common@abigail.be>. | ||||
| 847 | |||||
| 848 | There are some POD issues when installing this module using a pre-5.6.0 perl; | ||||
| 849 | some manual pages may not install, or may not install correctly using a perl | ||||
| 850 | that is that old. You might consider upgrading your perl. | ||||
| 851 | |||||
| 852 | =head1 LICENSE and COPYRIGHT | ||||
| 853 | |||||
| 854 | This software is Copyright (c) 2001 - 2009, Damian Conway and Abigail. | ||||
| 855 | |||||
| 856 | This module is free software, and may be used under any of the following | ||||
| 857 | licenses: | ||||
| 858 | |||||
| 859 | 1) The Perl Artistic License. See the file COPYRIGHT.AL. | ||||
| 860 | 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2. | ||||
| 861 | 3) The BSD Licence. See the file COPYRIGHT.BSD. | ||||
| 862 | 4) The MIT Licence. See the file COPYRIGHT.MIT. | ||||
# spent 225µs within Regexp::Common::CORE:match which was called 183 times, avg 1µs/call:
# 70 times (91µs+0s) by Regexp::Common::pattern at line 194, avg 1µs/call
# 55 times (68µs+0s) by Regexp::Common::import at line 67, avg 1µs/call
# 55 times (62µs+0s) by Regexp::Common::import at line 109, avg 1µs/call
# 2 times (3µs+0s) by Regexp::Common::_decache at line 155, avg 1µs/call
# once (2µs+0s) by Regexp::Common::_decache at line 146 | |||||
# spent 9µs within Regexp::Common::CORE:qr which was called:
# once (9µs+0s) by Hailo::Tokenizer::Words::BEGIN@7 at line 142 | |||||
sub Regexp::Common::CORE:regcomp; # opcode | |||||
# spent 45µs within Regexp::Common::CORE:subst which was called 33 times, avg 1µs/call:
# 33 times (45µs+0s) by Regexp::Common::pattern at line 221, avg 1µs/call | |||||
# spent 12µs within Regexp::Common::Entry::CORE:subst which was called:
# once (12µs+0s) by Regexp::Common::Entry::__ANON__[/home/hinrik/perl5/perlbrew/perls/perl-5.13.5/lib/site_perl/5.13.5/Regexp/Common.pm:268] at line 264 |