| Filename | /home/ss5/perl5/perlbrew/perls/perl-5.22.0/lib/site_perl/5.22.0/URI/Escape.pm |
| Statements | Executed 19281 statements in 44.4ms |
| Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
|---|---|---|---|---|---|
| 3001 | 1 | 1 | 19.8ms | 25.1ms | URI::Escape::uri_escape |
| 1001 | 1 | 1 | 4.92ms | 5.44ms | URI::Escape::uri_unescape |
| 3001 | 1 | 1 | 3.46ms | 3.46ms | URI::Escape::CORE:regcomp (opcode) |
| 4002 | 2 | 1 | 2.36ms | 2.36ms | URI::Escape::CORE:subst (opcode) |
| 1 | 1 | 1 | 7µs | 16µs | URI::Escape::BEGIN@140 |
| 1 | 1 | 1 | 6µs | 8µs | URI::Escape::BEGIN@3 |
| 1 | 1 | 1 | 3µs | 6µs | URI::Escape::BEGIN@4 |
| 1 | 1 | 1 | 2µs | 2µs | URI::Escape::BEGIN@146 |
| 2 | 1 | 1 | 1µs | 1µs | URI::Escape::CORE:qr (opcode) |
| 0 | 0 | 0 | 0s | 0s | URI::Escape::_fail_hi |
| 0 | 0 | 0 | 0s | 0s | URI::Escape::escape_char |
| 0 | 0 | 0 | 0s | 0s | URI::Escape::uri_escape_utf8 |
| Line | Statements | Time on line | Calls | Time in subs | Code |
|---|---|---|---|---|---|
| 1 | package URI::Escape; | ||||
| 2 | |||||
| 3 | 2 | 11µs | 2 | 9µs | # spent 8µs (6+1) within URI::Escape::BEGIN@3 which was called:
# once (6µs+1µs) by URI::BEGIN@22 at line 3 # spent 8µs making 1 call to URI::Escape::BEGIN@3
# spent 1µs making 1 call to strict::import |
| 4 | 2 | 53µs | 2 | 8µs | # spent 6µs (3+2) within URI::Escape::BEGIN@4 which was called:
# once (3µs+2µs) by URI::BEGIN@22 at line 4 # spent 6µs making 1 call to URI::Escape::BEGIN@4
# spent 2µs making 1 call to warnings::import |
| 5 | |||||
| 6 | =head1 NAME | ||||
| 7 | |||||
| 8 | URI::Escape - Percent-encode and percent-decode unsafe characters | ||||
| 9 | |||||
| 10 | =head1 SYNOPSIS | ||||
| 11 | |||||
| 12 | use URI::Escape; | ||||
| 13 | $safe = uri_escape("10% is enough\n"); | ||||
| 14 | $verysafe = uri_escape("foo", "\0-\377"); | ||||
| 15 | $str = uri_unescape($safe); | ||||
| 16 | |||||
| 17 | =head1 DESCRIPTION | ||||
| 18 | |||||
| 19 | This module provides functions to percent-encode and percent-decode URI strings as | ||||
| 20 | defined by RFC 3986. Percent-encoding URIs is informally called "URI escaping". | ||||
| 21 | This is the terminology used by this module, which predates the formalization of the | ||||
| 22 | terms by the RFC by several years. | ||||
| 23 | |||||
| 24 | A URI consists of a restricted set of characters. The restricted set | ||||
| 25 | of characters consists of digits, letters, and a few graphic symbols | ||||
| 26 | chosen from those common to most of the character encodings and input | ||||
| 27 | facilities available to Internet users. They are made up of the | ||||
| 28 | "unreserved" and "reserved" character sets as defined in RFC 3986. | ||||
| 29 | |||||
| 30 | unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" | ||||
| 31 | reserved = ":" / "/" / "?" / "#" / "[" / "]" / "@" | ||||
| 32 | "!" / "$" / "&" / "'" / "(" / ")" | ||||
| 33 | / "*" / "+" / "," / ";" / "=" | ||||
| 34 | |||||
| 35 | In addition, any byte (octet) can be represented in a URI by an escape | ||||
| 36 | sequence: a triplet consisting of the character "%" followed by two | ||||
| 37 | hexadecimal digits. A byte can also be represented directly by a | ||||
| 38 | character, using the US-ASCII character for that octet. | ||||
| 39 | |||||
| 40 | Some of the characters are I<reserved> for use as delimiters or as | ||||
| 41 | part of certain URI components. These must be escaped if they are to | ||||
| 42 | be treated as ordinary data. Read RFC 3986 for further details. | ||||
| 43 | |||||
| 44 | The functions provided (and exported by default) from this module are: | ||||
| 45 | |||||
| 46 | =over 4 | ||||
| 47 | |||||
| 48 | =item uri_escape( $string ) | ||||
| 49 | |||||
| 50 | =item uri_escape( $string, $unsafe ) | ||||
| 51 | |||||
| 52 | Replaces each unsafe character in the $string with the corresponding | ||||
| 53 | escape sequence and returns the result. The $string argument should | ||||
| 54 | be a string of bytes. The uri_escape() function will croak if given a | ||||
| 55 | character with code above 255. Use uri_escape_utf8() if you know you | ||||
| 56 | have such chars or/and want chars in the 128 .. 255 range treated as | ||||
| 57 | UTF-8. | ||||
| 58 | |||||
| 59 | The uri_escape() function takes an optional second argument that | ||||
| 60 | overrides the set of characters that are to be escaped. The set is | ||||
| 61 | specified as a string that can be used in a regular expression | ||||
| 62 | character class (between [ ]). E.g.: | ||||
| 63 | |||||
| 64 | "\x00-\x1f\x7f-\xff" # all control and hi-bit characters | ||||
| 65 | "a-z" # all lower case characters | ||||
| 66 | "^A-Za-z" # everything not a letter | ||||
| 67 | |||||
| 68 | The default set of characters to be escaped is all those which are | ||||
| 69 | I<not> part of the C<unreserved> character class shown above as well | ||||
| 70 | as the reserved characters. I.e. the default is: | ||||
| 71 | |||||
| 72 | "^A-Za-z0-9\-\._~" | ||||
| 73 | |||||
| 74 | =item uri_escape_utf8( $string ) | ||||
| 75 | |||||
| 76 | =item uri_escape_utf8( $string, $unsafe ) | ||||
| 77 | |||||
| 78 | Works like uri_escape(), but will encode chars as UTF-8 before | ||||
| 79 | escaping them. This makes this function able to deal with characters | ||||
| 80 | with code above 255 in $string. Note that chars in the 128 .. 255 | ||||
| 81 | range will be escaped differently by this function compared to what | ||||
| 82 | uri_escape() would. For chars in the 0 .. 127 range there is no | ||||
| 83 | difference. | ||||
| 84 | |||||
| 85 | Equivalent to: | ||||
| 86 | |||||
| 87 | utf8::encode($string); | ||||
| 88 | my $uri = uri_escape($string); | ||||
| 89 | |||||
| 90 | Note: JavaScript has a function called escape() that produces the | ||||
| 91 | sequence "%uXXXX" for chars in the 256 .. 65535 range. This function | ||||
| 92 | has really nothing to do with URI escaping but some folks got confused | ||||
| 93 | since it "does the right thing" in the 0 .. 255 range. Because of | ||||
| 94 | this you sometimes see "URIs" with these kinds of escapes. The | ||||
| 95 | JavaScript encodeURIComponent() function is similar to uri_escape_utf8(). | ||||
| 96 | |||||
| 97 | =item uri_unescape($string,...) | ||||
| 98 | |||||
| 99 | Returns a string with each %XX sequence replaced with the actual byte | ||||
| 100 | (octet). | ||||
| 101 | |||||
| 102 | This does the same as: | ||||
| 103 | |||||
| 104 | $string =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; | ||||
| 105 | |||||
| 106 | but does not modify the string in-place as this RE would. Using the | ||||
| 107 | uri_unescape() function instead of the RE might make the code look | ||||
| 108 | cleaner and is a few characters less to type. | ||||
| 109 | |||||
| 110 | In a simple benchmark test I did, | ||||
| 111 | calling the function (instead of the inline RE above) if a few chars | ||||
| 112 | were unescaped was something like 40% slower, and something like 700% slower if none were. If | ||||
| 113 | you are going to unescape a lot of times it might be a good idea to | ||||
| 114 | inline the RE. | ||||
| 115 | |||||
| 116 | If the uri_unescape() function is passed multiple strings, then each | ||||
| 117 | one is returned unescaped. | ||||
| 118 | |||||
| 119 | =back | ||||
| 120 | |||||
| 121 | The module can also export the C<%escapes> hash, which contains the | ||||
| 122 | mapping from all 256 bytes to the corresponding escape codes. Lookup | ||||
| 123 | in this hash is faster than evaluating C<sprintf("%%%02X", ord($byte))> | ||||
| 124 | each time. | ||||
| 125 | |||||
| 126 | =head1 SEE ALSO | ||||
| 127 | |||||
| 128 | L<URI> | ||||
| 129 | |||||
| 130 | |||||
| 131 | =head1 COPYRIGHT | ||||
| 132 | |||||
| 133 | Copyright 1995-2004 Gisle Aas. | ||||
| 134 | |||||
| 135 | This program is free software; you can redistribute it and/or modify | ||||
| 136 | it under the same terms as Perl itself. | ||||
| 137 | |||||
| 138 | =cut | ||||
| 139 | |||||
| 140 | 3 | 40µs | 3 | 26µs | # spent 16µs (7+10) within URI::Escape::BEGIN@140 which was called:
# once (7µs+10µs) by URI::BEGIN@22 at line 140 # spent 16µs making 1 call to URI::Escape::BEGIN@140
# spent 5µs making 1 call to UNIVERSAL::VERSION
# spent 4µs making 1 call to Exporter::import |
| 141 | our %escapes; | ||||
| 142 | 1 | 800ns | our @EXPORT = qw(uri_escape uri_unescape uri_escape_utf8); | ||
| 143 | 1 | 300ns | our @EXPORT_OK = qw(%escapes); | ||
| 144 | 1 | 200ns | our $VERSION = "3.31"; | ||
| 145 | |||||
| 146 | 2 | 284µs | 1 | 2µs | # spent 2µs within URI::Escape::BEGIN@146 which was called:
# once (2µs+0s) by URI::BEGIN@22 at line 146 # spent 2µs making 1 call to URI::Escape::BEGIN@146 |
| 147 | |||||
| 148 | # Build a char->hex map | ||||
| 149 | 1 | 1µs | for (0..255) { | ||
| 150 | 256 | 165µs | $escapes{chr($_)} = sprintf("%%%02X", $_); | ||
| 151 | } | ||||
| 152 | |||||
| 153 | 1 | 200ns | my %subst; # compiled patterns | ||
| 154 | |||||
| 155 | 1 | 8µs | 2 | 1µs | my %Unsafe = ( # spent 1µs making 2 calls to URI::Escape::CORE:qr, avg 650ns/call |
| 156 | RFC2732 => qr/[^A-Za-z0-9\-_.!~*'()]/, | ||||
| 157 | RFC3986 => qr/[^A-Za-z0-9\-\._~]/, | ||||
| 158 | ); | ||||
| 159 | |||||
| 160 | # spent 25.1ms (19.8+5.31) within URI::Escape::uri_escape which was called 3001 times, avg 8µs/call:
# 3001 times (19.8ms+5.31ms) by Search::Elasticsearch::Role::Client::Direct::_parse_path at line 67 of Search/Elasticsearch/Role/Client/Direct.pm, avg 8µs/call | ||||
| 161 | 3001 | 776µs | my($text, $patn) = @_; | ||
| 162 | 3001 | 652µs | return undef unless defined $text; | ||
| 163 | 3001 | 1.07ms | if (defined $patn){ | ||
| 164 | unless (exists $subst{$patn}) { | ||||
| 165 | # Because we can't compile the regex we fake it with a cached sub | ||||
| 166 | (my $tmp = $patn) =~ s,/,\\/,g; | ||||
| 167 | eval "\$subst{\$patn} = sub {\$_[0] =~ s/([$tmp])/\$escapes{\$1} || _fail_hi(\$1)/ge; }"; | ||||
| 168 | Carp::croak("uri_escape: $@") if $@; | ||||
| 169 | } | ||||
| 170 | &{$subst{$patn}}($text); | ||||
| 171 | } else { | ||||
| 172 | 3001 | 19.8ms | 6002 | 5.31ms | $text =~ s/($Unsafe{RFC3986})/$escapes{$1} || _fail_hi($1)/ge; # spent 3.46ms making 3001 calls to URI::Escape::CORE:regcomp, avg 1µs/call
# spent 1.84ms making 3001 calls to URI::Escape::CORE:subst, avg 615ns/call |
| 173 | } | ||||
| 174 | 3001 | 16.1ms | $text; | ||
| 175 | } | ||||
| 176 | |||||
| 177 | sub _fail_hi { | ||||
| 178 | my $chr = shift; | ||||
| 179 | Carp::croak(sprintf "Can't escape \\x{%04X}, try uri_escape_utf8() instead", ord($chr)); | ||||
| 180 | } | ||||
| 181 | |||||
| 182 | sub uri_escape_utf8 { | ||||
| 183 | my $text = shift; | ||||
| 184 | utf8::encode($text); | ||||
| 185 | return uri_escape($text, @_); | ||||
| 186 | } | ||||
| 187 | |||||
| 188 | # spent 5.44ms (4.92+515µs) within URI::Escape::uri_unescape which was called 1001 times, avg 5µs/call:
# 1001 times (4.92ms+515µs) by URI::_server::host at line 97 of URI/_server.pm, avg 5µs/call | ||||
| 189 | # Note from RFC1630: "Sequences which start with a percent sign | ||||
| 190 | # but are not followed by two hexadecimal characters are reserved | ||||
| 191 | # for future extension" | ||||
| 192 | 1001 | 456µs | my $str = shift; | ||
| 193 | 1001 | 435µs | if (@_ && wantarray) { | ||
| 194 | # not executed for the common case of a single argument | ||||
| 195 | my @str = ($str, @_); # need to copy | ||||
| 196 | for (@str) { | ||||
| 197 | s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; | ||||
| 198 | } | ||||
| 199 | return @str; | ||||
| 200 | } | ||||
| 201 | 1001 | 2.79ms | 1001 | 515µs | $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; # spent 515µs making 1001 calls to URI::Escape::CORE:subst, avg 515ns/call |
| 202 | 1001 | 1.71ms | $str; | ||
| 203 | } | ||||
| 204 | |||||
| 205 | # XXX FIXME escape_char is buggy as it assigns meaning to the string's storage format. | ||||
| 206 | sub escape_char { | ||||
| 207 | # Old versions of utf8::is_utf8() didn't properly handle magical vars (e.g. $1). | ||||
| 208 | # The following forces a fetch to occur beforehand. | ||||
| 209 | my $dummy = substr($_[0], 0, 0); | ||||
| 210 | |||||
| 211 | if (utf8::is_utf8($_[0])) { | ||||
| 212 | my $s = shift; | ||||
| 213 | utf8::encode($s); | ||||
| 214 | unshift(@_, $s); | ||||
| 215 | } | ||||
| 216 | |||||
| 217 | return join '', @URI::Escape::escapes{split //, $_[0]}; | ||||
| 218 | } | ||||
| 219 | |||||
| 220 | 1 | 4µs | 1; | ||
# spent 1µs within URI::Escape::CORE:qr which was called 2 times, avg 650ns/call:
# 2 times (1µs+0s) by URI::BEGIN@22 at line 155, avg 650ns/call | |||||
# spent 3.46ms within URI::Escape::CORE:regcomp which was called 3001 times, avg 1µs/call:
# 3001 times (3.46ms+0s) by URI::Escape::uri_escape at line 172, avg 1µs/call | |||||
sub URI::Escape::CORE:subst; # opcode |