| Filename | /home/ss5/perl5/perlbrew/perls/perl-5.14.1/lib/site_perl/5.14.1/Data/DPath.pm |
| Statements | Executed 20 statements in 4.60ms |
| Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 12.0ms | 36.1ms | Data::DPath::BEGIN@48 |
| 1 | 1 | 1 | 5.08ms | 444ms | Data::DPath::BEGIN@18 |
| 1 | 1 | 1 | 63µs | 63µs | Data::DPath::BEGIN@10 |
| 1 | 1 | 1 | 41µs | 58µs | Data::DPath::BEGIN@11 |
| 1 | 1 | 1 | 36µs | 36µs | Data::DPath::BEGIN@2 |
| 1 | 1 | 1 | 29µs | 57µs | Data::DPath::BEGIN@12 |
| 1 | 1 | 1 | 18µs | 18µs | Data::DPath::BEGIN@19 |
| 1 | 1 | 1 | 18µs | 18µs | Data::DPath::build_dpath |
| 0 | 0 | 0 | 0s | 0s | Data::DPath::__ANON__[:25] |
| 0 | 0 | 0 | 0s | 0s | Data::DPath::__ANON__[:32] |
| 0 | 0 | 0 | 0s | 0s | Data::DPath::__ANON__[:45] |
| 0 | 0 | 0 | 0s | 0s | Data::DPath::build_dpathi |
| 0 | 0 | 0 | 0s | 0s | Data::DPath::build_dpathr |
| 0 | 0 | 0 | 0s | 0s | Data::DPath::match |
| Line | Statements | Time on line | Calls | Time in subs | Code |
|---|---|---|---|---|---|
| 1 | package Data::DPath; | ||||
| 2 | # spent 36µs within Data::DPath::BEGIN@2 which was called:
# once (36µs+0s) by main::BEGIN@10 at line 4 | ||||
| 3 | 1 | 18µs | $Data::DPath::AUTHORITY = 'cpan:SCHWIGON'; | ||
| 4 | 1 | 121µs | 1 | 36µs | } # spent 36µs making 1 call to Data::DPath::BEGIN@2 |
| 5 | { | ||||
| 6 | 2 | 6µs | $Data::DPath::VERSION = '0.48'; | ||
| 7 | } | ||||
| 8 | # ABSTRACT: DPath is not XPath! | ||||
| 9 | |||||
| 10 | 2 | 176µs | 1 | 63µs | # spent 63µs within Data::DPath::BEGIN@10 which was called:
# once (63µs+0s) by main::BEGIN@10 at line 10 # spent 63µs making 1 call to Data::DPath::BEGIN@10 |
| 11 | 2 | 92µs | 2 | 74µs | # spent 58µs (41+17) within Data::DPath::BEGIN@11 which was called:
# once (41µs+17µs) by main::BEGIN@10 at line 11 # spent 58µs making 1 call to Data::DPath::BEGIN@11
# spent 17µs making 1 call to strict::import |
| 12 | 2 | 207µs | 2 | 85µs | # spent 57µs (29+28) within Data::DPath::BEGIN@12 which was called:
# once (29µs+28µs) by main::BEGIN@10 at line 12 # spent 57µs making 1 call to Data::DPath::BEGIN@12
# spent 28µs making 1 call to warnings::import |
| 13 | |||||
| 14 | 1 | 1µs | our $DEBUG = 0; | ||
| 15 | 1 | 800ns | our $USE_SAFE = 1; | ||
| 16 | 1 | 600ns | our $PARALLELIZE = 0; | ||
| 17 | |||||
| 18 | 2 | 412µs | 1 | 444ms | # spent 444ms (5.08+439) within Data::DPath::BEGIN@18 which was called:
# once (5.08ms+439ms) by main::BEGIN@10 at line 18 # spent 444ms making 1 call to Data::DPath::BEGIN@18 |
| 19 | 2 | 868µs | 1 | 18µs | # spent 18µs within Data::DPath::BEGIN@19 which was called:
# once (18µs+0s) by main::BEGIN@10 at line 19 # spent 18µs making 1 call to Data::DPath::BEGIN@19 |
| 20 | |||||
| 21 | # spent 18µs within Data::DPath::build_dpath which was called:
# once (18µs+0s) by Sub::Exporter::default_generator at line 856 of Sub/Exporter.pm | ||||
| 22 | return sub ($) { | ||||
| 23 | my ($path_str) = @_; | ||||
| 24 | Data::DPath::Path->new(path => $path_str); | ||||
| 25 | 1 | 26µs | }; | ||
| 26 | } | ||||
| 27 | |||||
| 28 | sub build_dpathr { | ||||
| 29 | return sub ($) { | ||||
| 30 | my ($path_str) = @_; | ||||
| 31 | Data::DPath::Path->new(path => $path_str, give_references => 1); | ||||
| 32 | }; | ||||
| 33 | } | ||||
| 34 | |||||
| 35 | sub build_dpathi { | ||||
| 36 | return sub ($) { | ||||
| 37 | my ($data, $path_str) = @_; | ||||
| 38 | |||||
| 39 | Data::DPath::Context | ||||
| 40 | ->new | ||||
| 41 | ->current_points([ Data::DPath::Point->new->ref(\$data) ]) | ||||
| 42 | ->_search(Data::DPath::Path->new(path => "/")) | ||||
| 43 | ->_iter | ||||
| 44 | ->value; # there is always exactly one root "/" | ||||
| 45 | }; | ||||
| 46 | } | ||||
| 47 | |||||
| 48 | 1 | 1.80ms | # spent 36.1ms (12.0+24.1) within Data::DPath::BEGIN@48 which was called:
# once (12.0ms+24.1ms) by main::BEGIN@10 at line 54 # spent 1.80ms making 1 call to Sub::Exporter::__ANON__[Sub/Exporter.pm:756] | ||
| 49 | exports => [ dpath => \&build_dpath, | ||||
| 50 | dpathr => \&build_dpathr, | ||||
| 51 | dpathi => \&build_dpathi, | ||||
| 52 | ], | ||||
| 53 | groups => { all => [ 'dpath', 'dpathr' ] }, | ||||
| 54 | 2 | 2.65ms | 1 | 36.1ms | }; # spent 36.1ms making 1 call to Data::DPath::BEGIN@48 |
| 55 | |||||
| 56 | sub match { | ||||
| 57 | my ($class, $data, $path_str) = @_; | ||||
| 58 | Data::DPath::Path->new(path => $path_str)->match($data); | ||||
| 59 | } | ||||
| 60 | |||||
| 61 | # ------------------------------------------------------------ | ||||
| 62 | |||||
| 63 | 1 | 19µs | 1; | ||
| 64 | |||||
| - - | |||||
| 67 | =pod | ||||
| 68 | |||||
| 69 | =encoding utf-8 | ||||
| 70 | |||||
| 71 | =head1 NAME | ||||
| 72 | |||||
| 73 | Data::DPath - DPath is not XPath! | ||||
| 74 | |||||
| 75 | =head1 SYNOPSIS | ||||
| 76 | |||||
| 77 | use Data::DPath 'dpath'; | ||||
| 78 | my $data = { | ||||
| 79 | AAA => { BBB => { CCC => [ qw/ XXX YYY ZZZ / ] }, | ||||
| 80 | RRR => { CCC => [ qw/ RR1 RR2 RR3 / ] }, | ||||
| 81 | DDD => { EEE => [ qw/ uuu vvv www / ] }, | ||||
| 82 | }, | ||||
| 83 | }; | ||||
| 84 | |||||
| 85 | # Perl 5.8 style | ||||
| 86 | @resultlist = dpath('/AAA/*/CCC')->match($data); # ( ['XXX', 'YYY', 'ZZZ'], [ 'RR1', 'RR2', 'RR3' ] ) | ||||
| 87 | |||||
| 88 | # Perl 5.10 style using overloaded smartmatch operator | ||||
| 89 | $resultlist = $data ~~ dpath '/AAA/*/CCC'; # [ ['XXX', 'YYY', 'ZZZ'], [ 'RR1', 'RR2', 'RR3' ] ] | ||||
| 90 | |||||
| 91 | Note that the C<match()> function returns an array but the overloaded | ||||
| 92 | C<~~> operator returns an array reference (that's a limitation of | ||||
| 93 | overloading). | ||||
| 94 | |||||
| 95 | Various other example paths from C<t/data_dpath.t> (not necessarily | ||||
| 96 | fitting to above data structure): | ||||
| 97 | |||||
| 98 | $data ~~ dpath '/AAA/*/CCC' | ||||
| 99 | $data ~~ dpath '/AAA/BBB/CCC/../..' # parents (..) | ||||
| 100 | $data ~~ dpath '//AAA' # anywhere (//) | ||||
| 101 | $data ~~ dpath '//AAA/*' # anywhere + anystep | ||||
| 102 | $data ~~ dpath '//AAA/*[size == 3]' # filter by arrays/hash size | ||||
| 103 | $data ~~ dpath '//AAA/*[size != 3]' # filter by arrays/hash size | ||||
| 104 | $data ~~ dpath '/"EE/E"/CCC' # quote strange keys | ||||
| 105 | $data ~~ dpath '/AAA/BBB/CCC/*[1]' # filter by array index | ||||
| 106 | $data ~~ dpath '/AAA/BBB/CCC/*[ idx == 1 ]' # same, filter by array index | ||||
| 107 | $data ~~ dpath '//AAA/BBB/*[key eq "CCC"]' # filter by exact keys | ||||
| 108 | $data ~~ dpath '//AAA/*[ key =~ /CC/ ]' # filter by regex matching keys | ||||
| 109 | $data ~~ dpath '//CCC/*[ value eq "RR2" ]' # filter by values of hashes | ||||
| 110 | |||||
| 111 | See full details in C<t/data_dpath.t>. | ||||
| 112 | |||||
| 113 | You can get references into the C<$data> data structure by using C<dpathr>: | ||||
| 114 | |||||
| 115 | $data ~~ dpathr '//AAA/BBB/*' | ||||
| 116 | # etc. | ||||
| 117 | |||||
| 118 | You can request iterators to do incremental searches using C<dpathi>: | ||||
| 119 | |||||
| 120 | my $benchmarks_iter = dpathi($data)->isearch("//Benchmark"); | ||||
| 121 | while ($benchmarks_iter->isnt_exhausted) | ||||
| 122 | { | ||||
| 123 | my $benchmark = $benchmarks_iter->value; | ||||
| 124 | my $ancestors_iter = $benchmark->isearch ("/::ancestor"); | ||||
| 125 | while ($ancestors_iter->isnt_exhausted) | ||||
| 126 | { | ||||
| 127 | my $ancestor = $ancestors_iter->value; | ||||
| 128 | print Dumper( $ancestor->deref ); | ||||
| 129 | } | ||||
| 130 | } | ||||
| 131 | |||||
| 132 | This finds all elements anywhere behind a key "Benchmark" and for each | ||||
| 133 | one found prints all its ancestors, respectively. See also chapter | ||||
| 134 | L<Iterator style|/"Iterator style">. | ||||
| 135 | |||||
| 136 | =head1 ABOUT | ||||
| 137 | |||||
| 138 | With this module you can address points in a datastructure by | ||||
| 139 | describing a "path" to it using hash keys, array indexes or some | ||||
| 140 | wildcard-like steps. It is inspired by XPath but differs from it. | ||||
| 141 | |||||
| 142 | =head2 Why not XPath? | ||||
| 143 | |||||
| 144 | XPath is for XML. DPath is for data structures, with a stronger Perl | ||||
| 145 | focus. | ||||
| 146 | |||||
| 147 | Although XML documents are data structures, they are special. | ||||
| 148 | |||||
| 149 | Elements in XML always have an order which is in contrast to hash keys | ||||
| 150 | in Perl. | ||||
| 151 | |||||
| 152 | XML element names on the same level can be repeated, not so in hashes. | ||||
| 153 | |||||
| 154 | XML element names are more limited than arbitrary strange hash keys. | ||||
| 155 | |||||
| 156 | XML elements can have attributes and those can be addressed by XPath; | ||||
| 157 | Perl data structures do not need this. On the other side, data | ||||
| 158 | structures in Perl can contain blessed elements, DPath can address | ||||
| 159 | this. | ||||
| 160 | |||||
| 161 | XML has namespaces, data structures have not. | ||||
| 162 | |||||
| 163 | Arrays starting with index 1 as in XPath would be confusing to read | ||||
| 164 | for data structures. | ||||
| 165 | |||||
| 166 | DPath allows filter expressions that are in fact just Perl expressions, | ||||
| 167 | not a separate sub-language as in XPath. | ||||
| 168 | |||||
| 169 | =head2 Comparison with Data::Path | ||||
| 170 | |||||
| 171 | There is a similar approach on CPAN, L<Data::Path|Data::Path>. Here is | ||||
| 172 | a comparison matrix between L<Data::Path|Data::Path> and | ||||
| 173 | L<Data::DPath|Data::DPath>. | ||||
| 174 | |||||
| 175 | (Warning: B<alpha> grade comparison ahead, not yet fully verified, | ||||
| 176 | only evaluated by reading the source. Speed comparison not really | ||||
| 177 | benchmarked.) | ||||
| 178 | |||||
| 179 | --------------------------------------------------------------------- | ||||
| 180 | Criteria Data::Path Data::DPath | ||||
| 181 | --------------------------------------------------------------------- | ||||
| 182 | |||||
| 183 | real XPath syntax no no | ||||
| 184 | |||||
| 185 | --------------------------------------------------------------------- | ||||
| 186 | |||||
| 187 | allow strange, YES YES | ||||
| 188 | non-xml but | ||||
| 189 | perl-like although | ||||
| 190 | hash keys limited, | ||||
| 191 | see next | ||||
| 192 | --------------------------------------------------------------------- | ||||
| 193 | |||||
| 194 | allows special no YES | ||||
| 195 | chars of own | ||||
| 196 | path syntax in you can quote everything | ||||
| 197 | hash keys | ||||
| 198 | ("/[]|*.") | ||||
| 199 | |||||
| 200 | --------------------------------------------------------------------- | ||||
| 201 | |||||
| 202 | call subs in YES no | ||||
| 203 | data structure, | ||||
| 204 | like: | ||||
| 205 | /method() | ||||
| 206 | --------------------------------------------------------------------- | ||||
| 207 | |||||
| 208 | callbacks on YES no | ||||
| 209 | not found keys | ||||
| 210 | |||||
| 211 | --------------------------------------------------------------------- | ||||
| 212 | |||||
| 213 | element "//" no YES | ||||
| 214 | for "ANYWHERE" | ||||
| 215 | (//foo/bar) | ||||
| 216 | |||||
| 217 | --------------------------------------------------------------------- | ||||
| 218 | |||||
| 219 | element "." no YES | ||||
| 220 | for "NOSTEP" or | ||||
| 221 | "actual position" | ||||
| 222 | (/.[filter expr]) | ||||
| 223 | |||||
| 224 | --------------------------------------------------------------------- | ||||
| 225 | |||||
| 226 | element ".." no YES | ||||
| 227 | for "PARENT" | ||||
| 228 | (//foo/..) | ||||
| 229 | |||||
| 230 | --------------------------------------------------------------------- | ||||
| 231 | |||||
| 232 | element "::ancestor" no YES | ||||
| 233 | for "ANCESTOR" | ||||
| 234 | (//foo/::ancestor) | ||||
| 235 | |||||
| 236 | --------------------------------------------------------------------- | ||||
| 237 | |||||
| 238 | element no YES | ||||
| 239 | "::ancestor-or-self" | ||||
| 240 | |||||
| 241 | --------------------------------------------------------------------- | ||||
| 242 | |||||
| 243 | element "*" no YES | ||||
| 244 | for "ANYSTEP" or | ||||
| 245 | "all subelements" | ||||
| 246 | (/foo/*) | ||||
| 247 | |||||
| 248 | --------------------------------------------------------------------- | ||||
| 249 | |||||
| 250 | array access YES YES | ||||
| 251 | like /foo[4] | ||||
| 252 | although including negative indexes | ||||
| 253 | limited and whitespace awareness | ||||
| 254 | |||||
| 255 | --------------------------------------------------------------------- | ||||
| 256 | |||||
| 257 | complex no YES | ||||
| 258 | filter expressions | ||||
| 259 | like full Perl expressions | ||||
| 260 | /foo[size == 3] or plus sugar functions | ||||
| 261 | /.[isa("Foo::Bar")] | ||||
| 262 | |||||
| 263 | --------------------------------------------------------------------- | ||||
| 264 | |||||
| 265 | works with YES YES | ||||
| 266 | blessed subelements | ||||
| 267 | |||||
| 268 | --------------------------------------------------------------------- | ||||
| 269 | |||||
| 270 | arrays start YES YES | ||||
| 271 | with index 0 | ||||
| 272 | (in contrast | ||||
| 273 | to 1 as in XPath) | ||||
| 274 | |||||
| 275 | --------------------------------------------------------------------- | ||||
| 276 | |||||
| 277 | array semantics /foo[2] /foo/*[2] | ||||
| 278 | is a bit different | ||||
| 279 | |||||
| 280 | --------------------------------------------------------------------- | ||||
| 281 | |||||
| 282 | handling of croak RETURN EMPTY | ||||
| 283 | not matching | ||||
| 284 | paths but can be | ||||
| 285 | overwritten | ||||
| 286 | as callback | ||||
| 287 | |||||
| 288 | --------------------------------------------------------------------- | ||||
| 289 | |||||
| 290 | usage sugar none overloaded '~~' operator | ||||
| 291 | |||||
| 292 | --------------------------------------------------------------------- | ||||
| 293 | |||||
| 294 | Speed FAST quite fast | ||||
| 295 | |||||
| 296 | - raw Perl - probably comparable | ||||
| 297 | - considered fast speed with expressions | ||||
| 298 | that Data::Path handles | ||||
| 299 | - slower on fuzzy paths, | ||||
| 300 | eg. with many "//" in it | ||||
| 301 | |||||
| 302 | --------------------------------------------------------------------- | ||||
| 303 | |||||
| 304 | Perl Versions 5.6+ 5.8+ | ||||
| 305 | |||||
| 306 | --------------------------------------------------------------------- | ||||
| 307 | |||||
| 308 | Install chance 100% 90% | ||||
| 309 | (http://deps | ||||
| 310 | .cpantesters | ||||
| 311 | .org) | ||||
| 312 | |||||
| 313 | --------------------------------------------------------------------- | ||||
| 314 | |||||
| 315 | =head3 Summary | ||||
| 316 | |||||
| 317 | Generally L<Data::Path|Data::Path> is for simpler use cases but does | ||||
| 318 | not suffer from surrounding meta problems: it has no dependencies, is | ||||
| 319 | fast and works on practically every Perl version. | ||||
| 320 | |||||
| 321 | Whereas L<Data::DPath|Data::DPath> provides more XPath-alike features, | ||||
| 322 | but isn't quite as fast and has more dependencies. | ||||
| 323 | |||||
| 324 | =head1 Security warning | ||||
| 325 | |||||
| 326 | B<Watch out!> This module C<eval>s parts of provided dpaths (in | ||||
| 327 | particular: the filter expressions). Don't use it if you don't trust | ||||
| 328 | your paths. | ||||
| 329 | |||||
| 330 | Since v0.41 the filter expressions are secured using L<Safe.pm|Safe> | ||||
| 331 | to only allow basic Perl core ops. This provides more safety but is | ||||
| 332 | also significantly slower. To unrestrict this to pre-v0.41 raw C<eval> | ||||
| 333 | behaviour you can set C<$Data::DPath::USE_SAFE> to False: | ||||
| 334 | |||||
| 335 | local $Data::DPath::USE_SAFE; | ||||
| 336 | # dpath '//CCC//*[ unsecure_perl_expression ]' | ||||
| 337 | |||||
| 338 | Read L<Safe.pm|Safe> to understand how secure this is. | ||||
| 339 | |||||
| 340 | =head1 FUNCTIONS | ||||
| 341 | |||||
| 342 | =head2 dpath( $path_str ) | ||||
| 343 | |||||
| 344 | Meant as the front end function for everyday use of Data::DPath. It | ||||
| 345 | takes a path string and returns a C<Data::DPath::Path> object on which | ||||
| 346 | the match method can be called with data structures and the operator | ||||
| 347 | C<~~> is overloaded. | ||||
| 348 | |||||
| 349 | The function is prototyped to take exactly one argument so that you | ||||
| 350 | can omit the parens in many cases. | ||||
| 351 | |||||
| 352 | See SYNOPSIS. | ||||
| 353 | |||||
| 354 | =head2 dpathr( $path_str ) | ||||
| 355 | |||||
| 356 | Same as C<dpath> but the results are returned as references to the | ||||
| 357 | matched points in the data structure. | ||||
| 358 | |||||
| 359 | =head2 dpathi( $data ) | ||||
| 360 | |||||
| 361 | This is a different, iterator style, approach. | ||||
| 362 | |||||
| 363 | You provide the data structure on which to work and get back a current | ||||
| 364 | context containing the root element (as if you had searched for the | ||||
| 365 | path C</>), and now you can do incremental searches using C<isearch>. | ||||
| 366 | |||||
| 367 | See chapter L<Iterator style|/"Iterator style"> below for details. | ||||
| 368 | |||||
| 369 | =head1 API METHODS | ||||
| 370 | |||||
| 371 | =head2 match( $data, $path ) | ||||
| 372 | |||||
| 373 | Returns an array of all values in C<$data> that match the C<$path>. | ||||
| 374 | |||||
| 375 | =head1 OPERATOR | ||||
| 376 | |||||
| 377 | =head2 ~~ | ||||
| 378 | |||||
| 379 | Does a C<match> of a dpath against a data structure. | ||||
| 380 | |||||
| 381 | Due to the B<matching> nature of DPath the operator C<~~> should make | ||||
| 382 | your code more readable. | ||||
| 383 | |||||
| 384 | =head1 THE DPATH LANGUAGE | ||||
| 385 | |||||
| 386 | =head2 Synopsis | ||||
| 387 | |||||
| 388 | /AAA/BBB/CCC | ||||
| 389 | /AAA/*/CCC | ||||
| 390 | //CCC/* | ||||
| 391 | //CCC/*[2] | ||||
| 392 | //CCC/*[size == 3] | ||||
| 393 | //CCC/*[size != 3] | ||||
| 394 | /"EE/E"/CCC | ||||
| 395 | /AAA/BBB/CCC/*[1] | ||||
| 396 | /AAA/BBB/CCC/*[ idx == 1 ] | ||||
| 397 | //AAA/BBB/*[key eq "CCC"] | ||||
| 398 | //AAA/*[ key =~ /CC/ ] | ||||
| 399 | //CCC/*[value eq "RR2"] | ||||
| 400 | //.[ size == 4 ] | ||||
| 401 | /.[ isa("Funky::Stuff") ]/.[ size == 5 ]/.[ reftype eq "ARRAY" ] | ||||
| 402 | |||||
| 403 | =head2 Modeled on XPath | ||||
| 404 | |||||
| 405 | The basic idea is that of XPath: define a way through a datastructure | ||||
| 406 | and allow some funky ways to describe fuzzy ways. The syntax | ||||
| 407 | roughly looks like XPath but in fact the two do not have much more in common. | ||||
| 408 | |||||
| 409 | =head3 Some wording | ||||
| 410 | |||||
| 411 | I call the whole path a, well, B<path>. | ||||
| 412 | |||||
| 413 | It consists of single (B<path>) B<steps> that are divided by the path | ||||
| 414 | separator C</>. | ||||
| 415 | |||||
| 416 | Each step can have a B<filter> appended in brackets C<[]> that narrows | ||||
| 417 | down the matching set of results. | ||||
| 418 | |||||
| 419 | Additional functions provided inside the filters are called, well, | ||||
| 420 | B<filter functions>. | ||||
| 421 | |||||
| 422 | Each step has a set of B<point>s relative to the set of points before | ||||
| 423 | this step, all starting at the root of the data structure. | ||||
| 424 | |||||
| 425 | =head2 Special elements | ||||
| 426 | |||||
| 427 | =over 4 | ||||
| 428 | |||||
| 429 | =item C<//> | ||||
| 430 | |||||
| 431 | Anchors to any hash or array inside the data structure below the | ||||
| 432 | currently found points (or the root). | ||||
| 433 | |||||
| 434 | Typically used at the start of a path to anchor the path anywhere | ||||
| 435 | instead of only the root node: | ||||
| 436 | |||||
| 437 | //FOO/BAR | ||||
| 438 | |||||
| 439 | but can also happen inside paths to skip middle parts: | ||||
| 440 | |||||
| 441 | /AAA/BBB//FARAWAY | ||||
| 442 | |||||
| 443 | This allows any way between C<BBB> and C<FARAWAY>. | ||||
| 444 | |||||
| 445 | =item C<*> | ||||
| 446 | |||||
| 447 | Matches one step of any value relative to the current points (or the | ||||
| 448 | root). This step might be any hash key or all values of an array in | ||||
| 449 | the step before. | ||||
| 450 | |||||
| 451 | =item C<..> | ||||
| 452 | |||||
| 453 | Matches the parent element relative to the current points. | ||||
| 454 | |||||
| 455 | =item C<::ancestor> | ||||
| 456 | |||||
| 457 | Matches all ancestors (parent, grandparent, etc.) of the current node. | ||||
| 458 | |||||
| 459 | =item C<::ancestor-or-self> | ||||
| 460 | |||||
| 461 | Matches all ancestors (parent, grandparent, etc.) of the current node | ||||
| 462 | and the current node itself. | ||||
| 463 | |||||
| 464 | =item C<.> | ||||
| 465 | |||||
| 466 | A "no step". This keeps passively at the current points, but allows | ||||
| 467 | incrementally attaching filters to points or to otherwise hard to | ||||
| 468 | reach steps, like the top root element C</>. So you can do: | ||||
| 469 | |||||
| 470 | /.[ FILTER ] | ||||
| 471 | |||||
| 472 | or chain filters: | ||||
| 473 | |||||
| 474 | /AAA/BBB/.[ filter1 ]/.[ filter2 ]/.[ filter3 ] | ||||
| 475 | |||||
| 476 | This way you do not need to stuff many filters together into one huge | ||||
| 477 | killer expression and can more easily maintain them. | ||||
| 478 | |||||
| 479 | See L<Filters|/"Filters"> for more details on filters. | ||||
| 480 | |||||
| 481 | =item If you need those special elements to be not special but as | ||||
| 482 | key names, just quote them: | ||||
| 483 | |||||
| 484 | /"*"/ | ||||
| 485 | /"*"[ filter ]/ | ||||
| 486 | /"::ancestor"/ | ||||
| 487 | /".."/ | ||||
| 488 | /".."[ filter ]/ | ||||
| 489 | /"."/ | ||||
| 490 | /"."[ filter ]/ | ||||
| 491 | /"//"/ | ||||
| 492 | /"//"[ filter ]/ | ||||
| 493 | |||||
| 494 | =back | ||||
| 495 | |||||
| 496 | =head2 Difference between C</step[filter]> vs. C</step/.[filter]> | ||||
| 497 | vs. C</step/*[filter]> | ||||
| 498 | |||||
| 499 | The filter applies to the matched points of the step to which it is | ||||
| 500 | applied, therefore C</part[filter]> is the normal form, but see below | ||||
| 501 | how this affects array access. | ||||
| 502 | |||||
| 503 | The "no step" "/." stays on the current step, therefore | ||||
| 504 | C</part/.[filter]> should be the same as C</part[filter]>. | ||||
| 505 | |||||
| 506 | Lastly, C</part/*[filter]> means: take all the sub elements ("*") | ||||
| 507 | B<below> "step" and apply the filter to those. The most common use is | ||||
| 508 | to take "all" elements of an array and choose one element via index: | ||||
| 509 | C</step/*[4]/>. This takes the fifth element of the array inside | ||||
| 510 | "step". This is explained in even more depth in the next section. | ||||
| 511 | |||||
| 512 | =head2 Difference between C</affe[2]> vs. C</affe/*[2]> | ||||
| 513 | |||||
| 514 | B<Read carefully.> This is different from what you probably expect | ||||
| 515 | when you know XPath. | ||||
| 516 | |||||
| 517 | In B<XPath> "/affe[2]" would address an item of all elements named | ||||
| 518 | "affe" on this step. This is because in XPath elements with the same | ||||
| 519 | name can be repeated, like this: | ||||
| 520 | |||||
| 521 | <coolanimals> | ||||
| 522 | <affe>Pavian</affe> | ||||
| 523 | <affe>Gorilla</affe> | ||||
| 524 | <affe>Schimpanse</affe> | ||||
| 525 | </coolanimals> | ||||
| 526 | |||||
| 527 | and "//affe[2]" would get "Schimpanse" (we ignore the fact that in | ||||
| 528 | XPath array indexes start with 1, not 0 as in DPath, so we would | ||||
| 529 | actually get "Gorilla"; anyway, both are funky fellows). | ||||
| 530 | |||||
| 531 | So what does "/affe[2]" return in DPath? Nothing! It makes no sense, | ||||
| 532 | because "affe" is interpreted as a hash key and hash keys can not | ||||
| 533 | repeat in Perl data structures. | ||||
| 534 | |||||
| 535 | So what you often want in DPath is to look at the elements B<below> | ||||
| 536 | "affe" and take the third of them, e.g. in such a structure: | ||||
| 537 | |||||
| 538 | { affe => [ | ||||
| 539 | 'Pavian', | ||||
| 540 | 'Gorilla', | ||||
| 541 | 'Schimpanse' | ||||
| 542 | ] | ||||
| 543 | } | ||||
| 544 | |||||
| 545 | the path "/affe/*[2]" would return "Schimpanse". | ||||
| 546 | |||||
| 547 | =head2 Filters | ||||
| 548 | |||||
| 549 | Filters are conditions in brackets. They apply to all elements that | ||||
| 550 | are directly found by the path part to which the filter is appended. | ||||
| 551 | |||||
| 552 | Internally the filter condition is part of a C<grep> construct | ||||
| 553 | (exception: single integers, they choose array elements). See below. | ||||
| 554 | |||||
| 555 | Examples: | ||||
| 556 | |||||
| 557 | =over 4 | ||||
| 558 | |||||
| 559 | =item C</FOO/*[2]/> | ||||
| 560 | |||||
| 561 | A single integer as filter means choose an element from an array. So | ||||
| 562 | the C<*> finds all subelements that follow current step C<FOO> and the | ||||
| 563 | C<[2]> reduces them to only the third element (index starts at 0). | ||||
| 564 | |||||
| 565 | =item C</FOO/*[ idx == 2 ]/> | ||||
| 566 | |||||
| 567 | The C<*> is a step that matches all elements after C<FOO>, but with | ||||
| 568 | the filter only those elements are chosen that are of index 2. This is | ||||
| 569 | actually the same as just C</FOO/*[2]>. | ||||
| 570 | |||||
| 571 | =item C</FOO/*[key eq "CCC"]> | ||||
| 572 | |||||
| 573 | In all elements after C<FOO> it matches only those elements whose key | ||||
| 574 | is "CCC". | ||||
| 575 | |||||
| 576 | =item C</FOO/*[key =~ /CCC/ ]> | ||||
| 577 | |||||
| 578 | In all elements after step C<FOO> it matches only those elements whose | ||||
| 579 | key matches the regex C</CCC/>. It is actually just Perl code inside | ||||
| 580 | the filter which works in a grep{}-like context. | ||||
| 581 | |||||
| 582 | =item C<//FOO/*[value eq "RR2"]> | ||||
| 583 | |||||
| 584 | Find elements below C<FOO> that have the value C<RR2>. | ||||
| 585 | |||||
| 586 | Combine this with the parent step C<..>: | ||||
| 587 | |||||
| 588 | =item C<//FOO/*[value eq "RR2"]/..> | ||||
| 589 | |||||
| 590 | Find such an element below C<FOO> where an element with value C<RR2> | ||||
| 591 | is contained. | ||||
| 592 | |||||
| 593 | =item C<//FOO[size E<gt>= 3]> | ||||
| 594 | |||||
| 595 | Find C<FOO> elements that are arrays or hashes of size 3 or bigger. | ||||
| 596 | |||||
| 597 | =back | ||||
| 598 | |||||
| 599 | =head2 Filter functions | ||||
| 600 | |||||
| 601 | The filter condition is internally part of a C<grep> over the current | ||||
| 602 | subset of values. So you can write any condition like in a grep and | ||||
| 603 | also use the variable C<$_>. | ||||
| 604 | |||||
| 605 | Additional filter functions are available that are usually written to | ||||
| 606 | use $_ by default. See L<Data::DPath::Filters|Data::DPath::Filters> | ||||
| 607 | for complete list of available filter functions. | ||||
| 608 | |||||
| 609 | Here are some of them: | ||||
| 610 | |||||
| 611 | =over 4 | ||||
| 612 | |||||
| 613 | =item idx | ||||
| 614 | |||||
| 615 | Returns the current index inside array elements. | ||||
| 616 | |||||
| 617 | Please note that the current matching elements might not be in a | ||||
| 618 | defined order if resulting from anything other than arrays. | ||||
| 619 | |||||
| 620 | =item size | ||||
| 621 | |||||
| 622 | Returns the size of the current element. If it is an arrayref it | ||||
| 623 | returns number of elements, if it's a hashref it returns number of | ||||
| 624 | keys, if it's a scalar it returns 1, everything else returns -1. | ||||
| 625 | |||||
| 626 | =item key | ||||
| 627 | |||||
| 628 | Returns the key of the current element if it is a hashref. Else it | ||||
| 629 | returns undef. | ||||
| 630 | |||||
| 631 | =item value | ||||
| 632 | |||||
| 633 | Returns the value of the current element. If it is a hashref, return | ||||
| 634 | the value. If a scalar, return the scalar. Else return undef. | ||||
| 635 | |||||
| 636 | =back | ||||
| 637 | |||||
| 638 | =head2 Special characters | ||||
| 639 | |||||
| 640 | There are 4 special characters: the slash C</>, paired brackets C<[]>, | ||||
| 641 | the double-quote C<"> and the backslash C<\>. They are needed and | ||||
| 642 | explained in a logical order. | ||||
| 643 | |||||
| 644 | Path parts are divided by the slash C</>. | ||||
| 645 | |||||
| 646 | A path part can be extended by a filter by appending an expression | ||||
| 647 | in brackets C<[]>. | ||||
| 648 | |||||
| 649 | To contain slashes in hash keys, they can be surrounded by double | ||||
| 650 | quotes C<">. | ||||
| 651 | |||||
| 652 | To contain double-quotes in hash keys they can be escaped with | ||||
| 653 | backslash C<\>. | ||||
| 654 | |||||
| 655 | Backslashes in path parts don't need to be escaped, except before | ||||
| 656 | escaped quotes (but see below on | ||||
| 657 | L<Backslash handling|/"Backslash handling">). | ||||
| 658 | |||||
| 659 | Filters of parts are already sufficiently divided by the brackets | ||||
| 660 | C<[]>. There is no need to handle special characters in them, not even | ||||
| 661 | double-quotes. The filter expression just needs to be balanced on the | ||||
| 662 | brackets. | ||||
| 663 | |||||
| 664 | So this is the order how to create paths: | ||||
| 665 | |||||
| 666 | =over 4 | ||||
| 667 | |||||
| 668 | =item 1. backslash double-quotes that are part of the key | ||||
| 669 | |||||
| 670 | =item 2. put double-quotes around the resulting key | ||||
| 671 | |||||
| 672 | =item 3. append the filter expression after the key | ||||
| 673 | |||||
| 674 | =item 4. separate several path parts with slashes | ||||
| 675 | |||||
| 676 | =back | ||||
| 677 | |||||
| 678 | =head2 Backslash handling | ||||
| 679 | |||||
| 680 | If you know backslash in Perl strings, skip this paragraph, it should | ||||
| 681 | be the same. | ||||
| 682 | |||||
| 683 | It is somewhat difficult to create a backslash directly before a | ||||
| 684 | quoted double-quote. | ||||
| 685 | |||||
| 686 | Inside the DPath language the typical backslash rules apply that | ||||
| 687 | you already know from Perl B<single quoted> strings. The challenge is | ||||
| 688 | to specify such strings inside Perl programs where another layer of | ||||
| 689 | this backslashing applies. | ||||
| 690 | |||||
| 691 | Without quotes it's all easy. Both a single backslash C<\> and a | ||||
| 692 | double backslash C<\\> get evaluated to a single backslash C<\>. | ||||
| 693 | |||||
| 694 | Extreme edge case by example: To specify a plain hash key like this: | ||||
| 695 | |||||
| 696 | "EE\E5\" | ||||
| 697 | |||||
| 698 | where the quotes are part of the key, you need to escape the quotes | ||||
| 699 | and the backslash: | ||||
| 700 | |||||
| 701 | \"EE\E5\\\" | ||||
| 702 | |||||
| 703 | Now put quotes around that to use it as DPath hash key: | ||||
| 704 | |||||
| 705 | "\"EE\E5\\\"" | ||||
| 706 | |||||
| 707 | and if you specify this in a Perl program you need to additionally | ||||
| 708 | escape the backslashes (i.e., double their count): | ||||
| 709 | |||||
| 710 | "\"EE\E5\\\\\\"" | ||||
| 711 | |||||
| 712 | As you can see, strangely, this backslash escaping is only needed on | ||||
| 713 | backslashes that are not standing alone. The first backslash before | ||||
| 714 | the first escaped double-quote is ok to be a single backslash. | ||||
| 715 | |||||
| 716 | All strange, isn't it? At least it's (hopefully) consistent with | ||||
| 717 | something you know (Perl, Shell, etc.). | ||||
| 718 | |||||
| 719 | =head1 Iterator style | ||||
| 720 | |||||
| 721 | The I<iterator style> approach is an alternative to the already | ||||
| 722 | described I<get-all-results-at-once> approach. With it you iterate over | ||||
| 723 | the results one by one and even allow relative sub searches on | ||||
| 724 | each. The iterators use the L<Iterator|Iterator> API. | ||||
| 725 | |||||
| 726 | Please note that the iterators do B<not> save memory, they are just | ||||
| 727 | holding the context to go step-by-step and to start subsequent | ||||
| 728 | searches. Each iterator needs to evaluate its whole result set | ||||
| 729 | first. So in fact with nested iterators your memory might even go up. | ||||
| 730 | |||||
| 731 | =head2 Basic usage by example | ||||
| 732 | |||||
| 733 | Initialize a DPath iterator on a data structure using: | ||||
| 734 | |||||
| 735 | my $root = dpathi($data); | ||||
| 736 | |||||
| 737 | Create a new iterator context, with the path relative to current | ||||
| 738 | root context: | ||||
| 739 | |||||
| 740 | my $affe_iter = $root->isearch("//anywhere/affe"); | ||||
| 741 | |||||
| 742 | Iterate over affe results: | ||||
| 743 | |||||
| 744 | while ($affe_iter->isnt_exhausted) | ||||
| 745 | { | ||||
| 746 | my $affe_point = $affe_iter->value; # next "affe" point | ||||
| 747 | my $affe = $affe_point->deref; # the actual "affe" | ||||
| 748 | } | ||||
| 749 | |||||
| 750 | =head2 Nested iterators example | ||||
| 751 | |||||
| 752 | This example is taken from the | ||||
| 753 | L<Benchmark::Perl::Formance|Benchmark::Perl::Formance> suite, where | ||||
| 754 | the several plugins are allowed to provide their results anywhere | ||||
| 755 | at any level down in the result hash. | ||||
| 756 | |||||
| 757 | When the results are printed we look for all keys C<Benchmark> and | ||||
| 758 | regenerate the path to each so we can name it accordingly, e.g., | ||||
| 759 | C<plugin.name.subname>. | ||||
| 760 | |||||
| 761 | For this we need an iterator to get the single C<Benchmark> points one | ||||
| 762 | by one and evaluate the corresponding ancestors to fetch their hash | ||||
| 763 | keys. Here is the code: | ||||
| 764 | |||||
| 765 | my $benchmarks_iter = dpathi($results)->isearch("//Benchmark"); | ||||
| 766 | while ($benchmarks_iter->isnt_exhausted) | ||||
| 767 | { | ||||
| 768 | my $benchmark = $benchmarks_iter->value; | ||||
| 769 | my $ancestors_iter = $benchmark->isearch ("/::ancestor"); | ||||
| 770 | while ($ancestors_iter->isnt_exhausted) | ||||
| 771 | { | ||||
| 772 | my $ancestor = $ancestors_iter->value; | ||||
| 773 | print Dumper( $ancestor->deref ); #(1) | ||||
| 774 | print $ancestor->first_point->{attrs}{key}; #(2) | ||||
| 775 | } | ||||
| 776 | } | ||||
| 777 | |||||
| 778 | Note that we have two iterators, the first one (C<$benchmarks_iter>) | ||||
| 779 | over the actual benchmark results and the second one | ||||
| 780 | (C<$ancestors_iter>) over the ancestors relative to one benchmark. | ||||
| 781 | |||||
| 782 | In line B<#(1)> you can see that once you have the searched point, | ||||
| 783 | here the ancestors, you get the actual data using | ||||
| 784 | C<< $iterator->value->deref >>. | ||||
| 785 | |||||
| 786 | The line B<#(2)> is utilizing the internal data structure to find out | ||||
| 787 | about the actual hash key under which the point is located. (There is | ||||
| 788 | also an official API to that: C<< $ancestor->first_point->attrs->key >>, | ||||
| 789 | but there it's necessary to check for undefined values before | ||||
| 790 | calling the methods F<attrs> and F<key>, so I went the easy way). | ||||
| 791 | |||||
| 792 | =head1 INTERNAL METHODS | ||||
| 793 | |||||
| 794 | To make pod coverage happy. | ||||
| 795 | |||||
| 796 | =head2 build_dpath | ||||
| 797 | |||||
| 798 | Prepares internal attributes for I<dpath>. | ||||
| 799 | |||||
| 800 | =head2 build_dpathr | ||||
| 801 | |||||
| 802 | Prepares internal attributes for I<dpathr>. | ||||
| 803 | |||||
| 804 | =head2 build_dpathi | ||||
| 805 | |||||
| 806 | Prepares internal attributes for I<dpathi>. | ||||
| 807 | |||||
| 808 | =head1 AUTHOR | ||||
| 809 | |||||
| 810 | Steffen Schwigon, C<< <schwigon at cpan.org> >> | ||||
| 811 | |||||
| 812 | =head1 CONTRIBUTIONS | ||||
| 813 | |||||
| 814 | Florian Ragwitz (cleaner exports, $_ scoping, general perl consultant) | ||||
| 815 | |||||
| 816 | =head1 SEE ALSO | ||||
| 817 | |||||
| 818 | There are other modules on CPAN which are related to finding elements | ||||
| 819 | in data structures. | ||||
| 820 | |||||
| 821 | =over 4 | ||||
| 822 | |||||
| 823 | =item Data::Path | ||||
| 824 | |||||
| 825 | L<http://metacpan.org/release/Data-Path> | ||||
| 826 | |||||
| 827 | =item XML::XPathEngine | ||||
| 828 | |||||
| 829 | L<http://metacpan.org/release/XML-XPathEngine> | ||||
| 830 | |||||
| 831 | =item Tree::XPathEngine | ||||
| 832 | |||||
| 833 | L<http://metacpan.org/release/Tree-XPathEngine> | ||||
| 834 | |||||
| 835 | =item Class::XPath | ||||
| 836 | |||||
| 837 | L<http://metacpan.org/release/Class-XPath> | ||||
| 838 | |||||
| 839 | =item Hash::Path | ||||
| 840 | |||||
| 841 | L<http://metacpan.org/release/Hash-Path> | ||||
| 842 | |||||
| 843 | =back | ||||
| 844 | |||||
| 845 | =head1 AUTHOR | ||||
| 846 | |||||
| 847 | Steffen Schwigon <ss5@renormalist.net> | ||||
| 848 | |||||
| 849 | =head1 COPYRIGHT AND LICENSE | ||||
| 850 | |||||
| 851 | This software is copyright (c) 2012 by Steffen Schwigon. | ||||
| 852 | |||||
| 853 | This is free software; you can redistribute it and/or modify it under | ||||
| 854 | the same terms as the Perl 5 programming language system itself. | ||||
| 855 | |||||
| 856 | =cut | ||||
| 857 | |||||
| 858 | |||||
| 859 | __END__ |