##-*- Mode: CPerl -*-

## File: DDC::Query::Parser.pm
## Author: Bryan Jurish <moocow@cpan.org>
## Description:
##  + extendable full-text index using mysql: abstract queries: parser (high-level)
##======================================================================

package DDC::Query::Parser;
use DDC::Utils qw(:escape);
use DDC::Query;
use DDC::Query::Filter;
use DDC::Query::yylexer;
use DDC::Query::yyparser;

use strict;

##======================================================================
## Globals etc.
our @ISA = qw();


##======================================================================
## $qp = $CLASS_OR_OBJ->new(%args)
## + abstract constructor
## + you should probably call free() before destroying the object to be safe
## + object structure, %args:
##   {
##    ##-- Status flags
##    error => $current_errstr, ##-- false indicates no error
##
##    ##-- parsed data
##    #query  => $query,          ##-- most recently parsed query, a DDC::Query object
##    #filters => \@filters,      ##-- query filters
##
##    ##-- Underlying lexer/parser pair
##    lexer  => $yylexer,   ##-- a DDC::YYLexer object
##    parser => $yyparser,  ##-- a DDC::YYParser object
##    yydebug => $mask,     ##-- yydebug value
##
##    ##-- Closures
##    yylex    => \&yylex,   ##-- yapp-friendly lexer sub
##    yyerror  => \&yyerror, ##-- yapp-friendly parser sub
##   }
sub new {
  my $that = shift;
  my $qp = bless({
		  ##-- Status flags
		  error => undef,

		  ##-- Underlying lexer/parser pair
		  lexer  => DDC::Query::yylexer->new(),
		  parser => DDC::Query::yyparser->new(),

		  ##-- runtime data
		  query   => undef,
		  filters => undef,

		  ##-- parser debugging
		  yydebug  => 0, # no debug
		  #yydebug => 0x01,  # lexer debug
		  #yydebug => 0x02,  # state info
		  #yydebug => 0x04,  # driver actions (shift/reduce/etc.)
		  #yydebug => 0x08,  # stack dump
		  #yydebug => 0x10,  # Error recovery trace
		  #yydebug => 0x01 | 0x02 | 0x04 | 0x08, # almost everything
		  #yydebug => 0xffffffff, ##-- pretty much everything

		  ##-- User args
		  @_
		 },
		 ref($that)||$that);
  $qp->getClosures();
  return $qp;
}

## undef = $qp->free()
##  + clears $qp itself, as well as $qp->{parser}{USER}
##  + makes $qp subsequently useless, but destroyable
sub free {
  my $qp = shift;
  delete($qp->{parser}{USER}) if ($qp->{parser});
  %$qp = qw();
}

## $qp = $qp->getClosures()
##  + compiles lexer & parser closures
sub getClosures {
  my $qp = shift;
  delete(@$qp{qw(yylex yyerror)});
  $qp->{yylex}   = $qp->_yylex_sub();
  $qp->{yyerror} = $qp->_yyerror_sub();
  return $qp;
}

##======================================================================
## API: Input selection

## undef = $qp->reset()
##  + reset all parse-relevant data structures
sub reset {
  my $qp = shift;

  ##-- status flags
  delete($qp->{error});

  ##-- runtime data
  delete(@$qp{qw(query filters)});

  ##-- lexer & parser state
  $qp->{lexer}->reset();
  delete($qp->{parser}{USER}{hint});
  $qp->{parser}{USER}{qp}      = $qp;
  $qp->{parser}{USER}{lex}     = $qp->{lexer};
}

## $qp = $qp->from($which,$src, %opts)
##  + wraps $qp->{lexer}->from()
##  + $which is one of qw(fh file string)
##  + $src is the actual source (default: 'string')
##  + %opts may contain (src=>$name)
sub from {
  return $_[0]{lexer}->from(@_[1..$#_]) ? $_[0] : undef;
}

## $qp = $qp->fromFile($filename_or_handle,%opts)
##  + wraps $qp->{lexer}->fromFile()
sub fromFile {
  return $_[0]{lexer}->fromFile(@_[1..$#_]) ? $_[0] : undef;
}

## $qp = $qp->fromFh($fh,%opts)
##  + wraps $qp->{lexer}->fromFh()
sub fromFh {
  return $_[0]{lexer}->fromFh(@_[1..$#_]) ? $_[0] : undef;
}

## $qp = $qp->fromString($str,%opts)
## $qp = $qp->fromString(\$str,%opts)
##  + wraps $qp->{lexer}->fromString()
sub fromString {
  return $_[0]{lexer}->fromString(@_[1..$#_]) ? $_[0] : undef;
}


##======================================================================
## API: High-level Parsing

## $query_or_undef = $qp->parse(string=>$str)
## $query_or_undef = $qp->parse(string=>\$str)
## $query_or_undef = $qp->parse(file=>$filename)
## $query_or_undef = $qp->parse(fh=>$handle)
sub parse {
  my $qp = shift;
  $qp->reset();
  $qp->from(@_);
  my $result = eval { $qp->yyparse(); };
  delete($qp->{parser}{qp});       ##-- chop circular reference we know how to get at...
  delete($qp->{parser}{USER}{qp}); ##-- chop circular reference we know how to get at...

  ##-- how'd it go?
  return $result if (!$@);
  $qp->{error} = $@ if (!$qp->{error});
  return undef;
}

## $query_or_undef = $qp->yyparse()
##  + parses from currently selected input source; no reset or error catching
sub yyparse {
  my $qp = shift;
  return $qp->{parser}->YYParse(
				yylex   => $qp->{yylex},
				yyerror => $qp->{yyerror},
				yydebug => $qp->{yydebug},
			       );
}

##======================================================================
## API: Mid-level: Query Generation API

##------------------------------------------------------
## Query Generation API: Mid-level: generic

## $q = $parser->newq($class,%args)
##  + wrapper for DDC::Query->new(class=>$class,%args); called by yapp parser
sub newq {
  return DDC::Query->new(class=>$_[1],@_[2..$#_]);
}

## $qf = $parser->newf($class,%args)
##  + wrapper for DDC::Query::Filter->new(class=>$class,%args); called by yapp parser
sub newf {
  return DDC::Query::Filter->new(class=>$_[1],@_[2..$#_]);
}


##======================================================================
## API: Low-LEVEL: Parse::Lex <-> Parse::Yapp interface
##
## - REQUIREMENTS on yylex() sub:
##   + Yapp-compatible lexing routine
##   + reads input and returns token values to the parser
##   + our only argument ($MyParser) is the YYParser object itself
##   + We return a list ($TOKENTYPE, $TOKENVAL) of the next tokens to the parser
##   + on end-of-input, we should return the list ('', undef)
##

## \&yylex_sub = $qp->_yylex_sub()
##   + returns a Parse::Yapp-friendly lexer subroutine
sub _yylex_sub {
  my $qp = shift;
  my ($type,$text,@expect);

  return sub {
    $qp->{yyexpect} = [$qp->{parser}->YYExpect];
    ($type,$text) = $qp->{lexer}->yylex();
    return ('',undef) if ($type eq '__EOF__');

    ##-- un-escape single-quoted symbols
    if ($type =~ /^SQ_(.*)$/) {
      $type = $1;
      $text = unescapeq($text);
    }
    elsif ($type eq 'SYMBOL') {
      $text = unescape($text);
    }

    if ($qp->{yydebug} & 0x01) {
      print STDERR ": yylex(): type=($type) ; text=($text)\n";
    }

    return ($type,$text);
  };
}


## \&yyerror_sub = $qp->_yyerror_sub()
##  + returns error subroutine for the underlying Yapp parser
sub _yyerror_sub {
  my $qp = shift;
  my (%expect);
  return sub {
    @expect{@{$qp->{yyexpect}||[]}}=qw();
    @expect{@{$qp->{yyexpect}||[]}, $qp->{parser}->YYExpect}=qw();
    $qp->{error} = ("Syntax error in ".$qp->{lexer}->yywhere().":\n"
		    #." > Expected one of (here): ".join(', ', map {$_ eq '' ? '__EOF__' : $_} $qp->{parser}->YYExpect)."\n"
		    #." > Expected one of (prev): ".join(', ', map {$_ eq '' ? '__EOF__' : $_} @{$qp->{yyexpect}||['???']})."\n"
		    ." > Expected one of: ".join(', ', sort map {$_ eq '' ? '__EOF__' : $_} keys %expect)."\n"
		    ." > Got: ".$qp->{lexer}->yytype.' "'.$qp->{lexer}->yytext."\"\n"
		   );
  };
}


1; ##-- be happy


##========================================================================
## POD DOCUMENTATION, auto-generated by podextract.perl
=pod

=cut

##========================================================================
## NAME
=pod

=head1 NAME

DDC::Query::Parser - extendable full-text index using mysql: high-level query parser

=cut

##========================================================================
## SYNOPSIS
=pod

=head1 SYNOPSIS

 ##========================================================================
 ## PRELIMINARIES

 use DDC::Query::Parser;

 ##========================================================================
 ## Constructors etc.

 $qp    = $CLASS_OR_OBJ->new(%args);
 $undef = $qp->free();                ##-- explicit destruction REQUIRED!

 ##========================================================================
 ## API: High-level Parsing

 $undef = $qp->reset();
 $query_or_undef = $qp->parse(@query_strings);

 ##========================================================================
 ## API: Mid-level: Query Generation API

 $q = $parser->newQuery(@args);

 ##========================================================================
 ## API: Low-level: Lexer/Parser Connecton, Error Reporting, etc.

 \&yylex_sub   = $qp->_yylex_sub();
 \&yyerror_sub = $qp->_yyerror_sub();
 $errorString  = $qp->setError($errorCode,\%userMacros);

=cut

##========================================================================
## DESCRIPTION
=pod

=head1 DESCRIPTION

DDC::Query::Parser is a high-level parser for user queries
expressed in the DDC query language.  It uses a native perl scanner
(DDC::Query::yylexer)
and a Parse::Yapp generated parser
(DDC::Query::yyparser)
for low-level parsing.

=cut

##----------------------------------------------------------------
## DESCRIPTION: DDC::Query::Parser: Constructors etc.
=pod

=head2 Constructors etc.

=over 4

=item new

 $qp = $CLASS_OR_OBJ->new(%args);

Constructor.
B<NOTE>: you should probably call free() before destroying the returned object to be safe.

Object structure / known %args:

   {
    ##-- Status flags
    errstr => $current_errstr, ##-- false indicates no error

    ##-- Underlying lexer/parser pair
    lexer   => $yylexer,   ##-- a DDC::Query::yylexer object
    parser  => $yyparser,  ##-- a DDC::Query::yyparser object
    yydebug => $mask,      ##-- yydebug value

    ##-- Closures
    yylex    => \&yylex,   ##-- yapp-friendly lexer sub
    yyerror  => \&yyerror, ##-- yapp-friendly error sub
   }


=item free

 $undef = $qp->free();

Performs required pre-destruction cleanup (trims circular references, etc.),
in particular:
clears $qp itself, as well as $qp-E<gt>{parser}{USER},
which makes $qp subsequently useless, but destroyable.

=item useIndex

 $qp = $qp->useIndex($index);

Sets up parser to use the DDC index $index.

=back

=cut

##----------------------------------------------------------------
## DESCRIPTION: DDC::Query::Parser: API: High-level Parsing
=pod

=head2 API: High-level Parsing

The following methods comprise the top-level parsing API.

=over 4

=item reset

 undef = $qp->reset();

Reset all parse-relevant data structures in preparation for
parsing a new query.

=item parse

 $query_or_undef = $qp->parse(@query_strings);
 $query_or_undef = $qp->parse(\*query_fh)

Parse and return a user query as a DDC::Query::Base object (or subclass)
from a (list of) string(s) [first form], or from an open filehandle [second form].
If an error is encountered, parse() returns C<undef>.

=back

=cut

##----------------------------------------------------------------
## DESCRIPTION: DDC::Query::Parser: API: Mid-level: Query Generation API
=pod

=head2 API: Mid-level: Query Generation API

The following methods comprise the mid-level parsing API.
Users should never need to call these methods directly, but they
may be useful if you are deriving a new parser (sub)class, e.g.
implementing an alternate query syntax.

=over 4

=item newQuery

 $q = $parser->newQuery(@args);

Wrapper for $parser-E<gt>{index}-E<gt>newQuery(@args)

=item finishQuery

 $q = $parser->finishQuery($srcQuery);

Imposes default 'hit' restrictor on parsed query $srcQuery,
and other finalizing touches (join insertion, variable merge, independent variable check,
meta expansion).

=item sqlQuoteString

 $quotedStr = $qp->sqlQuoteString($str);

Adds single quotes around $str and escapes any string-internal single quotes.

=item newVariable

 $varLabel = $qp->newVariable(%args);

Wrapper for $qp-E<gt>{qtmp}-E<gt>newVariable(),
with different default semantics.
Known %args:

 label => $varLabel,    ##-- variable label           (default=(generated))
 table => $varTable,    ##-- variable table name      (default=$qp->{qtmp}{default_table})
 tok   => $bool,        ##-- is this var independent? (default=true)

=item newReference

 $varLabel = $qp->newReference(src=>$srcVarName, ref=>$refColName);

Wrapper for newVariable() which creates a new variable dependent
on $srcVarName which will be joined to the table referenced by
the 'ref' field $refColName, for transparent de-referencing
in queries.  Implementation handles variable
aliasing by naming conventions, and performs some basic sanity checks.

=item parseReference

 [$tokVar,$attr,...] = $CLASS_OR_OBJ->parseReference($varName);

Parses a reference returned by the low-level parser as a dot-separated
string of the form "${tokenVarName}.${refAttr1}.(...).${refAttrN}.${attrName}".

=item reference2token

 $tokenVarName = $CLASS_OR_OBJ->reference2token($varName);

Hack: get the name of the independent (token) variable associated
with the dot-separated string $varName.

=item constantQuery

 $q = $parser->constantQuery($sqlWhereFragment);

Simple constant query (boolean true or false).
Can also be used to add literal SQL fragments to a query object.

=item literalQuery

 $q = $parser->literalQuery($literal_text);

Handler for "literal" single-word or -string queries.
Default is an atribute query on $q-E<gt>{defaultAttribute}
via $q-E<gt>{defaultOp} with value $literal_text.

Sets $q-E<gt>{tok} to the newly generated variable as a side-effect.

=item soundsLikeQuery

 $q = $qp->soundsLikeQuery($attributeId,$soundsLikeText);

Handler for "sounds-like" queries over $attributeId with (orthographic) value $soundsLikeText.
Default uses path 'type.pho' on $varName, 'pho' on $soundsLikeText (implicit table: 'type').

=item attributeQuery

 $q = $parser->attributeQuery($attributeId, $sqlOpFragment, $sqlValueFragment);

Handler for generic attribute queries,
where $attributeId = [$varName,$attrName].

=item attributeValue

 $sqlStr = $qp->attributeValue($attributeIdOrValue);

Returns an SQL-string representing $attributeIdOrValue,
where $attributeIdOrValue is one of the following:

=over 4

=item *

a literal value (numeric or string, pre-parsed)

=item *

a pair [ $varName, $attrName ]

=back



=item parseReferencePath

 $varName  = $qp->parseReferencePath([$varNameOrUndef]);
 $varNameN = $qp->parseReferencePath([$varNameOrUndef,@refNames])

Calls newVariable() if $varNameOrUndef is undefined to allocate
an independent base variable, and calls newReference() for each
reference named in @refNames to perform nested variable de-referencing.

=item parseAttributePath

 [$varName,$attrPath] = $qp->parseAttributePath([@refPath,$attrName]);

Wrapper which calls parseReferencePath() on non-final components
of [@refPath,$attrName] and returns and $attributeId ARRAY-ref
[$varName,$attrPath] representing the (de-referenced) argument array.

=item precedesQuery

 $q = $parser->precedesQuery($q1,$q2);

Enforces restriction that all 'tok' variables in $q1 precede all those in $q2 (by primary key).

=item sqlGreatestId

 $sqlFragment = $qp->sqlGreatestId($query,\@tokenVars);

Returns SQL fragment representing the value of the greatest primary key
of any independent variable named in \@tokenVars.

=item sqlLeastId

 $sqlFragment = $qp->sqlLeastId($query,\@tokenVars);

Returns SQL fragment representing the value of the smallest primary key
of any independent variable named in \@tokenVars.

=item sqlMinMax

 $sqlFragment = $qp->sqlMinMax($func,$query);
 $sqlFragment = $qp->sqlMinMax($func,$query,\@tokenVars)

Guts for sqlGreatestId() and sqlLeastId(): returns $tokenVars[0]
if only one token is specified in @tokenVars,
otherwise applies SQL function $func to SQL forms of @tokenVars.

=item sequenceQuery

 $q = $parser->sequenceQuery(\@queryList);

Handler for back-to-back ordered sequences of queries.
Default implementation interprets these as serial order
of independent variables' primary keys.

=item unearQuery

 $q = $qp->unearQuery($maxDist, \@queryList)

Handles unordered 'near' queries over at most $maxDist
intervening tokens.

=item nearQuery

 $q = $qp->nearQuery($maxDist, \@queryList);

Handles ordered 'near' queries over at most $maxDist
intervening tokens.

=item withinQuery

 $q = $parser->withinQuery($srcQuery, $withinTabName);

Handles 'within' queries: imposes default 'hit' container
by join clause manipulation.

=item metaQueryLocal

 $q = $qp->metaQueryLocal($metaPath,$srcQuery,$sqlOpFragment,$sqlValueFragment);

Handler for metadata queries.
Current version performs immediate expansion on all token vars in $srcQuery.
This is the Right Way To Do It if metadata queries should be locally scoped.


=item metaQueryDelayed

 undef = $qp->metaQueryDelayed($metaPath, $sqlOpFragment, $sqlValueFragment);

Alternate handler for metadata queries (currently unused).
This version performs no expansion when the meta-query is parsed,
but rather enqueues all metadata queries for later expansion (e.g.
on $qp-E<gt>finishQuery()).
This would be the Right Way To Do It if metadata queries should always
be interpreted as globally scoped.


=item expandMeta

 $q_expanded = $qp->expandMeta($q);

Expands delayed metadata conditions in $qp-E<gt>{meta} (if any) into $q.

=back

=cut

##----------------------------------------------------------------
## DESCRIPTION: DDC::Query::Parser: API: Low-level: error expansion and reporting: YYParser
=pod

=head2 API: Low-level: Lexer/Parser Connection, Error Reporting, etc.

=over 4

=item _yylex_sub

 \&yylex_sub = $qp->_yylex_sub();

Returns a Parse::Yapp-friendly lexer subroutine.


=item _yyerror_sub

 \&yyerror_sub = $qp->_yyerror_sub();

Returns error subroutine for the underlying Yapp parser.


=item setError

 $errorString = $qp->setError($errorCode,\%userMacros);

Should set $qp-E<gt>{errstr} to expanded $errorString.

default behavior just replaces the following macros in $errorCode:

 __LINE__
 ___COL__
 __LEXTOKNAME__
 __LEXTOKTEXT__
 __TOKNAME__
 __TOKTEXT__
 __EXPECTED__


=back

=cut

##----------------------------------------------------------------
## DESCRIPTION: DDC::Query::Parser: I/O: Hooks
=pod

=head2 I/O: Hooks

=over 4

=item preSaveHook

 $tmpData = $obj->preSaveHook();

Sanitize object for save, returns temprorary data.

=item postSaveHook

 undef = $obj->postSaveHook($tmpData);

(undocumented)

=item postLoadHook

 undef = $obj->postLoadHook();

(undocumented)


=back

=cut

##========================================================================
## END POD DOCUMENTATION, auto-generated by podextract.perl
=pod



=cut

=cut

##======================================================================
## Footer
##======================================================================

=pod

=head1 ACKNOWLEDGEMENTS

Perl by Larry Wall.


=head1 AUTHOR

Bryan Jurish E<lt>moocow@cpan.orgE<gt>


=head1 COPYRIGHT AND LICENSE

Copyright (C) 2011 by Bryan Jurish

This package is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.1 or,
at your option, any later version of Perl 5 you may have available.

=head1 SEE ALSO

perl(1),
DDC(3perl),
DDC::Query::yylexer(3perl),
DDC::Query::yyparser(3perl),
DDC::Query.pm(3perl)

=cut
