#!/usr/bin/env perl

# ABSTRACT: HTTP daemon for PML-TQ (PML Tree Query) queries

=head1 SYNOPSIS

pmltq_http [options]

or
  pmltq_http -u          for usage
  pmltq_http -h          for help
  pmltq_http --man       for the manual page
  pmltq_http --version   for version

=head1 DESCRIPTION

B<pmltq_http> starts a small HTTP daemon that provieds HTTP(s)
interface to PML-TQ queries over a SQL query engine.

Optional parameters can specify which SQL database configuration and
SSL keys to use. If the SSL keys are not found, the daemon runs in a
non-SSL mode.

Note: the daemon binds to all interfaces!

The URLs for client requests are:

  http://localhost:8082/form
  ...


=head1 OPTIONS

=over 5

=item B<--port> number

Specify the port to listen on. Defaults to 8082.

=item B<--log|-l> log_file

Specify path to a log file (defualt is to print log on STDOUT).

=item B<--query-log-dir|-d> log_dir

Specify path to a directory where to log user queries (default is
.pmltq_cgi_log in the current directory).

=item B<--ssl_ca> filename

Path to the SSL ca file. Defaults to config/server.ca.

=item B<--ssl_cert> SSL_cert_file

Path to the SSL cert file. Defaults to config/server.cert.

=item B<--ssl_key> SSL key file

Path to the SSL key file. Defaults to config/server.key.

=item B<--config-file|-c> filename

Specify a configuration file. The configuration file is a XML file (in
fact, a PML instance conforming to the treebase_conf_schema.xml) that
lists available SQL engine configurations. If this option is not
provided, B<pmltq> attempts to find a file named treebase.conf in the
resource paths (namely in config/ sub-directory and in ~/.tred.d).

=item B<--server|-s> id

ID of the server configuration in the configuration file (see above).

=item B<--single|-S>

Operate in single-server mode (default is pre-forking mode).

=item B<--forking|-f> number

Operate in pre-forking mode, starting given number of servers. This is
default mode (with 5 servers).

=item B<--resource-dir|-Z> dirname

Directory to search for resource files. These files can be served even
if not in the database and can be queried by base file name or even
with arbitrary directory preceding the base filename. Do not put any
sensitive files to the resource path!

=item   B<--auth-file> filename

Path to a file containing user access configuration. Updates to this
file are applied immediately without the need to restart the server.

Records (lines) in this file may have the following format:

# <comment>

<username>: <password>

<username>: <password> : <authorization>

where <authorization> is a comma-separated list of server IDs (see
--server). If the list is preceded by the minus (-) sign, the user is
authorized this service unless the server ID is present in the list.
If this list is preceded by the plus (+) sign or no sign at all, the
user is authorized to connect to this service, if and only if the
server ID is present in the list. If the list <authorization> list is
not present, the user is authorized to connect to any service.

The information about other services is be used to serve requests
about other running instances.

=item B<--debug|-D>

Print some extended information (e.g. evaluation benchmarks).

=item B<--usage|-u>

Print a brief help message on usage and exits.

=item B<--help|-h>

Prints the help page and exits.

=item B<--man>

Displays the help as manual page.

=item B<--version>

Print program version.

=back

=cut


our $VERSION = '0.6';
use Getopt::Long;
use Pod::Usage;
my %opts;
BEGIN {

  Getopt::Long::Configure ("bundling");
  GetOptions(\%opts,
	     'debug|D',
	     'config-file|c=s',
	     'pid-dir=s',
	     'static-dir=s',
	     'port|p=i',
	     'ssl_ca=s',
	     'ssl_cert=s',
	     'ssl_key=s',
	     'server|s=s',
	     'auth-file|a=s',
	     "resource-dir|Z=s@",
	     'single|S',
	     'forking|f=i',
	     'log|l=s',
	     'query-log-dir|d=s',
	     'google-translate',
	     'ms-translate=s',
             'ga-tracking-domain=s',
             'ga-tracking-code=s',
	     'tmp-dir=s',
	     'quiet|q',
	     'help|h',
	     'usage|u',
	     'version|V',
	     'man',
	    ) or $opts{usage}=1;

  if ($opts{usage}) {
    pod2usage(-msg => 'pmltq');
  }
  if ($opts{help}) {
    pod2usage(-exitstatus => 0, -verbose => 1);
  }
  if ($opts{man}) {
    pod2usage(-exitstatus => 0, -verbose => 2);
  }
  if ($opts{version}) {
    print "$VERSION\n";
    exit;
  }
}

use Benchmark;
use Net::HTTPServer;
use CGI;
use HTTP::Request;
use HTTP::Request::AsCGI;
use FindBin;
use lib (glob(File::Spec->catfile(${FindBin::RealBin},'libs','*','')));
use Treex::PML;
use List::Util qw(first);

BEGIN {
  require PMLTQ::CGI;
  use File::Basename;
  use PMLTQ;
  Treex::PML::AddResourcePathAsFirst(File::Spec->catfile(PMLTQ->home(),'resources'));
  Treex::PML::AddResourcePathAsFirst(grep length, @{$opts->{'resource-dir'}}) if @{$opts->{'resource-dir'}};
}
use PMLTQ::CGI;
use IO::Scalar;

BEGIN {
  
  PMLTQ::CGI::Configure({
    'static-dir' => $opts{'static-dir'} || File::Spec->catdir((fileparse($INC{File::Spec->catfile('PMLTQ','CGI.pm')}))[1],'static'),
    'config-file' => $opts{'config-file'} || File::Spec->catfile(${FindBin::RealBin},'config','pmltq_cgi.conf'),
    'server' => $opts{'server'},
    'pid-dir' => $opts{'pid-dir'},
    'port' => $opts{'port'},
    'debug' => $opts{'debug'},
    'query-log-dir' => $opts{'query-log-dir'},
    'auth-file' => $opts{'auth-file'},
    'tmp-dir' => $opts{'tmp-dir'},
    'google-translate' => $opts{'google-translate'},
    'ms-translate' => $opts{'ms-translate'},
    'ga-tracking-domain' => $opts{'ga-tracking-domain'},
    'ga-tracking-code' => $opts{'ga-tracking-code'},
  });
}

sub handle {
  my ($call,$req)=@_;
  my $res = $req->Response;
  my $env = $req->Env();
  my $code;
  my $stdout;
  {
    my $request = HTTP::Request->parse($req->Request);
    my $c = HTTP::Request::AsCGI->new(
      $request,
      #      %$env, # don't use, reports wrong URL
      HTTPS=> $ssl_opts{ssl} ? 'on' : 'off',
      REMOTE_USER => $env->{'REMOTE_USER'},
    )->setup;
    my $cgi = CGI->new();
    eval {
      $code = $call->($cgi);
    };
    if ($@) {
      warn $@;
      $code=500;
    }
    $stdout=$c->stdout;
  }
  my $t0 = new Benchmark;

  while ( my $line = $stdout->getline ) {
    $line=~s/\015?\012$//;
    last unless length $line;
    $res->Header($1,$2) if ($line=~/(.*?): (.*)$/);
  }
  while ( my $line = $stdout->getline ) {
    $res->Print($line);
  }
  my $t1 = new Benchmark;
#  print STDERR "Forwarding CGI results took ",timestr(timediff($t1,$t0)),"\n";
  $res->Code($code);
  return $res;
}

my %ssl_defaults = (
  ssl_ca => 'server.ca',
  ssl_cert => 'server.crt',
  ssl_key => 'server.key',
);
my %ssl_opts = map {$_=>$opts{$_}||Treex::PML::FindInResources($ssl_defaults{$_})} qw(ssl_ca ssl_cert ssl_key);

if (scalar(grep { defined($_) && -r $_ } values %ssl_opts)==3) {
  $ssl_opts{ssl}=1;
  print STDERR "Found SSL key files, enabling SSH mode!\n"
} else {
  #  use Data::Dumper;
  #  print Dumper(\%ssl_opts);
  print STDERR "SSL key files missing, SSL disabled!\n".
    "Hint: To enable SSL support, create server.crt, server.key, and server.ca files in the config sub-directory\n".
    "      or use corresponding command-line options.\n";
}

my $server = new Net::HTTPServer(
  port=>$opts{port}||8082,
  log => $opts{logfile} || 'STDOUT',
  type=>$opts{single} ? 'single' : 'forking',
  ($opts{forking} ? ( numproc => $opts{forking}) : ( )),
  %ssl_opts,
);

my %re_handlers = (
  '/data/.*'   => sub{ handle(\&PMLTQ::CGI::resp_data,@_) },
  '/static/.*' => sub{ handle(\&PMLTQ::CGI::resp_static,@_) }, # no authorization required
);
my %handlers = (
  '/'  => 'PMLTQ::CGI::resp_root',
  '/other'  => 'PMLTQ::CGI::resp_other_services',
  '/about'  => 'PMLTQ::CGI::resp_about',
  '/form'   => 'PMLTQ::CGI::resp_form',
  '/query'  => 'PMLTQ::CGI::resp_query',
  '/query_svg'  => 'PMLTQ::CGI::resp_query_svg',
  '/schema' => 'PMLTQ::CGI::resp_schema',
  '/type'   => 'PMLTQ::CGI::resp_type',
  '/nodetypes'   => 'PMLTQ::CGI::resp_nodetypes',
  '/node'   => 'PMLTQ::CGI::resp_node',
  '/relations'   => 'PMLTQ::CGI::resp_relations',
  '/relation_target_types'   => 'PMLTQ::CGI::resp_relation_target_types',
  '/svg'   => 'PMLTQ::CGI::resp_svg',
  '/n2q'   => 'PMLTQ::CGI::resp_n2q',
  '/past_queries'   => 'PMLTQ::CGI::resp_past_queries',
  '/version'   => 'PMLTQ::CGI::resp_version',
  '/favicon.ico'   => 'PMLTQ::CGI::resp_favicon',
);

$server->RegisterURL({
  map {
    my $sub = \&{$handlers{$_}};
    (
      '/app/login'   => sub { handle(\&PMLTQ::CGI::resp_login,@_) },
      ($_ => sub { handle($sub,@_) }),
      ('/app'.$_ => sub { handle(sub { PMLTQ::CGI::app($sub,@_) } ,@_) }),
    )
  } keys %handlers
 });

$server->RegisterRegex($_=>$re_handlers{$_}) for keys %re_handlers;

if ($opts{'auth-file'}) {
    unless (PMLTQ::CGI::configuration->{anonymous_access}) {
        print STDERR "AUTH is required on root.\n";
        $server->RegisterAuth('Digest','/','PMLTQ',\&auth);
    }
    $server->RegisterAuth('Digest','/app/login','PMLTQ',\&auth);
    $server->{AUTH}->{'/app'}=undef;
    $server->{AUTH}->{'/favicon.ico'}=undef;
    #$server->{AUTH}->{'/static'}=undef; # hack: remove authorization requirement for the /static folder
}

sub auth {
  my ($url,$user)=@_;
  my @ret = &PMLTQ::CGI::auth(@_);
  if ($ret[0] eq "200") {
    unless ($url eq '/app/login' or exists($handlers{$url}) or (first { $url=~m{^$_} } keys %re_handlers)) {
      print STDERR scalar(localtime).": denied AUTH to $user for $url: code 403.\n";
      $ret[0]="403";
    }
  }
  return @ret;
}

$server->Start;

$server->Process;