#!/usr/bin/perl
#
# Copyright 2013 Timo Benk
# 
# This file is part of nrun.
# 
# nrun is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# nrun is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with nrun.  If not, see <http://www.gnu.org/licenses/>.
#
# Program: ncopy
# Author:  Timo Benk <benk@b1-systems.de>
# Date:    Fri May 24 08:13:14 2013 +0200
# Ident:   d8e31427a6c4750dd236d1fb48878fdfde4c1e09
# Branch:  master
#
# Changelog:--reverse --grep '^tags.*relevant':-1:%an : %ai : %s
# 
# Timo Benk : 2013-04-29 18:53:21 +0200 : introducing ncopy
# Timo Benk : 2013-04-29 19:02:10 +0200 : use Net::Ping instead of the command ping
# Timo Benk : 2013-04-29 20:39:37 +0200 : File::HomeDir dependency removed
# Timo Benk : 2013-05-04 07:13:02 +0200 : hostnames can be given at the commandline
# Timo Benk : 2013-05-05 18:37:46 +0200 : pod docs moved into the code files
# Timo Benk : 2013-05-05 18:41:11 +0200 : modules were not found when INSTALL_BASE was set
# Timo Benk : 2013-05-06 09:04:16 +0200 : argument --version added
# Timo Benk : 2013-05-07 13:15:33 +0200 : package name changed for CPAN
# Timo Benk : 2013-05-08 10:05:39 +0200 : better signal handling implemented
# Timo Benk : 2013-05-09 07:31:52 +0200 : fix race condition in semaphore cleanup code
# Timo Benk : 2013-05-09 07:38:43 +0200 : cleanup on exit
# Timo Benk : 2013-05-11 13:08:04 +0200 : default to 5 parallel processes
# Timo Benk : 2013-05-13 11:12:49 +0200 : commandline syntax simplified
# Timo Benk : 2013-05-13 11:28:17 +0200 : getopt timeout short param removed
# Timo Benk : 2013-05-13 13:47:10 +0200 : some command abbreviations added
# Timo Benk : 2013-05-13 15:58:07 +0200 : child processes died before the result was handled
# Timo Benk : 2013-05-13 19:56:46 +0200 : semaphores will only be deleted in parent process
# Timo Benk : 2013-05-21 18:47:43 +0200 : parameter --async added
# Timo Benk : 2013-05-22 08:28:30 +0200 : rc file uses now yaml syntax
# Timo Benk : 2013-05-22 13:09:13 +0200 : option --no-logfile was broken
# Timo Benk : 2013-05-22 13:20:36 +0200 : --skip-ping-check and --skip-ns-check enabled
# Timo Benk : 2013-05-22 13:22:48 +0200 : --copy cleanup will not be logged
# Timo Benk : 2013-05-22 18:10:55 +0200 : dump the exit code per default in ncopy
# Timo Benk : 2013-05-22 18:42:06 +0200 : timeout defaults to 60 if not set
# Timo Benk : 2013-05-23 10:09:31 +0200 : unnecessary wait() call in signal handler removed
# Timo Benk : 2013-05-24 08:03:19 +0200 : generic mode added
# Timo Benk : 2013-05-24 08:13:14 +0200 : default mode will be marked in usage string
#

package App::ncopy;

use strict;
use warnings;

use FindBin;
use lib "$FindBin::Bin/../lib";
use lib "$FindBin::Bin/../lib/perl5";

use File::Path;
use File::Basename;
use Date::Format;
use Getopt::Long;
use Net::Ping;
use POSIX qw(getuid);

use NRun::Util;
use NRun::Version;
use NRun::Worker;
use NRun::Dumper;
use NRun::Logger;
use NRun::Dispatcher;

# global settings shared by all subs in this file. main() replaces the hash
# with the values read from the configuration files and adds the semaphores,
# parse_commandline() adds the evaluated commandline settings.
our $options = {};

###
# dump a short usage info to stdout and terminate the program.
#
# the available execution modes are taken from NRun::Worker::workers(). the
# mode that matches the configured arg_mode (if any) is flagged with
# "[default]". the modes are listed sorted by name, so the output is the same
# on every invocation.
#
# this function never returns, it always calls exit.
sub usage {

    print "usage: " . basename($0) . " -t HOST1[,HOST2,...] -- SRC DST\n";
    print "--async,-a               asynchronous output mode.\n";
    print "--dump-output,-d         instead of dumping the exit status, dump the command output.\n";
    print "--log-directory,-l DIR   base directory for the log files.\n";
    print "--no-hostname            omit hostname prefix.\n";
    print "--no-logfile             do not generate any log files.\n";
    print "--parallel,-p MAX        number of parallel connections (defaults to 5).\n";
    print "--skip-ping-check        skip checking if the host answers on ping.\n";
    print "--skip-ns-check          skip checking if the hostname is resolvable.\n";
    print "--target,-t HOST1,HOST2  comma separated list of target hosts.\n";
    print "--timeout SEC            timeout for each command execution (defaults to 60).\n";
    print "--version,-v             print the version string and exit.\n";
    print "--mode,-m MODE           remote execution mode:\n";

    my $workers = NRun::Worker::workers();

    # the iteration order of a hash is randomized per process, sort the
    # workers by their mode name to get a reproducible listing.
    foreach my $object (sort { $a->{MODE} cmp $b->{MODE} } values(%$workers)) {

        my $line = "                         " . $object->{MODE} . " - " . $object->{DESC};

        if (defined($options->{arg_mode}) and $options->{arg_mode} eq $object->{MODE}) {

            $line .= " [default]";
        }

        print $line . "\n";
    }

    exit;
}

###
# parse the commandline.
#
# every option defaults to the matching arg_* value in $options (as read from
# the configuration files), a value given on the commandline takes precedence.
# the evaluated settings are stored back into $options using the keys
#
#   target, hosts, parallel, timeout, source, destination, mode, dumpmode,
#   log_directory, no_logfile, skip_ns_check, skip_ping_check
#
# on any error a message followed by the usage info is printed and the program
# terminates (usage() does not return). --version prints the version string
# and exits as well.
sub parse_commandline {

    my $arg_target          = $options->{arg_target};
    my $arg_async           = $options->{arg_async};
    my $arg_parallel        = $options->{arg_parallel};
    my $arg_dump_output     = $options->{arg_dump_output};
    my $arg_no_hostname     = $options->{arg_no_hostname};
    my $arg_no_logfile      = $options->{arg_no_logfile};
    my $arg_log_directory   = $options->{arg_log_directory};
    my $arg_mode            = $options->{arg_mode};
    my $arg_skip_ping_check = $options->{arg_skip_ping_check};
    my $arg_skip_ns_check   = $options->{arg_skip_ns_check};
    my $arg_timeout         = $options->{arg_timeout};
    my $arg_version         = $options->{arg_version};

    my $ret = GetOptions (
        "async|a"           => \$arg_async,
        "target|t=s"        => \$arg_target,
        "parallel|p=i"      => \$arg_parallel,
        "no-hostname"       => \$arg_no_hostname,
        "dump-output|d"     => \$arg_dump_output,
        "log-directory|l=s" => \$arg_log_directory,
        "timeout=i"         => \$arg_timeout,
        "no-logfile"        => \$arg_no_logfile,
        "mode|m=s"          => \$arg_mode,
        "skip-ping-check"   => \$arg_skip_ping_check,
        "skip-ns-check"     => \$arg_skip_ns_check,
        "version|v"         => \$arg_version,
    );

    usage() if (not $ret);

    if (defined($arg_version)) {

        print basename($0) .  " " . $NRun::Version::VERSION . "\n";
        exit(0);
    }

    # whatever GetOptions left in @ARGV is expected to be SRC and DST
    if (scalar(@ARGV) == 1) {

        print "error: destination file is missing.\n";
        usage();
    } elsif (scalar(@ARGV) == 0) {

        print "error: source file is missing.\n";
        usage();
    }

    if (not defined($arg_mode)) {

        print "error: parameter --mode is mandatory.\n";
        usage();
    }

    # callback_action() looks the worker class up by the lowercased mode name,
    # so reject an unknown mode here instead of failing once per host later.
    my $mode    = lc($arg_mode);
    my $workers = NRun::Worker::workers();
    if (not exists($workers->{$mode})) {

        print "error: unknown mode '$arg_mode'.\n";
        usage();
    }

    if (not defined($arg_target)) {

        print "error: parameter --target is mandatory.\n";
        usage();
    }

    my $date = time2str("%Y%m%d_%H_%M_%S", time);

    # a value of 0 for --parallel or --timeout is treated like "not set"
    $options->{target}          = [ split(",", $arg_target) ];
    $options->{parallel}        = $arg_parallel ? $arg_parallel : 5;
    $options->{source}          = $ARGV[0];
    $options->{destination}     = $ARGV[1];
    $options->{timeout}         = $arg_timeout ? $arg_timeout : 60;
    $options->{no_logfile}      = $arg_no_logfile;
    $options->{skip_ns_check}   = $arg_skip_ns_check;
    $options->{skip_ping_check} = $arg_skip_ping_check;
    $options->{mode}            = $mode;

    # the dumper mode is composed of three parts:
    # (result|output)_(sync|async)_(hostname|no_hostname)
    $options->{dumpmode} = join("_",
        defined($arg_dump_output) ? "output"      : "result",
        defined($arg_async)       ? "async"       : "sync",
        defined($arg_no_hostname) ? "no_hostname" : "hostname",
    );

    # each run logs into its own timestamped directory below <base>/copy
    my $log_base = defined($arg_log_directory) ? $arg_log_directory : NRun::Util::home() . "/.nrun";
    $options->{log_directory} = "$log_base/copy/$date";

    if ($options->{parallel} < 1) {

        print "error: parameter --parallel must be at least 1.\n";
        usage();
    }

    # every target may expand to several hostnames (see resolve_target)
    my @hosts = ();
    foreach my $target (@{$options->{target}}) {

        push(@hosts, NRun::Util::resolve_target($target, $options->{alias}));
    }
    $options->{hosts} = [ NRun::Util::uniq(@hosts) ];

    if (scalar(@{$options->{hosts}}) == 0) {

        print "error: no hostnames given.\n";
        usage();
    }
}

###
# callback function used by the dispatcher, invoked with a single hostname.
#
# creates the worker object for the selected mode together with a dumper and
# (unless no_logfile is set) a logger, runs the pre check of the worker and,
# if it succeeded, copies source to destination. the worker is destroyed in
# either case.
#
# <- the hostname to copy to
sub callback_action {

    my ($hostname) = @_;

    my $class  = NRun::Worker::workers()->{$options->{mode}}->{NAME};
    my $worker = $class->new();

    my $dumper = NRun::Dumper->new ({
        hostname  => $hostname,
        mode      => $options->{dumpmode},
        semaphore => $options->{sem_dumper},
    });

    # without a logfile the worker is handed an undefined logger
    my $logger = defined($options->{no_logfile})
        ? undef
        : NRun::Logger->new ({
            hostname  => $hostname,
            basedir   => $options->{log_directory},
            semaphore => $options->{sem_logger},
        });

    # the per host values are listed last, so they win over $options
    my %settings = (
        %$options,
        hostname => $hostname,
        dumper   => $dumper,
        logger   => $logger,
    );
    $worker->init(\%settings);

    if ($worker->pre_check()) {

        $worker->copy($options->{source}, $options->{destination});
        return $worker->destroy();
    }

    $worker->destroy();
    return;
}

###
# signal handler, registered in main() for TERM, INT and ALRM.
#
# terminates the process through a regular exit with status 0.
sub handler {

    exit(0);
}

###
# main
#
# registers the signal handlers, reads the configuration files, creates the
# semaphores for logger and dumper, initializes the workers, parses the
# commandline and finally lets the dispatcher call callback_action() for
# every host.
sub main {

    # NOTE(review): NRun::Signal and NRun::Semaphore are not loaded in this
    # file, presumably one of the NRun modules used above pulls them in.
    foreach my $signal ('TERM', 'INT', 'ALRM') {

        NRun::Signal::register($signal, \&handler);
    }

    my @rcfiles = (
        "$FindBin::Bin/../etc/nrunrc",
        "/etc/nrunrc",
        NRun::Util::home() . "/.nrunrc",
    );

    $options = NRun::Util::read_config_files(\@rcfiles);

    $options->{sem_logger} = NRun::Semaphore->new();
    $options->{sem_dumper} = NRun::Semaphore->new();

    # remember the creating process, the END block deletes the semaphores
    # only if it runs in this very process
    $options->{sem_ppid} = $$;

    # NOTE(review): sem_worker is not set anywhere in this file, so this
    # passes undef unless a configuration file provides the key - confirm.
    NRun::Worker::init($options, $options->{sem_worker});

    parse_commandline();

    my %dispatcher_args = (
        nmax    => $options->{parallel},
        timeout => $options->{timeout},
        objects => $options->{hosts},

        callback_action => \&callback_action,
    );

    NRun::Dispatcher->new(\%dispatcher_args)->run();
}

main();

###
# cleanup on exit.
#
# the semaphores are deleted only by the process that created them (the pid
# is stored in sem_ppid by main()), any other process leaves them untouched.
END {

    # sem_ppid is still unset if the program terminated before main() created
    # the semaphores - there is nothing to clean up then, and comparing the
    # undefined value would only trigger a warning.
    if (defined($options->{sem_ppid}) and $options->{sem_ppid} == $$) {

        foreach my $name ('sem_logger', 'sem_dumper') {

            $options->{$name}->delete() if (defined($options->{$name}));
        }
    }
}

__END__

=pod

=head1 NAME

ncopy - copy a file or directory to multiple target servers.

=head1 SYNOPSIS

ncopy -t HOST1[,HOST2,...] [-p MAX] [-l DIR] [--timeout SEC]
[--mode MODE] [--dump-output] [--no-hostname] [--no-logfile]
[--skip-ping-check] [--skip-ns-check] [--version] [--async]
-- SRC DST

=head1 DESCRIPTION

ncopy will copy a file or directory to multiple target servers.

the underlying remote access mechanism is exchangeable. as of now, ssh, nsh, rsh,
local and generic execution modes are implemented (see MODES).

=head1 CONFIGURATION

special configuration options for the different modes and, additionally, all
commandline options can be given in a configuration file.

the following three places will be searched for configuration files (values in the last
configuration file will overwrite values in the first configuration file). indentation
does matter.

- $FindBin::Bin/../etc/nrunrc

- /etc/nrunrc

- $HOME/.nrunrc

    ---
    # mode ssh options
    ssh_copy: >
        /usr/bin/scp
        -o User=root
        -o PreferredAuthentications=hostbased,publickey
        -o StrictHostKeyChecking=no
        -o UserKnownHostsFile=/dev/null
        -o LogLevel=FATAL
        SOURCE HOSTNAME:TARGET
    
    ssh_exec: >
        /usr/bin/ssh
        -o User=root
        -o PreferredAuthentications=hostbased,publickey
        -o StrictHostKeyChecking=no
        -o UserKnownHostsFile=/dev/null
        -o LogLevel=FATAL
        HOSTNAME COMMAND ARGUMENTS
    
    ssh_delete: >
        /usr/bin/ssh
        -o User=root
        -o PreferredAuthentications=hostbased,publickey
        -o StrictHostKeyChecking=no
        -o UserKnownHostsFile=/dev/null
        -o LogLevel=FATAL
        HOSTNAME rm -f "FILE"
    
    # additional commandline options
    arg_mode: ssh
    arg_parallel: 5
    arg_timeout: 60
    
    # alias definitions
    alias:
        production:
            - host1
            - host2
        development:
            - host3
            - host4
        all:
            - production
            - development

=head1 LOGGING

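on each execution run, the command output and exit code will be saved inside the
logging directory. the default base directory is $HOME/.nrun (see --log-directory),
the log files of a single run are stored in the subdirectory
copy/<%Y%m%d_%H_%M_%S> below the base directory.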

- $LOGDIR/results.log - will contain the exit codes

- $LOGDIR/output.log - will contain the complete command output for all hosts

- $LOGDIR/hosts/HOSTNAME.log - will contain the command output for a single host

=head1 MODES

=head2 mode ssh

use ssh as the underlying remote access mechanism.

the following configuration options must be set in the configuration file:

'ssh_exec'   - commandline for remote execution (COMMAND, ARGUMENTS, HOSTNAME will be replaced)

'ssh_copy'   - commandline for remote copying (SOURCE, TARGET, HOSTNAME will be replaced)

'ssh_delete' - commandline for remote deletion (FILE, HOSTNAME will be replaced)

for passwordless login ssh-agent can be used:

	# ssh-keygen
	# scp .ssh/id_rsa.pub $USER@$HOST:.ssh/authorized_keys

	# eval `ssh-agent`
	# ssh-add

to prevent any ssh interaction the following ssh command parameters are
suggested:

	-o User=root
	-o PreferredAuthentications=hostbased,publickey
	-o StrictHostKeyChecking=no
	-o UserKnownHostsFile=/dev/null
	-o LogLevel=FATAL

=head2 mode rsh

use rsh as the underlying remote access mechanism.

the following configuration options must be set in the configuration file:

'rsh_exec'   - commandline for remote execution (COMMAND, ARGUMENTS, HOSTNAME will be replaced)

'rsh_copy'   - commandline for remote copying (SOURCE, TARGET, HOSTNAME will be replaced)

'rsh_delete' - commandline for remote deletion (FILE, HOSTNAME will be replaced)

=head2 mode local

execute the script locally for each host and set the environment variable
TARGET_HOST on each execution.

'local_exec' - commandline for local execution (COMMAND, ARGUMENTS, HOSTNAME will be replaced)

=head2 mode nsh

use nsh as the underlying remote access mechanism.

the following configuration options must be set in the configuration file:

'nsh_exec'   - commandline for remote execution (COMMAND, ARGUMENTS, HOSTNAME will be replaced)

'nsh_copy'   - commandline for remote copying (SOURCE, TARGET, HOSTNAME will be replaced)

'nsh_delete' - commandline for remote deletion (FILE, HOSTNAME will be replaced)

'nsh_check'  - commandline for the agentinfo check command (HOSTNAME will be replaced)

=head2 mode generic

this is a special generic mode that can be used for arbitrary remote execution
mechanisms.

'cmdline_exec'   - commandline for remote execution (COMMAND, ARGUMENTS, HOSTNAME will be replaced)

'cmdline_copy'   - commandline for remote copying (SOURCE, TARGET, HOSTNAME will be replaced)

'cmdline_delete' - commandline for remote deletion (FILE, HOSTNAME will be replaced)

an example that resembles the mode ssh would look the following way:

    generic_copy: >
        /usr/bin/scp
        -o User=root
        -o PreferredAuthentications=hostbased,publickey
        -o StrictHostKeyChecking=no
        -o UserKnownHostsFile=/dev/null
        -o LogLevel=FATAL
        SOURCE HOSTNAME:TARGET
    
    generic_exec: >
        /usr/bin/ssh
        -o User=root
        -o PreferredAuthentications=hostbased,publickey
        -o StrictHostKeyChecking=no
        -o UserKnownHostsFile=/dev/null
        -o LogLevel=FATAL
        HOSTNAME COMMAND ARGUMENTS
    
    generic_delete: >
        /usr/bin/ssh
        -o User=root
        -o PreferredAuthentications=hostbased,publickey
        -o StrictHostKeyChecking=no
        -o UserKnownHostsFile=/dev/null
        -o LogLevel=FATAL
        HOSTNAME rm -f "FILE"

=head1 OPTIONS

B<--async,-a>                    asynchronous output mode.

B<--dump-output,-d>              instead of dumping the exit status, dump the command output.

B<--log-directory,-l DIR>        base directory for the log files.

B<--mode,-m MODE>                remote execution mode (see MODES)

B<--no-hostname>                 omit hostname prefix.

B<--no-logfile>                  do not generate any log files.

B<--parallel,-p MAX>             number of parallel connections (defaults to 5).

B<--skip-ping-check>             skip checking if the host answers on ping.

B<--skip-ns-check>               skip checking if the hostname is resolvable.

B<--target,-t HOST1[,HOST2,...]> comma separated list of target hosts (see TARGETS).

B<--timeout SEC>                 timeout for each command execution (defaults to 60).

B<--version,-v>                  print the version string and exit.

=head1 EXAMPLES

1. copy file test.tar to host1, host2 and all hosts in the file HOSTS.LST

	$ ncopy --target host1,host2,HOSTS.LST -- test.tar /tmp

=head1 TARGETS

a target name may be either a filename containing the target hosts, one per line,
an alias definition in the configuration file or simply a hostname.

if there is a conflict, for example an alias named identically as an existing
file, the alias will always overrule the filename and the filename will always
overrule the hostname.

an alias can be defined in the configuration file the following way. an alias
definition may contain additional alias names, filenames or simply hostnames.
indentation does matter.

    # alias definitions
    alias:
        production:
            - host1
            - host2
        development:
            - host3
            - host4
        all:
            - production
            - development

=head1 NOTES

=head2 inspecting long running processes

it is possible to signal nrun with USR1 and USR2 to dump, resp. save the currently
running processes. if nrun is signaled with USR2, it will create a file in
the log directory trace_<%Y%m%d_%H_%M_%S>.log.

the integer in square brackets is the pid of the perl process.

	$ kill -USR1 1234
	host1[14760]: (31800) /usr/bin/ssh -o User=root localhost sleep 120
	host1[14760]: 1
	host1[14760]: 2
	host1[14760]: SIGUSR1 received
	host2[14756]: (31801) /usr/bin/ssh -o User=root localhost sleep 120
	host2[14756]: 1
	host2[14756]: 2
	host2[14756]: SIGUSR1 received

=head2 transferring the public key

the helper script misc/put_pubkey can be used to transfer the ssh public key
to the target hosts without supplying a password for each login. it is meant
to be executed by the nrun script in mode local.

	$ nrun -t HOSTS.LST --mode local --timeout 120 -- ./put_pubkey KEY USER PWD

=head1 AUTHOR

Timo Benk <benk@b1-systems.de>

=head1 SEE ALSO

nrun(1), dsh(1)
