#!/usr/bin/perl -w

# For bare-bones documentation, do "perldoc NewsClipper.pl". A user's manual
#   is included with the purchase of one of the commercial versions.
# To subscribe to the News Clipper mailing list, visit
#   http://www.NewsClipper.com/techsup.htm#MailingList
# Send bug reports or enhancements to bugs@newsclipper.com. Send in a
#   significant enhancement and you'll get a free license for News Clipper.

# Visit the News Clipper homepage at http://www.newsclipper.com/ for more
# information.

#------------------------------------------------------------------------------

# Written by: David Coppit http://coppit.org/ <david@coppit.org>

# This code is distributed under the GNU General Public License (GPL). See
# http://www.opensource.org/gpl-license.html and http://www.opensource.org/.

# ------------------------------------------------------------------------------

require 5.005;
use strict;

use Getopt::Long;
use FileHandle;
use File::Cache;

use vars qw( %config %opts $VERSION $COMPATIBLE_CONFIG_VERSION $lock_manager );

# These need to be predeclared so that this code can be parsed and run until
# we load the NewsClipper::Globals module. WARNING! Be careful to not use
# these until NewsClipper::Globals has been imported!
sub DEBUG();
sub dprint(@);
sub reformat(@);
sub dequote($;$);

# To suppress warnings
use vars qw(&dprint &reformat &dequote);

# The version of the script. The runs of digits in the literal " 1.2.8 " are
# extracted into @r and formatted as "<first>." followed by each remaining
# number printed back to back, so the value stored here is the string "1.28".
$VERSION = do {my @r=(q$ 1.2.8 $=~/\d+/g);sprintf"%d."."%1d"x$#r,@r};

# The version of configuration file that this version of News Clipper can use.
# ValidateSetup() refuses configuration files whose forNewsClipperVersion
# entry is missing or numerically lower than this value.
$COMPATIBLE_CONFIG_VERSION = 1.21;

# ------------------------------ MAIN PROGRAM ---------------------------------

sub SetupConfig();
sub print_usage();
sub HandleProxyPassword();
sub HandleClearCache();
sub ProcessFlagCommand();
sub ProcessFiles();
sub PrintDebugSummary(\@);

# Main program flow:
#   1. Load the configuration (which also parses the command line and imports
#      NewsClipper::Globals), then print the debug summary.
#   2. Print the usage and exit if -h was given.
#   3. Take the lock on the .NewsClipper directory (non-Windows only).
#   4. Handle the proxy password prompt and the -C cache clearing.
#   5. Run either the -e command or the configured input files, protected by
#      an alarm-based timeout where that is possible.
#   6. Release the lock and exit with 0, or 1 if processing raised an error.
{
  # Store a copy of @INC for later debugging messages
  my @startingINC = @INC;

  SetupConfig();

  # PrintDebugSummary has a (\@) prototype, so the array is passed by
  # reference even though it is written as a plain array here.
  PrintDebugSummary(@startingINC);
}

# Print the usage if the -h flag was used
# (exit(0) only runs because print_usage() ends with a print, whose return
# value is true on success.)
print_usage() && exit(0) if $opts{h};

# Set up the lockfile. SetupConfig() above created .NewsClipper, so we can
# lock on that. This feature can not be used in Windows because it doesn't
# have fork()
unless (($^O eq 'MSWin32') || ($^O eq 'dos'))
{
  require LockFile::Simple;
  $lock_manager = LockFile::Simple->make('-autoclean' => 1, '-nfs' => 1,
    '-stale' => 1, '-warn' => 0, '-wfunc' => undef, '-efunc' => undef,
    '-format' => "$NewsClipper::Globals::home/.NewsClipper/lock");
  $lock_manager->lock("$NewsClipper::Globals::home/.NewsClipper")
    or die reformat dequote<<"    EOF";
      There is already another copy of News Clipper running. This copy of News
      Clipper waited 60 seconds for the other copy to finish. Aborting. (You
      should delete $NewsClipper::Globals::home/.NewsClipper/lock if you are
      sure that no other News Clipper is running.)
    EOF
}

HandleProxyPassword();
# NOTE: HandleClearCache() calls exit itself when -C was given, so nothing
# below this line runs in that case.
HandleClearCache();

# Exit status of the script: stays 0 unless processing fails with an error
# other than the timeout (the timeout case dies instead).
my $exit_value = 0;

# Do timers unless we are in debug mode or on the broken Windows platform
if (DEBUG || ($^O eq 'MSWin32') || ($^O eq 'dos'))
{
  # No timeout protection here: any die propagates and ends the script.
  if ($opts{e})
  {
    ProcessFlagCommand();
  }
  else
  {
    ProcessFiles();
  }
}
else
{
  # The handler's message is matched below to recognize our own timeout.
  $SIG{ALRM} = sub { die "newsclipper timeout" };

  eval
  {
    alarm($config{scriptTimeout});
    if ($opts{e})
    {
      ProcessFlagCommand();
    }
    else
    {
      ProcessFiles();
    }
    # Finished in time, so cancel the pending alarm.
    alarm(0);
  };

  if ($@)
  {
    # See if it was our timeout
    if ($@ =~ /newsclipper timeout/)
    {
      die "News Clipper script timeout has expired. News Clipper killed.\n";
    }
    else
    {
      # The eval got aborted, so we need to stop the alarm
      alarm (0);
      # and print the error. (I'm not simply die'ing here because I don't like
      # the annoying ...propagated message. I don't know if this is the right
      # way to do this, but it works.)
      print STDERR $@;
      $exit_value = 1;
    }
  }
}

# $lock_manager is only set on platforms where the lock was taken above.
$lock_manager->unlock("$NewsClipper::Globals::home/.NewsClipper")
  if defined $lock_manager;

exit $exit_value;

#------------------------------------------------------------------------------

# This is the real meat of the main program. For each file, we parse it,
# executing any News Clipper commands. We also do some work to redirect STDOUT
# to the output file.

sub ProcessFiles()
{
  # Unbuffered output makes debugging traces easier to follow.
  $| = 1 if DEBUG;

  for (my $i=0;$i <= $#{$config{inputFiles}};$i++)
  {
    dprint "Now processing $config{inputFiles}[$i] => $config{outputFiles}[$i]";

    my $input = $config{inputFiles}[$i];
    my $output = $config{outputFiles}[$i];

    # Sanity-check the input file (missing, directory, or empty), warning and
    # moving on to the next file if it is unusable. STDIN is never checked.
    if ($input ne 'STDIN')
    {
      unless (-e $input)
      {
        warn reformat "Input file $config{inputFiles}[$i] can't be found.\n";
        next;
      }

      if (-d $input)
      {
        warn reformat "Input file $config{inputFiles}[$i] is a directory.\n";
        next;
      }

      if (-z $input)
      {
        warn reformat "Input file $config{inputFiles}[$i] is empty.\n";
        next;
      }
    }

    # Output goes to a file unless we are debugging or the target is STDOUT.
    my $writeToFile = (DEBUG || $output eq 'STDOUT') ? 0 : 1;

    # The parser is handed the STDIN glob rather than the literal name.
    $config{inputFiles}[$i] = *STDIN if $input eq 'STDIN';

    my $oldSTDOUT = FileHandle->new;

    # Point STDOUT at a temporary file next to the real output file.
    if ($writeToFile)
    {
      # Keep a duplicate of the real STDOUT so it can be put back afterwards.
      $oldSTDOUT->open(">&STDOUT") or die "Couldn't save STDOUT: $!\n";

      # With -v the user also wants to watch the output, so pipe through
      # "tee". (Not available on Windows or DOS.)
      my $teeWanted = $opts{v} && ($^O ne 'MSWin32') && ($^O ne 'dos');

      if ($teeWanted)
      {
        # Make unbuffered
        $| = 2;
        open (STDOUT,"| tee $config{outputFiles}[$i].temp")
          or die reformat dequote<<"          EOF";
            Couldn't create temporary output file
            $config{outputFiles}[$i].temp using "tee": $!
          EOF
      }
      else
      {
        open (STDOUT,">$config{outputFiles}[$i].temp")
          or die reformat dequote<<"          EOF";
            Couldn't create temporary output file
            $config{outputFiles}[$i].temp: $!
          EOF
      }
    }

    require NewsClipper::Parser;

    # Here is where the real work happens: the parser reads the input and
    # invokes the handlers for every special tag it encounters.
    my $parser = NewsClipper::Parser->new;
    $parser->parse_file($config{inputFiles}[$i]);

    # Nothing more to do when the output went straight to STDOUT.
    next unless $writeToFile;

    # Put the original STDOUT back in place.
    close (STDOUT);
    open(STDOUT, ">&" . $oldSTDOUT->fileno())
      or die "Can't restore STDOUT: $!.\n";

    # Swap the finished temp file in for the output file. The old output is
    # first moved to .del for operating systems that have delayed deletes.
    unlink ("$config{outputFiles}[$i].del");
    rename ($config{outputFiles}[$i], "$config{outputFiles}[$i].del");
    rename ("$config{outputFiles}[$i].temp",$config{outputFiles}[$i])
      or die "Could not rename $config{outputFiles}[$i].temp " .
        "to $config{outputFiles}[$i]: $!";
    unlink ("$config{outputFiles}[$i].del");
    chmod 0755, $config{outputFiles}[$i];

    # Upload the result when FTP details with a server were configured for
    # this file.
    if (defined $config{ftpFiles}[$i] &&
        exists $config{ftpFiles}[$i]{server})
    {
      FTP_File($config{outputFiles}[$i],$config{ftpFiles}[$i]);
    }
  }
}

#------------------------------------------------------------------------------

# This is a special handler which does not parse any files. Instead it creates
# a simple News Clipper command for the handler specified with -e and executes
# that.

# Builds a News Clipper command from the -e option and runs it through the
# parser, with the results going to STDOUT.
#
# $opts{e} is interpreted in one of two ways:
#   - If it contains something that looks like a tag ("<...>"), it is used
#     verbatim as the body of a "<!-- newsclipper ... -->" comment.
#   - Otherwise it is a comma-separated list of handler names. The first
#     becomes the <input>, the last becomes the <output>, and everything in
#     between becomes a <filter>. A single name yields only an <input>.
sub ProcessFlagCommand()
{
  # Make unbuffered for easier debugging.
  $| = 1 if DEBUG;

  dprint "Now processing handler \"$opts{e}\" => STDOUT";

  require NewsClipper::Parser;

  my $inputCommand;
  # Construct the input command
  if ($opts{e} =~ /<.*>/s)
  {
    $inputCommand = dequote<<"    EOF";
    <!-- newsclipper
      $opts{e}
    -->
    EOF
  }
  else
  {
    $inputCommand = "<!-- newsclipper\n";

    # Each News Clipper command is separated by a comma
    my @handlers = split /,/,$opts{e};
    foreach my $i (0 .. $#handlers)
    {
      # Position decides the role: first is input, last is output, and
      # anything in between is a filter.
      my $role = $i == 0          ? 'input'
               : $i != $#handlers ? 'filter'
               :                    'output';

      $inputCommand .= "  <$role name=$handlers[$i]>\n";
    }

    $inputCommand .= "-->\n";
  }

  my $p = NewsClipper::Parser->new;
  $p->parse($inputCommand);
}

# ------------------------------------------------------------------------------

# Send the file to the server. Prints an error to STDERR and returns 0 if
# something goes wrong. Returns 1 otherwise.

# Uploads a file to an FTP server.
#
# Arguments:
#   $filename - the local file to send
#   $ftp_info - reference to a hash with the keys server, username, password
#               and dir (the remote directory to change to before the upload)
#
# Returns 1 on success. On any failure a warning is printed to STDERR and 0 is
# returned.
#
# The connection is attempted up to $config{socketTries} times (at least
# once), each with a timeout of $config{socketTimeout}.
#
# NOTE: this sub deliberately has no prototype. It takes two arguments, so the
# former empty prototype was wrong and only went unnoticed because the caller
# is compiled before this definition is seen.
sub FTP_File
{
  my $filename = shift;
  my %ftp_info = %{shift @_};

  dprint "FTP'ing file $filename to server $ftp_info{server}";

  use Net::FTP;

  # Always try at least once, even if socketTries is missing or nonsensical.
  my $numTriesLeft = $config{socketTries};
  $numTriesLeft = 1 unless defined $numTriesLeft && $numTriesLeft > 0;

  my $ftp;

  # Count the attempts down so that an unreachable server can not make us
  # retry forever.
  while ($numTriesLeft-- > 0)
  {
    $ftp = Net::FTP->new($ftp_info{server},Timeout => $config{socketTimeout});
    last if $ftp;
  }

  # Net::FTP->new reports its failure reason in $@.
  unless ($ftp)
  {
    warn "FTP connection failed: $@";
    return 0;
  }

  # Once connected, the reason for a failure is the server's last response,
  # available through $ftp->message, not $@.
  unless ($ftp->login($ftp_info{username},$ftp_info{password}))
  {
    warn "FTP login failed for user $ftp_info{username} on host " .
      "$ftp_info{server}: " . $ftp->message;
    $ftp->quit;
    return 0;
  }

  unless ($ftp->cwd($ftp_info{dir}))
  {
    warn "Couldn't change to directory $ftp_info{dir} during FTP: " .
      $ftp->message;
    $ftp->quit;
    return 0;
  }

  unless ($ftp->put($filename))
  {
    warn "Couldn't FTP file $filename: " . $ftp->message;
    $ftp->quit;
    return 0;
  }

  $ftp->quit;

  return 1;
}

# ------------------------------------------------------------------------------

# Returns the name this program was invoked as. Normally that is $0, but when
# the sourceExe environment variable is defined, the name is the last path
# component of that variable (using either / or \ as the separator).
sub get_exe_name
{
  return $0 unless defined $ENV{sourceExe};

  # Everything after the final slash or backslash.
  my ($compiled_name) = $ENV{sourceExe} =~ /([^\/\\]*)$/;

  return $compiled_name;
}

# ------------------------------------------------------------------------------

# Prints the usage information

# Writes the version banner and the command line summary to STDOUT. The value
# returned is that of the final print, which callers may test for success.
sub print_usage()
{
  my $exeName = get_exe_name();

  # Version number and product name, e.g. "<version>, <product>".
  my $version = join ', ', $VERSION, $config{product};

  # The Personal product also shows what the license covers.
  if ($config{product} eq "Personal")
  {
    my $plural = $config{numberpages} > 1 ? "s" : "";
    $version .= " ($config{numberpages} page$plural" .
      ", $config{numberhandlers} handlers)";
  }

  print dequote<<"  EOF";
    This is News Clipper version $version

    usage: $exeName [-adnrvC] [-i inputfile] [-o outputfile]
           [-c configfile] [-e command] [-H path]

    -i The template file to use as input (overrides value in configuration file)
    -o The output file (overrides value in configuration file)
    -e Run the specified handler and output the results. (Overrides -i and -o.)
    -c The configuration file to use
    -a Automatically download handlers as needed
    -n Check for new versions of the handlers
    -r Forces caching proxies to reload data
    -d Enable debug mode
    -P Pause after completion
    -v Output to STDOUT in addition to the file. (Unix only.)
    -C Clear the cache, handler state, or News Clipper state
    -H Set the user's home directory
  EOF
}

# ------------------------------------------------------------------------------

# Parses the command line, then calls LoadConfigFiles to load the system-wide
# and user configuration information.  It then tweaks a few of the
# configuration parameters, loads the News Clipper global functions and
# constants, and initializes the cache, state, and handler factory. Finally,
# it calls ValidateSetup to make sure the parameters are valid.

# Prepares everything the rest of the program needs:
#   - parses the command line into %opts (left-over arguments are folded into
#     the -e option, separated by commas)
#   - loads the configuration files into %config
#   - converts maxcachesize from megabytes to bytes and maximgcacheage from
#     days to seconds
#   - adds the handler and module directories to @INC
#   - imports NewsClipper::Globals and initializes its home, cache, state and
#     handlerFactory variables
#   - validates the resulting setup
# Dies (directly or through the functions it calls) if anything is wrong.
sub SetupConfig()
{
  SetupSSI();

  {
    # Get the command line flags. Localize @ARGV since getopt destroys it. We
    # do this before loading the configuration in order to get the -c flag.
    local @ARGV = @ARGV;
    Getopt::Long::Configure(
      qw(bundling noignore_case auto_abbrev prefix_pattern=-));
    GetOptions(\%opts, qw(i:s o:s c:s e:s a h d n r v P C H:s));

    # Treat left-over arguments as -e arguments.
    my $joined_args = join ",",@ARGV;
    @ARGV = ('-e',$joined_args);
    my %extra_opts;
    GetOptions(\%extra_opts, qw(i:s o:s c:s e:s a h d n r v P C H:s));

    if (defined $opts{e})
    {
      # Only append when there really were left-over arguments, so that a
      # plain "-e command" does not pick up a dangling comma.
      $opts{e} .= ",$extra_opts{e}"
        if defined $extra_opts{e} && $extra_opts{e} ne '';
    }
    else
    {
      $opts{e} = $extra_opts{e};
    }
  }

  # We load the configuration, being careful not to use any of the stuff in
  # NewsClipper::Globals. (Like dprint, for example.)
  LoadConfigFiles();

  # Translate the cache size into bytes from megabytes, and the maximum image
  # age into seconds from days.
  $config{maxcachesize} = $config{maxcachesize}*1048576
    if defined $config{maxcachesize};
  $config{maximgcacheage} = $config{maximgcacheage}*86400
    if defined $config{maximgcacheage};

  # Put the handler locations on the include search path. Only entries that
  # are directories are added, each exactly once and in configuration order.
  # (ValidateSetup complains about the ones that are not directories.)
  my @handlerDirs;
  foreach my $dir (@{$config{handlerlocations}})
  {
    push @handlerDirs, $dir if -d $dir;
  }
  unshift @INC,@handlerDirs;

  # Override the config values if the user specified -i or -o.
  $config{inputFiles} = [$opts{i}] if defined $opts{i};
  $config{outputFiles} = [$opts{o}] if defined $opts{o};

  # This should be in ValidateSetup, but we need to check it before slurping
  # in the NewsClipper::Globals. (We don't need modulepath in the compiled
  # version, so it is fine for it to be missing or empty.)
  my @moduleDirs;
  if (defined $config{modulepath} && $config{modulepath} ne '')
  {
    @moduleDirs = split /\s+/,$config{modulepath};
  }

  foreach my $directory (@moduleDirs)
  {
    die "\"$directory\" in modulepath setting of NewsClipper.cfg must be a directory.\n"
      unless -d $directory;
  }

  # Put the News Clipper module file location on @INC
  unshift @INC,@moduleDirs;

  # Now we slurp in the global functions and constants.
  require NewsClipper::Globals;
  NewsClipper::Globals->import;

  $NewsClipper::Globals::home = GetHomeDirectory();

  # Make the .NewsClipper directory if it doesn't exist already.
  mkdir "$NewsClipper::Globals::home/.NewsClipper", 0700
    unless -e "$NewsClipper::Globals::home/.NewsClipper";

  # Initialize the HTML cache, News Clipper state, and handler factory
  require NewsClipper::Cache;
  $NewsClipper::Globals::cache = new NewsClipper::Cache;
  # To shut up the warning
  { my $dummy = $NewsClipper::Globals::cache; }

  # Be sure to do a require here to load our own version of File::Cache.
  # (Remove later when File::Cache supports persistence mechanism choice.)
  $NewsClipper::Globals::state = new File::Cache (
               { cache_key => "$NewsClipper::Globals::home/.NewsClipper/state",
                 namespace => 'NewsClipper',
                 username => '',
                 filemode => 0666,
                 auto_remove_stale => 0,
                 persistence_mechanism => 'Data::Dumper',
               } );
  # To shut up the warning
  { my $dummy = $NewsClipper::Globals::state; }

  require NewsClipper::HandlerFactory;
  $NewsClipper::Globals::handlerFactory = new NewsClipper::HandlerFactory;
  # To shut up the warning
  { my $dummy = $NewsClipper::Globals::handlerFactory; }

  ValidateSetup();
}

# ------------------------------------------------------------------------------

# This function sets up a few things for the case when News Clipper is run as
# a server-side include. (We don't support running News Clipper as a CGI
# program.)

sub SetupSSI()
{
  # The presence of SCRIPT_NAME in the environment is how we tell that we
  # are running as a server-side include. Nothing to do otherwise.
  exists $ENV{SCRIPT_NAME} or return;

  # Send errors to the browser by making STDERR a duplicate of STDOUT.
  return open(STDERR,">&STDOUT");
}

# ------------------------------------------------------------------------------

# This function loads the system-wide config and the user's config. It dies
# with an error if a configuration file could not be loaded. If the user's
# configuration file can't be found or loaded in Unix, this is okay. But on
# Windows, it is an error.

sub LoadConfigFiles()
{
  my ($sysStatus,$sysConfigMessage) = LoadSysConfig();
  my ($userStatus,$userConfigMessage) = LoadUserConfig();

  my $sysOkay = $sysStatus eq 'okay';
  my $userOkay = $userStatus eq 'okay';

  if ($userOkay)
  {
    # With a good personal configuration, the system-wide one may be loaded,
    # not configured at all, or skipped because this is Windows.
    return if $sysOkay ||
              $sysStatus eq 'no env variable' ||
              $sysStatus eq 'windows';
  }
  elsif ($sysOkay && $userStatus eq 'open error' && !$opts{c})
  {
    # A missing personal configuration is fine when the system-wide one
    # loaded, unless the user explicitly asked for a file with -c.
    return;
  }

  # Anything else is fatal: report whatever went wrong, then stop.
  warn $sysConfigMessage unless $sysOkay;
  warn "\n" unless $sysOkay || $userOkay;
  warn $userConfigMessage unless $userOkay;
  die "\n";
}

# ------------------------------------------------------------------------------

# Loads the system-wide configuration file, storing the location of that file
# in $config{sysconfigfile}. The location is specified by the NEWSCLIPPER
# environment variable.

# Returns a two-element list: (status, message).
#   status is one of
#     'no env variable' - the NEWSCLIPPER environment variable is not set
#     'windows'         - running on Windows/DOS; no system-wide file is read
#     'open error'      - the file could not be opened or is not a plain file
#     'compile error'   - evaluating the file died or produced warnings
#     'okay'            - the file was evaluated successfully
#   message is a human-readable explanation, or '' when there is nothing to
#   report.
# The configuration file is Perl code which is eval'd in this scope, so it can
# see the lexical $home and assigns into the global %config.
sub LoadSysConfig()
{
  my $warnings;

  $config{sysconfigfile} = 'Not specified';

  unless (exists $ENV{NEWSCLIPPER})
  {
    $warnings = <<"    EOF";
News Clipper could not open your system-wide configuration file
because your NEWSCLIPPER environment variable is not set.
    EOF
    return ('no env variable',$warnings);
  }

  return ('windows','') if $^O eq 'MSWin32' || $^O eq 'dos';

  my $configFile = "$ENV{NEWSCLIPPER}/NewsClipper.cfg";

  my ($evalWarnings,$evalResult) = ('',0);

  # Hide any warnings that occur from parsing the config file. They are
  # collected in $evalWarnings and reported below.
  local $SIG{__WARN__} = sub { $evalWarnings .= $_[0] };
  my $home = GetHomeDirectory();

  # We do an eval instead of doing a "do $configFile" because we want to
  # slurp in $home from the enclosing block. "do $configFile" doesn't slurp
  # $home.
  my $openResult = open CONFIGFILE, $configFile;
  if ($openResult)
  {
    my $code = join '', <CONFIGFILE>;
    close CONFIGFILE;
    $evalResult = eval $code;
  }
  else
  {
    $warnings = <<"    EOF";
News Clipper could not open your system-wide configuration file
"$configFile". Make sure your NEWSCLIPPER environment
variable is set correctly. The error is:
$!
    EOF
    return ('open error',$warnings);
  }

  # Check that the config file wasn't a directory or something.
  if (!-f $configFile)
  {
    $warnings = <<"    EOF";
News Clipper could not open your system-wide configuration file
because "$configFile" is not a plain file.
    EOF

    return ('open error',$warnings);
  }

  # Check if there were any syntax errors while eval'ing the configuration
  # file.
  if ($@)
  {
    $warnings = <<"    EOF";
News Clipper found your system-wide configuration file
"$configFile", but it could not be processed
because of the following error:
$@
    EOF
    return ('compile error',$warnings);
  }

  # Report anything the __WARN__ handler above captured. (This has to test
  # $evalWarnings; $warnings is never set on the way here.)
  if ($evalWarnings)
  {
    $warnings = <<"    EOF";
News Clipper found your system-wide configuration file
"$configFile", but encountered some warnings
while processing it:
$evalWarnings
    EOF
    return ('compile error',$warnings);
  }

  # No error message means we found it
  if ($evalResult)
  {
    $config{sysconfigfile} = $configFile;
    return ('okay','');
  }
  else
  {
    # Reached only if the configuration file evaluated to a false value
    # without raising an error or a warning.
    die "Whoa! You shouldn't be here! Send email describing what you ".
      "were doing";
  }
}

# ------------------------------------------------------------------------------

# Loads the user's configuration file, storing the location of that file in
# $config{userconfigfile}. The location of this file is
# $home/.NewsClipper/NewsClipper.cfg.

# Returns a two-element list: (status, message).
#   status is one of
#     'open error'    - the file could not be opened or is not a plain file
#     'compile error' - evaluating the file died or produced warnings
#     'okay'          - the file was evaluated and merged into %main::config
#   message is a human-readable explanation, or '' on success.
# The file used is the one given with -c, or else
# $home/.NewsClipper/NewsClipper.cfg.
sub LoadUserConfig()
{
  $config{userconfigfile} = 'Not found';

  my $home = GetHomeDirectory();
  my $configFile = $opts{c} || "$home/.NewsClipper/NewsClipper.cfg";

  my ($evalWarnings,$evalResult,$warnings) = ('',0,'');

  # Hide any warnings that occur from parsing the config file.
  # (They are collected in $evalWarnings and reported further down.)
  local $SIG{__WARN__} = sub { $evalWarnings .= $_[0] };

  # We do an eval instead of doing a "do $configFile" because we want to
  # slurp in $home from the enclosing block. "do $configFile" doesn't slurp
  # $home.
  my $openResult = open CONFIGFILE, $configFile;
  if ($openResult)
  {
    my $code = join '', <CONFIGFILE>;
    close CONFIGFILE;

    # This is kinda tricky. We don't want the %config in $configFile to
    # totally redefine %main::config, so we wrap the "eval" in a package
    # declaration, which will put the config file's %config in the
    # NewsClipper::config package for later use.
    my $outerPackage = __PACKAGE__;
    package NewsClipper::config;
    use vars qw(%config);

    $evalResult = eval $code;

    # Restore outer package, being careful that the eval doesn't overwrite the
    # $@ result of the previous eval
    # NOTE(review): a package statement only lasts until the end of its
    # enclosing block, so the code after this if-block is compiled in the
    # outer package regardless of this eval.
    {
      local $@;
      eval "package $outerPackage";
    }
  }
  else
  {
    # The wording differs per platform because Windows users are pointed at
    # the registry, while elsewhere the system error is shown.
    if ($^O eq 'MSWin32' || $^O eq 'dos')
    {
      $warnings = <<"      EOF";
News Clipper could not open your personal configuration file
"$configFile". 
Your registry value for "InstallDir" in
"HKEY_LOCAL_MACHINE\\SOFTWARE\\Spinnaker Software\\News
Clipper\\$VERSION" (or your HOME environment variable) may not be
correct.
      EOF
    }
    else
    {
      $warnings = <<"      EOF";
News Clipper could not open your personal configuration file
"$configFile". The error is:
$!
      EOF
    }

    return ('open error',$warnings);
  }

  # Check that the config file wasn't a directory or something.
  if (!-f $configFile)
  {
    if ($^O eq 'MSWin32' || $^O eq 'dos')
    {
      $warnings = <<"      EOF";
News Clipper could not open your personal configuration file
because "$configFile" is not a plain file.  Your registry
value for "InstallDir" in
"HKEY_LOCAL_MACHINE\\SOFTWARE\\Spinnaker Software\\News
Clipper\\$VERSION" (or your HOME environment variable) may not be
correct.
      EOF
    }
    else
    {
      $warnings = <<"      EOF";
News Clipper could not open your personal configuration file
because "$configFile" is not a plain file. Make sure the file
NewsClipper.cfg is in your <HOME>/.NewsClipper directory.
      EOF
    }

    return ('open error',$warnings);
  }

  # Check if there were any syntax errors while eval'ing the configuration
  # file.
  if ($@)
  {
    $warnings = <<"    EOF";
News Clipper found your personal configuration file
"$configFile", but it could not be processed
because of the following error:
$@
    EOF
    return ('compile error',$warnings);
  }

  # Warnings captured by the __WARN__ handler are treated as errors too.
  if ($evalWarnings)
  {
    $warnings = <<"    EOF";
News Clipper found your personal configuration file
"$configFile", but encountered some warnings
while processing it:
$evalWarnings
    EOF
    return ('compile error',$warnings);
  }

  # No error message means we found it
  if ($evalResult)
  {
    $config{userconfigfile} = $configFile;

    # Now override main's %config
    # (key by key, so system-wide settings the user did not mention survive)
    while (my ($key,$value) = each %NewsClipper::config::config)
    {
      $main::config{$key} = $value;
    }

    # The temporary copy is no longer needed once it has been merged.
    undef %NewsClipper::config::config;
    return ('okay','');
  }
  else
  {
    # Can't get here, since there would have been errors or warnings above.
    # NOTE(review): it looks reachable if the configuration file evaluates to
    # a false value without any error or warning.
    die "Whoa! You shouldn't be here! Send email describing what you ".
      "were doing";
  }
}

# ------------------------------------------------------------------------------

# Simply gets the home directory. First it tries to get it from the password
# file, then from the Windows registry, and finally from the HOME environment
# variable.

sub GetHomeDirectory()
{
  # Work through the possible sources in order of preference, stopping at the
  # first one that yields a true value: the -H option, the password
  # information, the Windows registry, and lastly the HOME environment
  # variable.
  my $home = $opts{H};
  $home ||= eval { (getpwuid($>))[7] };
  $home ||= GetWinInstallDir();
  $home ||= $ENV{HOME};

  # Double quotes cause trouble on Windows, where people sometimes set their
  # home to "c:\Program Files\NewsClipper" including the quotes. Paths built
  # from that would then look like
  # "c:\Program Files\NewsClipper"\.NewsClipper\Handler\Acquisition
  # so the quotes are simply removed.
  if (defined $home)
  {
    $home =~ s/"//g;
  }
  else
  {
    die <<"  EOF";
News Clipper could not determine your home directory. On non-Windows
machines, News Clipper attempts to get your home directory using getpwuid,
then the HOME environment variable. On Windows machines, it attempts to
read the registry entry "HKEY_LOCAL_MACHINE\\SOFTWARE\\Spinnaker
Software\\News Clipper\\$VERSION" then tries the HOME environment
variable.
  EOF
  }

  return $home;
}

# ------------------------------------------------------------------------------

# Checks the setup (system-wide modified by user's) to make sure everything is
# okay.

# Dies with an explanatory message on the first problem found in %config or
# %opts; returns normally if the setup is usable. As side effects it calls
# CheckRegistration() (which sets the product-related %config entries) and
# defaults $config{socketTries} to 1 when it is not defined.
sub ValidateSetup()
{
  # LoadSysConfig and LoadUserConfig leave these placeholder values in place
  # when they do not successfully load a file.
  die <<"  EOF"
Could not find either a system-wide configuration file or a personal
configuration file.
  EOF
    if $config{sysconfigfile} eq 'Not specified' &&
       $config{userconfigfile} eq 'Not found';

  # The configuration must declare a version at least as new as the one this
  # script understands.
  if (!defined $config{forNewsClipperVersion} ||
      ($config{forNewsClipperVersion} < $COMPATIBLE_CONFIG_VERSION))
  {
    my $version_string = $config{forNewsClipperVersion};
    $version_string = 'pre-1.21' unless defined $version_string;

    die reformat dequote<<"    EOF";
      Your NewsClipper.cfg configuration file is incompatible with this
      version of News Clipper (need $COMPATIBLE_CONFIG_VERSION, have
      $version_string). Please run "ConvertConfig /path/NewsClipper.cfg"
      using the ConvertConfig that came with this distribution.
    EOF
  }

  die "\"handlerlocations\" in NewsClipper.cfg must be non-empty.\n"
    if $#{$config{handlerlocations}} == -1;

  foreach my $dir (@{$config{handlerlocations}})
  {
    die "\"$dir\" from handlerlocations in NewsClipper.cfg is not ".
      "a directory.\n" unless -d $dir;
  }

  # Sets $config{product}, which the product-specific checks below rely on.
  CheckRegistration();

  # Check that the user isn't trying to use the -i and -o flags for the Trial
  # and Personal versions
  if (($config{product} eq "Trial" ||
       $config{product} eq "Personal") &&
      (defined $opts{i} || defined $opts{o}))
  {
    die reformat dequote<<"    EOF";
      The -i and -o flags are disabled in the Trial and Personal versions of
      News Clipper. Please specify your input and output files in the
      NewsClipper.cfg file.
    EOF
  }

  # Check that the input files and output files match
  # (only the number of entries is compared)
  if ($#{$config{inputFiles}} != $#{$config{outputFiles}})
  {
    die reformat dequote <<"    EOF";
      Your input and output files are not correctly specified. Check your
      configuration file NewsClipper.cfg.
    EOF
  }

  # Check that if the user is using ftpFiles, the number matches
  if ($#{$config{ftpFiles}} != -1 &&
      $#{$config{ftpFiles}} != $#{$config{outputFiles}})
  {
    die reformat dequote <<"    EOF";
      Your ftp information is not correctly specified. If you do not want to
      ftp any files, there should be nothing specified. If you want to ftp any
      files, you must specify the information for each file, or use "{}" to
      indicate that a file should not be sent.
    EOF
  }

  # Check that the user isn't trying to process more than one input file for
  # the Trial version
  if ($#{$config{inputFiles}} > 0 && $config{product} eq "Trial")
  {
    die reformat dequote <<"    EOF";
      Sorry, but the Trial version of News Clipper can only process one input
      file.
    EOF
  }

  # Check that the user isn't trying to process more than the registered
  # number of files for the Personal version
  if ($config{product} eq "Personal" &&
      $#{$config{inputFiles}}+1 > $config{numberpages} )
  {
    die reformat dequote<<"    EOF";
      Sorry, but this Personal version of News Clipper is only registered to
      process $config{numberpages} input files.
    EOF
  }

  die "No input files specified.\n" if $#{$config{inputFiles}} == -1;

  # Check that they specified cachelocation and maxcachesize
  die "cachelocation not specified in NewsClipper.cfg\n"
    unless defined $config{cachelocation} &&
           $config{cachelocation} ne '';
  die "maxcachesize not specified in NewsClipper.cfg\n"
    unless defined $config{maxcachesize} &&
           $config{maxcachesize} != 0;

  # Check socketTries, and set it if necessary
  $config{socketTries} = 1 unless defined $config{socketTries};
  die "socketTries must be 1 or more\n" unless $config{socketTries} > 0;
}

# ------------------------------------------------------------------------------

# Prints some useful information when running in DEBUG mode.

# Takes (by reference, courtesy of the prototype) the contents of @INC as they
# were before the configuration was loaded, and reports the environment,
# options, search paths and configuration through dprint. Does nothing unless
# DEBUG is on.
sub PrintDebugSummary(\@)
{
  my @startingINC = @{shift @_};

  return unless DEBUG;

  my $exe_name = get_exe_name();

  dprint "Operating system:\n  $^O";
  dprint "Version:\n  $VERSION, $config{product}";
  dprint "Command line was:\n  $exe_name @ARGV";

  dprint "Options are:";
  for my $flag (sort keys %opts)
  {
    dprint "  $flag: $opts{$flag}";
  }

  dprint "\$ENV{NEWSCLIPPER}:\n";
  dprint(defined($ENV{NEWSCLIPPER}) ? "  $ENV{NEWSCLIPPER}"
                                    : "  <NOT SPECIFIED>");

  dprint "Home directory:\n  " . GetHomeDirectory();

  require Cwd;
  dprint "Current directory:\n  ",Cwd::cwd(),"\n";

  dprint "System-wide configuration file found as:\n  $config{sysconfigfile}\n";
  dprint "Personal configuration file found as:\n  $config{userconfigfile}\n";

  dprint "\@INC before loading configuration:";
  for my $path (@startingINC)
  {
    dprint "  $path";
  }

  dprint "\@INC after loading configuration:";
  for my $path (@INC)
  {
    dprint "  $path";
  }

  # One entry per configuration key: array values are listed element by
  # element, and undefined or empty values are marked as not specified.
  dprint "Configuration is:";
  while (my ($name,$setting) = each %config)
  {
    my $entry = "  $name:\n";

    if (ref $setting eq 'ARRAY')
    {
      $entry .= "    $_\n" foreach @$setting;
    }
    elsif (defined $setting && $setting ne '')
    {
      $entry .= "    $setting\n";
    }
    else
    {
      $entry .= "    <NOT SPECIFIED>\n";
    }

    dprint $entry;
  }
}

# ------------------------------------------------------------------------------

# Checks the registration key to make sure it's a valid one.

# Sets $config{product}, $config{numberpages} and $config{numberhandlers}.
#
# In this (Open Source) version the function always sets the product to
# "Open Source" and returns immediately; everything after that unconditional
# return is never executed. The remaining code is the registration key check:
# it would set the product to "Personal" or "Corporate" for a valid key, and
# die for an invalid key other than the 'YOUR_REG_KEY_HERE' placeholder.
sub CheckRegistration()
{
  # Set the default product type
  $config{product} = "Trial";
  $config{numberpages} = 1;
  $config{numberhandlers} = 1;

  # Override the product type in the Open Source version.
  # (The comma operator makes the return part of the same statement, so the
  # function always ends here.)
  $config{product} = "Open Source", return;

  # ---- Unreachable from here on; see the note at the top of the function ----

  # Extract the date, license type, and crypt'd code from the key
  # The key has five '#'-separated fields.
  my ($date,$license,$numPages,$numHandlers,$code) =
    $config{regKey} =~ /^(.*?)#(.*?)#(.*?)#(.*)#(.*)$/;

  # In case the regKey isn't valid
  $date = '' unless defined $date;
  $license = '' unless defined $license;
  $numPages = '' unless defined $numPages;
  $numHandlers = '' unless defined $numHandlers;
  $code = '' unless defined $code;

  # The string that the key's code must match: it ties the key to the
  # operating system and to the configured email address.
  my $licensestring =
    "$date#$license#$^O#$config{email}#$numPages#$numHandlers";

  # Mash groups of eight together to help hash the string for crypt, which can
  # only use up to eight characters
  # (each chunk of up to eight characters is XORed into $hashed)
  my $hashed = "";
  foreach ($licensestring =~ /(.{1,8})/gs) { $hashed ^= $_ }

  # Now check the key
  # (the stored code doubles as the salt argument to crypt)
  if (crypt ($hashed,$code) eq $code)
  {
    # 'p' is a Personal license, 'c' a Corporate one. Any other license type
    # leaves the Trial defaults set above in place.
    if ($license eq 'p')
    {
      $config{product} = "Personal";
      $config{numberpages} = $numPages;
      $config{numberhandlers} = $numHandlers;
    }

    if ($license eq 'c')
    {
      $config{product} = "Corporate";
    }
  }
  elsif ($config{regKey} ne 'YOUR_REG_KEY_HERE')
  {
    # A key was entered but does not check out. The untouched placeholder is
    # not an error and simply leaves the Trial defaults in place.
    print STDERR reformat dequote<<"    EOF";
      ERROR: Your registration key appears to be incorrect. Here is the
      information News Clipper was able to determine:
    EOF
    die dequote '  ',<<"    EOF";
      System-wide configuration file: $config{sysconfigfile}
      Personal configuration file: $config{userconfigfile}
      Email: $config{email}
      Key: $config{regKey}
      Operating System: $^O
      Date Issued: $date
      License Type: $license
      Number of pages: $numPages
      Number of Handlers: $numHandlers
    EOF
  }
}

# ------------------------------------------------------------------------------

# Clear the cache, prompting the user if necessary.
#
# DO NOT REMOVE THE PROMPT! We don't want hordes of websites calling us
# because someone is clearing their cache and hitting the servers every 5
# minutes of the day.

# When the -C flag was given, asks the user three yes/no questions and removes
# the corresponding storage for each "yes":
#   1. the HTML cache ($config{cachelocation}/html)
#   2. the handler-specific state (Acquisition, Filter and Output)
#   3. News Clipper's own state
# and then exits the program. Does nothing when -C was not given.
#
# If STDIN runs out before a question is answered, the answer is taken to be
# "no", so nothing is deleted without an explicit confirmation.
sub HandleClearCache()
{
  use File::Path;

  return unless $opts{C};

  my $stateDir = "$NewsClipper::Globals::home/.NewsClipper/state";

  # Clear HTML cache
  if (_AskYesNo("Do you want to clear the News Clipper HTML cache? "))
  {
    rmtree (["$config{cachelocation}/html"]);
  }

  # Clear handler state
  if (_AskYesNo("Do you want to clear the handler-specific data storage? "))
  {
    rmtree (["$stateDir/Acquisition"]);
    rmtree (["$stateDir/Filter"]);
    rmtree (["$stateDir/Output"]);
  }

  # Clear News Clipper state
  if (_AskYesNo(reformat "Do you want to clear News Clipper's data storage " .
      "(which includes the times that handlers were last checked for updates)? "))
  {
    rmtree (["$stateDir/NewsClipper"]);
  }

  exit;
}

# Prints the given prompt and reads answers from STDIN until one starts with
# "y" or "n" (in either case). Returns 1 for yes and 0 for no. Reaching the end
# of STDIN counts as "no", which also keeps this from looping forever when no
# input is available.
sub _AskYesNo
{
  my @prompt = @_;

  print @prompt;
  my $response = <STDIN>;

  while (defined $response && $response !~ /^[yn]/i)
  {
    print "Yes or no? ";
    $response = <STDIN>;
  }

  return 0 unless defined $response;
  return $response =~ /^y/i ? 1 : 0;
}

# ------------------------------------------------------------------------------

# This routine allows the user to enter a username and password for a proxy.

# Prompts for the proxy password and stores it in $config{proxy_password}.
# This only happens when a proxy username is configured, no password is
# configured, and STDIN is a terminal. The password is read with terminal echo
# turned off, which requires Term::ReadKey; the program dies with an
# explanation if that module can not be loaded.
sub HandleProxyPassword()
{
  # Treat missing settings like empty ones so that an unconfigured proxy does
  # not cause "uninitialized value" warnings.
  my $username = defined $config{proxy_username} ? $config{proxy_username} : '';
  my $password = defined $config{proxy_password} ? $config{proxy_password} : '';

  # Handle the proxy password, if a username was given but not a password, and
  # a tty is available.
  return unless ($username ne '') && ($password eq '') && (-t STDIN);

  # Loaded through a string eval, as before, so the module stays optional.
  unless (eval "require Term::ReadKey")
  {
    die reformat dequote<<"      EOF";
        You need Term::ReadKey for password authorization. Get it from CPAN.
      EOF
  }

  # Make unbuffered so the prompt shows up before we wait for input
  my $oldBuffer = $|;
  $|=1;

  print "Please enter your proxy password: ";

  # Turn off echo to read in password
  Term::ReadKey::ReadMode('noecho');

  my $entered = <STDIN>;

  # Turn echo back on
  Term::ReadKey::ReadMode ('restore');

  # End of input leaves the password empty instead of undefined.
  $entered = '' unless defined $entered;
  chomp($entered);
  $config{proxy_password} = $entered;

  # Give the user a visual cue that their password has been entered
  print "\n";

  $| = $oldBuffer;
}

# ------------------------------------------------------------------------------

# Attempts to grab the installation from the registry for Windows machines. It
# returns nothing if anything goes wrong, otherwise the installation path.

sub GetWinInstallDir()
{
  # The registry only exists on Windows-like systems.
  my $onWindows = ($^O eq 'MSWin32') || ($^O eq 'dos');
  return unless $onWindows;

  require Win32::Registry;

  # Mention the variable a second time so that perl does not warn about
  # "main::HKEY_LOCAL_MACHINE" being used only once.
  $main::HKEY_LOCAL_MACHINE = $main::HKEY_LOCAL_MACHINE;

  my $registryPath = "SOFTWARE\\Spinnaker Software\\News Clipper\\$VERSION";
  my $installKey;

  # Give up if the key is not in the registry.
  return unless $main::HKEY_LOCAL_MACHINE->Open($registryPath, $installKey);

  my ($class, $subKeyCount, $valueCount);
  $installKey->QueryKey($class, $subKeyCount, $valueCount);

  # Give up if the key holds no values at all.
  return if $valueCount <= 0;

  my ($installDir,$valueType);

  # Give up if the value we are after is missing.
  return unless $installKey->QueryValueEx('InstallDir',$valueType,$installDir);

  # Give up if the value exists but is not of type 1.
  return unless $valueType == 1;

  return $installDir;
}

#-------------------------------------------------------------------------------

# If we're in DEBUG mode, output the modules we used during this run. Be
# careful not to try to do this if something bad happened before we loaded
# NewsClipper::Globals, which set the DEBUG constant.

END
{
  # DEBUG only has a body once NewsClipper::Globals has been loaded, hence
  # the check that it is defined before calling it.
  if (defined &DEBUG && DEBUG)
  {
    dprint "Here are all the modules used during this run, and their locations:";
    for my $module (sort keys %INC)
    {
      dprint "  $module =>\n    $INC{$module}";
    }
  }

  # With -P, wait for the user to press enter before the program goes away.
  if ($opts{P})
  {
    $| = 1;
    print "News Clipper has finished processing the input files.\n" .
          "Press enter to continue...";
    my $ignored = <STDIN>;
  }
}

# ------------------------------------------------------------------------------

# Needed by compiler

#perl2exe_include constant
#perl2exe_include NewsClipper/AcquisitionFunctions
#perl2exe_include NewsClipper/Cache
#perl2exe_include NewsClipper/HTMLTools
#perl2exe_include NewsClipper/Handler
#perl2exe_include NewsClipper/HandlerFactory
#perl2exe_include NewsClipper/Interpreter
#perl2exe_include NewsClipper/Parser
#perl2exe_include NewsClipper/Types
#perl2exe_include Time/CTime
#perl2exe_include Date/Format
#perl2exe_include Net/NNTP
#perl2exe_include File/Spec/Win32.pm

#-------------------------------------------------------------------------------

__END__

=head1 NAME

News Clipper - downloads and integrates dynamic information into web pages

=head1 SYNOPSIS

 Using the input and output files specified in either the system-wide
 NewsClipper.cfg file, or the personal NewsClipper.cfg file in
 ~/.NewsClipper

 $ NewsClipper.pl [-anrv] [-c configfile]

 Override the input and output files

 $ NewsClipper.pl [-anrv] [-c configfile] \
   -i inputfile -o outputfile

 Provide a sequence of News Clipper commands on the command line

 $ NewsClipper.pl [-anrv] [-c configfile] \
   -e "handlername, handlername, handlername"


=head1 DESCRIPTION

I<News Clipper> grabs dynamic information from the internet and integrates it
into your webpage. Features include modular extensibility, timeouts to handle
dead servers without hanging the script, user-defined update times, and
automatic installation of modules. 

News Clipper takes an input HTML file, which includes special tags of the
form:

  <!--newsclipper
    <input name=X>
    <filter name=Y>
    <output name=Z>
  -->

where I<X> represents a data source, such as "yahootopstories", "slashdot",
etc. When such a tag is encountered, News Clipper attempts to load and execute
the handler to acquire the data. Then the data is sent to the filter named by
I<Y>, and then on to the output handler named by I<Z>.  If the handler can not
be found, the script asks for permission to attempt to download it from the
central repository.


=head1 HANDLERS

News Clipper has a modular architecture, in which I<handlers> implement the
acquisition and output of data gathered from the internet. To use new data
sources, first locate an interesting one at
http://www.newsclipper.com/handlers.html, then place
the News Clipper tag in your input file. Then run News Clipper once manually,
and it will prompt you for permission to download and install the handler.

You can control, at a high level, the format of the output data by using the
built-in filters and handlers described on the handlers web page. For more
control over the style of output data, you can write your own handlers in
Perl. 

To help handler developers, a utility called I<MakeHandler.pl> is included with
the News Clipper distribution. It is a generator that asks several questions,
and then creates a basic handler.  Handler development is supported by two
APIs, I<AcquisitionFunctions> and I<HTMLTools>. For a complete description of
these APIs, as well as suggestions on how to write handlers, visit
http://www.newsclipper.com/handlers.html.

News Clipper has the ability to automatically download handlers whose
functionality did not change relative to the currently installed version.
This means that you can safely download the update and be guaranteed that it
will not break your existing News Clipper commands.  These "bugfix updates"
are controlled by the auto_dl_bugfix_updates value in the NewsClipper.cfg
file.

You can also tell News Clipper to download "functional updates", which are
handlers whose interface has changed relative to the version you have. These
updates are the most recent versions of the handler, but they contain changes
that may break existing News Clipper commands.


=head1 OPTIONS AND ARGUMENTS

=over 4

=item B<-i> inputfile

Override the input file specified in the configuration file. The special
filename "STDIN" gets input from standard input (useful for piping commands to
News Clipper).

=item B<-o> outputfile

Override the output file specified in the configuration file. The special
filename "STDOUT" sends output to standard output instead of a file.

=item B<-e> commands

Run the specified handler using the default filters and output handlers, and
output the result to STDOUT. This option overrides B<-i> and B<-o>. Commands
can be given in the normal News Clipper bracket syntax, or as a
comma-separated list. For example, the following are equivalent:

 $ echo '<!-- newsclipper <input name=date style=day><output name=string> -->' | \
   NewsClipper.pl -i STDIN -o STDOUT

 $ NewsClipper.pl -e 'date style=day,string'

 $ NewsClipper.pl -e '<input name=date style=day><output name=string>'

Note that commas can not be escaped -- commas that appear in quotes, for
example, B<will> be interpreted as delimiters between commands.

=item B<-c> configfile

Use the specified file as the configuration file, instead of NewsClipper.cfg.

=item B<-n>

Check for new bugfix and functional updates to any handlers encountered.

=item B<-a>

Automatically download any bugfix or functional updates to handlers News
Clipper processes. Use the auto_dl_bugfix_updates setting in the configuration
file to always download bugfix versions, but not functional updates. This flag
should only be used when News Clipper is run interactively, since functional
updates can break web pages that rely on the older functionality.

=item B<-P>

Pause after News Clipper has completed execution. (This is useful when running
News Clipper in a window that automatically closes upon program exit.)

=item B<-r>

Reload the content from the proxy server even on a cache hit. This prevents
News Clipper from using stale data when constructing the output file.

=item B<-d>

Enable debug mode, which prints extra information about the execution of News
Clipper. Output is sent to the screen instead of the output file.

=item B<-v>

Verbose output. Output a copy of the information sent to the output file to
standard output. Does not work on Windows or DOS.

=item B<-H> path

Use the specified path as the user's home directory, instead of auto-detecting
the path. This is useful for specifying the location of the .NewsClipper
directory.

=item B<-C>

Clear the News Clipper cache, handler-specific state, or News Clipper state.
The cache contains information acquired by acquisition handlers.
Handler-specific state is any information that handlers store between runs.
News Clipper state is any information that News Clipper stores between runs,
such as the last time a handler was checked for an update.

Clearing the cache significantly slows down News Clipper and increases network
traffic on remote servers---use with care. Similarly, clearing News Clipper
state forces News Clipper to check for updates to handlers.

=back

=head1 CONFIGURATION

The file NewsClipper.cfg contains the configuration. News Clipper will first
look for this file in the system-wide location specified by the NEWSCLIPPER
environment variable. News Clipper will then load the user's NewsClipper.cfg
from $home/.NewsClipper. Any options that appear in the personal configuration
file override those in the system-wide configuration file, except for the
modulepath option. In this file you can specify the following:

=over 2

=item $ENV{TZ}

The timezone for Windows. (This is automatically detected on Unix-like
platforms.)

=item email

The user's email address. This is used for registration for the commercial
version.

=item regKey

The registration key. This is used for registration for the commercial
version.

=item inputFiles, outputFiles

Multiple input and output files. The first input file is transformed into the
first output file, the second input file to the second output file, etc.

=item handlerlocations

The locations of handlers. For example, ['dir1','dir2'] would look for
handlers in dir1/NewsClipper/Handler/ and dir2/NewsClipper/Handler/. Note that
while installing handlers, the first directory is used. This can be used to
provide a location for a single repository of handlers, which can be shared
by all users.

=item modulepath

The location of News Clipper's modules, in case they aren't in the standard
Perl module path. (Set during installation.) For pre-compiled versions of News
Clipper, this setting also includes extra directories, separated by
whitespace, which are paths in which to search for any additional Perl
modules.

=item cachelocation

The location of the cache in the file system.

=item maxcachesize

The maximum size of the cache in megabytes. It should be at least 5.

=item scriptTimeout

The timeout value for the script. This puts a limit on the total time the
script can execute, which prevents it from hanging. This does not work on
Windows or DOS.

=item socketTimeout

The timeout value for socket connections. This allows the script to recover
from unresponsive servers.

=item socketTries

The number of times to try a connection before giving up.

=item proxy

Your proxy host. For example, "http://proxy.host.com:8080/"

=back

NewsClipper.cfg also contains handler-specific configuration options. These
options are generally documented in the handler's syntax documentation.

The NewsClipper.cfg that comes with the distribution contains default
configuration information for the cacheimages handler:

=over 2

=item imgcachedir

The location in the filesystem of the image cache. This location should be
visible from the web.

=item imgcacheurl

The URL that corresponds to the image cache directory specified by
imgcachedir. 

=item maximgcacheage

The maximum age of images in the image cache. Old images will be removed from
the cache.

=back

=head1 RUNNING

You can run NewsClipper.pl from the command line. The B<-e>, B<-i>, and B<-o>
flags allow you to test your input files. When you are happy with the way
things are working, you should run News Clipper as a cron job. To do this,
create a .crontab file with something similar to the following:

=over 4

0 7,10,13,16,19,22 * * * /path/NewsClipper.pl

=back

See "man cron" for more information.

=head1 PREREQUISITES

This script requires the C<Time::CTime>, C<Time::ParseDate>, C<LWP::UserAgent>
(part of libwww), C<URI>, C<HTML-Tree>, and C<HTML::Parser> modules, in
addition to others that are included in the standard Perl distribution.
See the News Clipper distribution's README file for more information.

Handlers that you download may require additional modules.

=head1 NOTES

News Clipper has two web sites: the open source homepage at
http://newsclipper.sourceforge.net, and the commercial homepage at
http://www.newsclipper.com/. The open source homepage has instructions for
getting the source via CVS, and has documentation aimed at developers. The
commercial web site contains a FAQ, information for buying the commercial
version, and more.

=head1 AUTHOR

David Coppit, <david@coppit.org>, http://coppit.org/
Spinnaker Software, Inc.

=begin CPAN

=pod COREQUISITES

none

=pod OSNAMES

any

=pod SCRIPT
