#!/usr/bin/env perl

use strict;
use warnings;
use Getopt::Long qw(:config autohelp);
use POE::Component::IRC::Common qw(l_irc);
use Pod::Usage;

my $VERSION = '0.01';

# Parse the command line. Each option stores into a lexical declared in
# place; -r and -n may be repeated and accumulate into their arrays.
# If option parsing fails, control is handed to pod2usage().
GetOptions(
    'b|bot=s'          => \my $bot,
    'r|ignore-regex=s' => \my @ignore_regexes,
    'n|ignore-nick=s'  => \my @ignore_nicks,
    'u|no-urls'        => \my $no_urls,
) or pod2usage();

# Everything below that depends only on the options is computed once, up
# front, instead of once per input line.

# IRC-lowercased bot name (undef when --bot was not given).
my $bot_lc = defined $bot ? l_irc($bot) : undef;

# Set of IRC-lowercased nicknames whose messages are dropped.
my %ignored_nick;
$ignored_nick{ l_irc($_) } = 1 for @ignore_nicks;

# Compile the user-supplied patterns once. Besides saving work, this reports
# a malformed pattern before any input is consumed, and it keeps an empty
# pattern from being treated as "reuse the last successful match", which is
# what a bare /$regex/ does when $regex is the empty string.
my @ignore_patterns = map {
    my $compiled = eval { qr/$_/ };
    die "Invalid --ignore-regex '$_': $@" if !defined $compiled;
    $compiled;
} @ignore_regexes;

# Filter the log on STDIN, printing one training line per accepted message.
LINE: while (my $line = <STDIN>) {
    chomp $line;
    my ($nick, $msg);

    if (($nick, $msg) = $line =~ /^\S+ \S+ <(\S+)> (.*)/) {
        # PRIVMSG: two whitespace-free fields, then "<nick> text"
        if (defined $bot_lc) {
            # NOTE(review): the leading class [^0\s] rejects a first word
            # that starts with a literal "0"; kept as-is -- confirm intent.
            my ($first) = $msg =~ /^\s*([^0\s][^\s:,;.!?]*)[:,;.!?]?\s*/;
            if (defined $first && l_irc($first) eq $bot_lc) {
                # The message addresses the bot ("MyBot: hello"); drop the
                # addressing prefix. Anchored so nothing but that prefix can
                # ever be removed; leading whitespace is left untouched.
                $msg =~ s/^(\s*)\Q$first\E[:,;.!?]?\s*/$1/;
            }
        }

        # skip the message if it only contains a URL
        next LINE if $no_urls && $msg =~ m{^\s*\w+://\S+\s*$};
    }
    elsif (($msg) = $line =~ /^\S+ \S+ \* (.*)/) {
        # ACTION: two whitespace-free fields, then "* nick does something".
        # The nick stays part of the printed message.
        ($nick) = $msg =~ /^(\S+)/;

        # An action with no text at all has no nick; skip it rather than
        # pass undef on to the nick comparison below.
        next LINE if !defined $nick;
    }
    else {
        # any other kind of log line is of no interest
        next LINE;
    }

    # skip it if we want to ignore this nick
    next LINE if %ignored_nick && $ignored_nick{ l_irc($nick) };

    # skip if it matches one of the ignore patterns
    for my $pattern (@ignore_patterns) {
        next LINE if $msg =~ $pattern;
    }

    print "$msg\n";
}

=head1 NAME

irchal_seed.pl - Creates a L<MegaHAL|AI::MegaHAL> training file from logs
generated by L<POE::Component::IRC::Plugin::Logger|POE::Component::IRC::Plugin::Logger>

=head1 SYNOPSIS

B<irchal_seed.pl> [options]

 Options:
   -b, --bot           The name of the IRC bot
   -r, --ignore-regex  A regex for lines to ignore
   -n, --ignore-nick   A nickname to ignore
   -u, --no-urls       Ignore messages consisting only of a URL

Input is accepted on STDIN, and output is delivered to STDOUT.

Example:

 cat ~/logs/\#chan.log | irchal_seed.pl -b MyBot -r 'some|thing?' > megahal.trn

=head1 AUTHOR

Hinrik E<Ouml>rn SigurE<eth>sson, hinrik.sig@gmail.com

=head1 LICENSE AND COPYRIGHT

Copyright 2008 Hinrik E<Ouml>rn SigurE<eth>sson

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut
