#!/usr/bin/perl 
#$Revision: 1.3 $$Date: 2007-11-11 20:26:22 $$Author: boumenot $
#######################################################################
# FIXME: this script screen-scrapes the web to build classes to validate
# ResponseGroups.  Unfortunately, this breaks too frequently.  A
# better way needs to be found.
#######################################################################

require 5.008_001;

use Getopt::Long;
use IO::File;
use Pod::Usage;
use LWP::Simple;
use Text::Template;
use XML::Simple;
use Data::Dumper;

use strict;
use warnings;


# Base URL of the Amazon E-Commerce Service developer guide that this
# script scrapes.  The 2007-10-29 edition of the guide was used before:
#   http://docs.amazonwebservices.com/AWSECommerceService/2007-10-29/DG/
sub AWS4_BASE_URL {
    return 'http://docs.amazonwebservices.com/AWSECommerceService/2009-03-31/DG/';
}
# URL of the developer-guide page holding the ResponseGroups list; it is
# the base URL with the page's file name appended.
sub AWS4_ONLINE_HTML {
    return join '', AWS4_BASE_URL(), 'CHAP_ResponseGroupsList.html';
}

# Command-line settings and their defaults.
my $Opt_Debug     = 0;                                  # --debug
my $Opt_Dest      = "../lib/Net/Amazon/Validate/Type";  # --dest
my $Opt_Overwrite = 0;                                  # --overwrite

# Parse the command line.  --help and --version are dispatched to handler
# subs, and "<>" sends every non-option argument to parameter().  When
# GetOptions itself reports a problem, show the usage text.
GetOptions(
    "help|h"    => \&usage,
    "version|V" => \&version,
    "debug|D"   => \$Opt_Debug,
    "dest=s"    => \$Opt_Dest,
    "overwrite" => \$Opt_Overwrite,
    "<>"        => \&parameter,
) or usage();

## main #########################################

# Refuse to do any work when the output directory is missing.
-d $Opt_Dest
    or die "The directory $Opt_Dest does not exist!\n";

# Fetch the ResponseGroups index page and record, for every response
# group linked from it, the relative URL of that group's own page.
#
# ForceArray keeps <li> elements as arrays even when a list has a single
# item; NormaliseSpace => 2 normalises whitespace in all parsed text.
my $xs = XML::Simple->new(ForceArray => [qw(li)], NormaliseSpace => 2);

# LWP::Simple::get() returns undef on failure.  That must not reach
# XMLin(): given undef, XML::Simple searches for a "<scriptname>.xml"
# file and fails with an error that says nothing about the download.
my $index_url  = AWS4_ONLINE_HTML();
my $index_html = get($index_url);
die "Failed to fetch $index_url!\n" unless defined $index_html;

my $href = $xs->XMLin($index_html);


my %response_groups;    # response group name => relative page URL

# NOTE: every nested container is dereferenced directly in the foreach
# list on purpose.  That context autovivifies a missing level into an
# empty array instead of dying under "use strict".
for my $divI (@{$href->{body}->{div}}) {
    next unless defined $divI->{xmlns};

    for my $divJ (@{$divI->{div}}) {
        for my $td (@{$divJ->{table}->{tbody}->{tr}->{td}}) {
            for my $li (@{$td->{div}->{ul}->{li}}) {
                my $para = $li->{p};
                my $ref  = (ref($para) eq 'HASH') ? $para->{a} : undef;

                # Skip list items that do not carry a complete link
                # rather than dying on them or recording an empty name
                # with an undefined URL.
                next unless ref($ref) eq 'HASH';
                next unless defined $ref->{content}
                        and defined $ref->{href};

                $response_groups{$ref->{content}} = $ref->{href};
            }
        }
    }
}

print Dumper(\%response_groups) if $Opt_Debug;
# Build a map from operation name to the response groups it may use, by
# fetching each response group's page and reading the list found under
# its "Operations that can use this ..." paragraph.
#
# (This declaration used to read "sort my %operation_to_rg_map;": a stray
# sort in void context that did nothing but trigger a warning.)
my %operation_to_rg_map;

# Visit the groups in sorted order so that the order of each operation's
# group list does not depend on Perl's hash ordering.
for my $rg (sort keys %response_groups) {
    my $link = AWS4_BASE_URL() . $response_groups{$rg};
    print "fetching $link ...\n" if $Opt_Debug;

    # get() returns undef on failure; handing that to XMLin() would make
    # XML::Simple look for a "<scriptname>.xml" file and report a
    # misleading error, so fail here with the URL that could not be read.
    my $html = get($link);
    die "Failed to fetch $link!\n" unless defined $html;

    my $page = $xs->XMLin($html);

    for my $divI (@{$page->{body}->{div}}) {
        next unless defined $divI->{xmlns};

        for my $divJ (@{$divI->{div}}) {
            next unless defined $divJ->{p};
            next unless $divJ->{p} =~ m/^Operations that can use this/;

            for my $divK (@{$divJ->{div}}) {
                for my $li (@{$divK->{ul}->{li}}) {
                    # GRRR: Amazon, be consistent!!
                    # The name is either wrapped in a link or plain text.
                    my $grp = (ref($li->{p}) eq 'HASH') ? $li->{p}->{a}->{content} : $li->{p};

                    # A list item without text would otherwise end up as
                    # an empty hash key (and later as a file named ".pm").
                    next unless defined $grp;

                    # XXX: some groups are not a single word, but
                    # rather a sentence. My heuristics are not good
                    # enough, so skip anything with a space in the name.
                    next if $grp =~ m/\s/;

                    # The name comes from a web page and is later used as
                    # a file name and as the MODULE_NAME template value,
                    # so accept plain identifiers only (no "/" or "..").
                    next unless $grp =~ m/\A[A-Za-z_][A-Za-z0-9_]*\z/;

                    push @{$operation_to_rg_map{$grp}}, $rg;
                }
            }
        }
    }
}

print Dumper(\%operation_to_rg_map) if $Opt_Debug;

# Write one module per operation into $Opt_Dest.  Each module is the
# aws4-types.tmpl template filled in with the operation name and the
# response groups collected for it.  Existing files are left alone
# unless --overwrite was given.

# The destination does not change between iterations, so check it once.
unless (-d $Opt_Dest) {
    mkdir $Opt_Dest or
        die "Failed to create '$Opt_Dest'!\n";
}

# Built on first use and then reused for every module, so the template
# is not loaded at all when no file ends up being written.
my $template;

# Sorted so that debug output and warnings appear in a stable order.
for my $op (sort keys %operation_to_rg_map) {
    my $fn = "$Opt_Dest/$op.pm";

    print "templating $fn ...\n" if $Opt_Debug;

    if (-f $fn && !$Opt_Overwrite) {
        warn "The file $fn already exists, skipping!\n";
        next;
    }

    # new() returns undef and sets $Text::Template::ERROR when the
    # template cannot be loaded; report that instead of failing later
    # with a method call on an undefined value.
    $template ||= Text::Template->new(
        TYPE       => 'FILE',
        SOURCE     => 'aws4-types.tmpl',
        DELIMITERS => [ '[%--', '--%]', ],
    ) or die "Failed to load the template 'aws4-types.tmpl': $Text::Template::ERROR\n";

    my $hash = {
        'MODULE_NAME' => $op,
        'groups'      => $operation_to_rg_map{$op},
    };

    my $text = $template->fill_in(HASH => $hash);
    unless ($text) {
        die "Failed to fill in the text template for $op!\n";
    }

    # Pass the mode separately instead of opening ">$fn", so that no
    # part of the path can be taken for an open mode.
    my $out = IO::File->new($fn, 'w') or
        die "$! '$fn'!\n";

    print $out $text
        or die "Failed to write '$fn': $!\n";

    # Buffered write errors may only show up when the handle is closed.
    $out->close()
        or die "Failed to close '$fn': $!\n";
}


## subs #########################################

# Print the revision banner followed by the complete POD manual, then
# terminate the script with exit status 2.
#
# Serves both as the Getopt::Long handler for --help and as the fallback
# when the command line could not be parsed.  Any arguments are ignored.
sub usage {
    print '$Revision: 1.3 $$Date: 2007-11-11 20:26:22 $$Author: boumenot $ ', "\n";

    # pod2usage() exits by itself when given a numeric -exitval, so this
    # call does not return.  (An unreachable "exit (1)" used to follow
    # it and wrongly suggested an exit status of 1.)
    pod2usage(-verbose => 2, -exitval => 2);
}

# Getopt::Long handler for --version: print the revision banner and
# terminate the script.  Any arguments are ignored.
#
# Exits with status 0, because printing the version on request is a
# success (this sub used to exit with status 1).
sub version {
    print '$Revision: 1.3 $$Date: 2007-11-11 20:26:22 $$Author: boumenot $ ', "\n";
    exit(0);
}

# Getopt::Long "<>" handler, invoked for each command-line argument that
# is not an option.  Always dies, naming the offending argument.
sub parameter {
    my ($param) = @_;
    die "%Error: Unknown parameter: $param\n";
}

##################################################
__END__

=pod

=head1 asw4-types

B<asw4-types> - convert Amazon's HTML data to Perl libraries.

=head1 SYNOPSIS

B<asw4-types> - [I<OPTION>]... [I<FILE>]...

=head1 DESCRIPTION

B<asw4-types> converts the data stored in Amazon's HTML pages for AWS4 into
Perl libraries.  These libraries are used by Net::Amazon to validate user
input.

=head1 ARGUMENTS

=over 4

=item -h, --help

Displays this message and program version and exits.

=item -V, --version

Displays the program's version and exits.

=item -D, --debug

Prints debug information.

=item --overwrite

Overwrite any libraries if they already exist.

=item --dest E<lt>directoryE<gt>

Specify the destination where the files should be written.

=back

=head1 AUTHORS

Written by Christopher Boumenot.

=head1 REPORTING BUGS

Report bugs to <boumenot@gmail.com>.

=head1 SEE ALSO

=cut

