package OpenInteract::ResultsManage;

# $Id: ResultsManage.pm,v 1.5 2001/07/16 14:23:08 lachoy Exp $

use strict;
use Data::Dumper  qw( Dumper );
use SPOPS::Utility;

BEGIN {
    eval { require OpenInteract::ResultsIterator };
}

@OpenInteract::ResultsManage::ISA     = ();
$OpenInteract::ResultsManage::VERSION = sprintf("%d.%02d", q$Revision: 1.5 $ =~ /(\d+)\.(\d+)/);

use constant FILENAME_WIDTH => 20;

my $LOCK_EXT         = 'lock';
my $RECORD_SEP       = '-->';
my $EXTRA_NAME_SEP   = ',';
my $MIXED_IDENTIFIER = 'MIXED';

my @VALID_INIT_KEYS  = qw( search_id min max );

# Constructor

sub new {
    my ( $pkg, $p ) = @_;
    my $class = ref $pkg || $pkg;
    my $data = $p || {};
    foreach my $key ( @VALID_INIT_KEYS ) {
        $data->{ $key } = $p->{ $key };
    }
    return bless( $data, $class );
}


# Clear out all information in an object

sub clear {
    my ( $self ) = @_;
    while ( my ( $k, $v ) = each %{ $self } ) {
        undef $self->{ $k };
    }
    return $self;
}


# Save given results to a file

sub save {
    my ( $self, $identifier_list, $p ) = @_;
    $p ||= {};
    my $R = OpenInteract::Request->instance;
    $R->DEBUG && $R->scrib( 1, "Trying to save search results." );

    # Generate a search ID and get the filename, then lock it and open it

    my ( $search_id, $filename ) = $self->get_persistent_info();
    $self->results_lock( $filename );
    eval { open( RESULTS, "> $filename" ) || die "Cannot open for writing ($filename): $!\n" };
    if ( $@ ) { 
        $R->scrib( 0, "Search result save failure. $@" );
        close( RESULTS );
        $self->results_clear( $filename );
        die "Search result save failure: $@\n";
    }

    # See if the resultset is homogenous -- if the user wants it mixed
    # then keep it that way, otherwise look at the data passed in to
    # save

    $p->{class} = $MIXED_IDENTIFIER if ( $p->{force_mixed} );
    $p->{class} ||= $self->review_results_class( $identifier_list );

    my $has_extra = 0;
    my $extra_name = undef;
    if ( ref $p->{extra} eq 'ARRAY' ) {
        $has_extra = ( ref $p->{extra}->[0] eq 'ARRAY' )
                       ? scalar @{ $p->{extra}->[0] } : 1;
        $p->{extra_name} ||= [];
        $extra_name = join( $EXTRA_NAME_SEP, @{ $p->{extra_name} } );
    }
    my $num_records = scalar @{ $identifier_list };
    print RESULTS join( $RECORD_SEP, scalar localtime, $num_records, 
                                     $p->{class}, $has_extra, $extra_name ), "\n";
    my $count = 0;

    # Go through the items to save and derive a value and class for
    # each -- it's ok if the class is blank since that means we're
    # just saving raw data

    foreach my $item ( @{ $identifier_list } ) {
        my ( $item_class, $item_value ) = ( ref $item ) 
                                            ? ( ref $item, $item->id )
                                            : ( $p->{class}, $item );
        $item_class = '' if ( $item_class eq $MIXED_IDENTIFIER );
        my @result_info = ( $item_class, $item_value );
        if ( $has_extra ) {
            if ( ref $p->{extra}->[ $count ] eq 'ARRAY' ) {
                push @result_info, @{ $p->{extra}->[ $count ] };
            }
            else {
                push @result_info, $p->{extra}->[ $count ];
            }
        }
        print RESULTS join( $RECORD_SEP, @result_info ), "\n";
        $count++;
    }
    close( RESULTS );
    $self->results_unlock( $filename );
    $R->DEBUG && $R->scrib( 1, scalar @{ $identifier_list }, "search results saved ok." );
    $self->{num_records} = $num_records;
    return $self->{search_id} = $search_id;
}


# Returns either ( \@classes, \@ids, $num_records ) or \@ids depending
# on context

sub retrieve {
    my ( $self, $p ) = @_;
    unless ( $self->{search_id} ) {
        die "Cannot retrieve results without a search_id! Please set at object ",
            "initialization or as a property of the object before running retrieve().\n";
    }

    # 'min' and 'max' can be properties or passed in

    $self->{min} ||= $p->{min};
    $self->{max} ||= $p->{max};

    # Clear out the number of records

    $self->{num_records} = 0;

    my $R = OpenInteract::Request->instance;
    $R->DEBUG && $R->scrib( 1, "Retrieving raw search results for ID ($self->{search_id})" );
    $self->assign_results_to_object( $self->retrieve_raw_results( $p ) );

    # If they asked for an iterator return it, but first clear out any
    # min/max values since they've already been preselected

    if ( $p->{return} eq 'iterator' ) {
        $self->{min} = $self->{max} = $p->{min} = $p->{max} = 0;
        return $self->retrieve_iterator( $p );
    }
    return $self;
}


# Note that this only works on saved SPOPS objects

sub retrieve_iterator {
    my ( $self, $p ) = @_;

    # 'min' and 'max' can be properties or passed in

    $self->{min}         ||= $p->{min};
    $self->{max}         ||= $p->{max};

    my $R = OpenInteract::Request->instance;
    $R->DEBUG && $R->scrib( 1, "Retrieving search iterator for ID ($self->{search_id})" );
    unless ( $self->{result_list} ) {
        $self->assign_results_to_object( $self->retrieve_raw_results( $p ) );
    }
    unless ( $self->{record_class} ) {
        die "Cannot create iterator! Search results were not saved with a classname.",
            "(Search ID: $self->{search_id})\n";
    }
    if ( $self->{record_class} eq $MIXED_IDENTIFIER ) {
        $self->{min} = $self->{max} = 0;
        return OpenInteract::ResultsIterator->new({ results       => $self,
                                                    skip_security => $p->{skip_security} });
    }
    else {
        return $self->{record_class}->fetch_iterator({ id_list => $self->get_id_list() });
    }
}


# Take the result list internally and return a list of all IDs

sub get_id_list {
    my ( $self ) = @_;
    unless ( $self->{result_list} ) {
        die "Results not yet saved for this object!\n"
    }
    return [ map { $_->{id} } @{ $self->{result_list} } ];
}


########################################
# PAGING METHODS
########################################


# Note that $item can be a class or an object

sub find_total_page_count {
    my ( $item, $per_page, $p_num_records ) = @_;
    my $num_records = ( ref $item and $item->{num_records} ) 
                        ? $item->{num_records} : $p_num_records;
    return 0 unless ( $per_page and $num_records );
    my $num_pages = $num_records / $per_page;
    return ( int $num_pages != $num_pages ) ? int( $num_pages ) + 1 : int $num_pages;
}


sub find_page_boundaries {
    my ( $class, $page_num, $per_page ) = @_;
    return ( 0, 0 ) unless ( $page_num and $per_page );
    my $max = $page_num * $per_page;
    my $min = $max - $per_page;
    return ( $min, $max );
}



##################################################
# INTERNAL METHODS
##################################################


# Returns a hashref of information about the results, including the ID
# list, the class, results save time. Pass in min/max in the hashref
# (second arg) to have the results be paged.

sub retrieve_raw_results {
    my ( $self, $p ) = @_;
    my $R = OpenInteract::Request->instance;
    die "No search_id defined in object!\n" unless ( $self->{search_id} );
    my $filename = $self->build_results_filename();
    eval { open( RESULTS, $filename ) || die "Cannot open ($filename): $!\n" };
    if ( $@ ) {
        $R->scrib( 0, "Search result retrieval failure. $@" );
        return undef;
    }
    my %info = ();
    my @metadata_order = qw( date num_records record_class num_extra tmp_extra_name );
    my $first_line = <RESULTS>;
    chomp $first_line;
    @info{ @metadata_order } = split( $RECORD_SEP, $first_line, scalar @metadata_order );
    if ( $info{tmp_extra_name} ) {
        $info{extra_name} = [];
        @{ $info{extra_name} } = split /$EXTRA_NAME_SEP/, $info{tmp_extra_name};
        delete $info{tmp_extra_name};
    }
    $R->DEBUG && $R->scrib( 1, "Search run: $info{date}; items saved: $info{num_records};",
                               "record_class: $info{record_class}" );
    my $count = 1;
    my $min = $self->{min} || $p->{min};
    my $max = $self->{max} || $p->{max};
    while ( <RESULTS> ) {
        if ( $min and $count < $min ) { $count++; next; }
        if ( $max and $count > $max ) { last; }
        chomp;
        my ( $item_class, $item_value, @extra ) = split /$RECORD_SEP/, $_, 2 + $info{num_extra};
        my $result_info = { id => $item_value };
        $result_info->{class} = $item_class if ( $info{record_class} eq $MIXED_IDENTIFIER );

        # Set the extra information in the result -- note that it's
        # ALWAYS an arrayref, even if there's only one.

        if ( $info{num_extra} ) {
            $result_info->{extra} = \@extra;
        }
        push @{ $info{result_list} }, $result_info;
        $count++;
    }
    close( RESULTS );
    return \%info;
}


sub assign_results_to_object {
    my ( $self, $result_info ) = @_;
    $self->{num_records}  = $result_info->{num_records};
    $self->{date}         = $result_info->{date};
    $self->{extra_name}   = $result_info->{extra_name};
    $self->{num_extra}    = $result_info->{num_extra};
    $self->{record_class} = $result_info->{record_class};
    $self->{result_list}  = $result_info->{result_list};
    return $self;
}


sub build_results_filename {
    my ( $item, $p_search_id ) = @_;
    my $search_id = ( ref $item and $item->{search_id} ) ? $item->{search_id} : $p_search_id;
    unless ( $search_id ) {
        die "Cannot build a results filename without a search_id as property or parameter!\n";
    }
    return join( '/', OpenInteract::Request->instance->CONFIG->get_dir( 'overflow' ),
                      $search_id );
}


# Don't save the 'search_id' parameter into the object yet, since
# this method only ensures that we *can* create a file with the
# search_id

sub get_persistent_info {
    my ( $class ) = @_;
    my $R = OpenInteract::Request->instance;
    my $overflow_dir = $R->CONFIG->get_dir( 'overflow' );
    unless ( -d $overflow_dir ) {
        $R->scrib( 0, "Search results directory ($overflow_dir) is not a directory" );
        die "Configuration option for writing search results ($overflow_dir) is not a directory";
    }
    unless ( -w $overflow_dir ) {
        $R->scrib( 0, "Search results directory ($overflow_dir) is not writeable" );
        die "Configuration option for writing search results ($overflow_dir) is not writeable";
    }
    while ( 1 ) {
        my $search_id = SPOPS::Utility->generate_random_code( FILENAME_WIDTH );
        my $filename = $class->build_results_filename( $search_id );
        next if ( -f $filename || -f "$filename.$LOCK_EXT" );
        $R->DEBUG && $R->scrib( 1, "Found non-existent search info: ($search_id) ($filename)" );
        return ( $search_id, $filename );
    }
}


# Lock the results file using another file

sub results_lock {
    my ( $class, $filename ) = @_;
    open( LOCK, "> $filename.$LOCK_EXT" ) 
        || die "Cannot open lockfile ($filename.$LOCK_EXT): $!";
    print LOCK scalar localtime;
    close( LOCK );
}


# Unlock the results file by deleting the lockfile

sub results_unlock {
    my ( $class, $filename ) = @_;
    unlink( "$filename.$LOCK_EXT" ) 
          || die "Cannot remove lockfile ($filename.$LOCK_EXT): $!";
}


# Clear out the results, including the lockfile. This is called if we
# encounter some sort of error in the middle of writing.

sub results_clear {
    my ( $class, $filename ) = @_;
    $class->results_unlock( $filename );
    unlink( $filename ); # don't die here b/c if this is called we have bigger problems...
}


# See whether we have a homogenous resultset or not -- return the
# class of all objects if they're all the same, otherwise return the
# global $MIXED_IDENTIFIER which tells us it's heterogeneous

sub review_results_class {
    my ( $class, $result_list ) = @_;
    my ( $main_class );
    foreach my $result ( @{ $result_list } ) {
        my $result_class = ref $result;
        next unless ( $result_class );
        return $MIXED_IDENTIFIER if ( $main_class and $main_class ne $result_class );
        $main_class ||= $result_class;
    }
    return $main_class;
}


1;

__END__

=pod

=head1 NAME

OpenInteract::ResultsManage - Save and retrieve generic search results

=head1 SYNOPSIS

 use OpenInteract::ResultsManage;

 # Basic usage

 ... perform search ...

 my $results = OpenInteract::ResultsManage->new();
 $results->save( \@id_list );
 $R->{session}->{this_search_id} = $results->{search_id};

 ... another request from this user ...

 my $results = OpenInteract::ResultsManage->new({ 
                              search_id => $R->{session}->{this_search_id} });
 my $result_list = $results->retrieve();

 # Use with paged results

 my $results = OpenInteract::ResultsManage->new(); 
 $results->save( \@id_list );
 $R->{session}->{this_search_id} = $results->{search_id};
 my $page_num = $R->apache->param( 'pagenum' );
 my ( $min, $max ) = $results->find_page_boundaries( $page_num, $HITS_PER_PAGE );
 my ( $results, $total_count ) = $results->retrieve({ min => $min, max => $max } );
 my $total_pages = $results->find_total_page_count( $HITS_PER_PAGE );
 my $total_hits = $results->{num_records};
 
 # Can now print "Page $page_num of $total_pages" or you 
 # can pass this information to the template and use the 
 # 'page_count' component and pass it 'total_pages',
 # 'current_pagenum', and a 'url' to get back to this page:

 [%- comp( 'page_count', total_pages     = 5,
                         current_pagenum = 3,
                         url             = url ) -%]

 Displays:

 Page [<<] [1] [2] 3 [4] [5] [>>]

 (Where the items enclosed by '[]' are links.)

=head1 DESCRIPTION

This class has methods to enable you to easily create paged result
lists. This includes saving your results to disk, retrieving them
easily and some simple calculation functions for page number
determination.

=head1 PUBLIC METHODS

The following methods are public and available for OpenInteract
application developers.

B<save( \@results_list, \%params )>

Saves a list of things to be retrieved later. The things can be either
ID values (simple scalars) or SPOPS objects. If objects are passed in,
we call C<-E<gt>id()> on each to get the ID value to save.

If objects are used, we also query each one for its class and save
that information in the search results. Whether you have a homogenous
resultset or not affects the return values. If it is a homogenous
resultset we note the class for all objects in the search results
header. This enables us to create an iterator from the results if
needed.

Parameters:

=over 4

=item *

B<class> ($) (optional)

You can force all the IDs passed in to be of a particular class.

B<force_mixed> (bool) (optional)

Forces the resultset to be treated as heterogeneous (mixed) even if
all objects are of the same class.

B<extra> (\@) (optional)

Each item represents extra information to save along with each
result. Each item must be either a scalar (which saves one extra item)
or an arrayref (which saves a number of extra items).

B<extra_name> (\@)  (optional)

If you specify extra information you need to give each one a name.

=back

Returns: an ID you can use to retrieve the search results using
the C<retrieve()> or C<retrieve_iterator()> methods. If
you misplace the ID, you cannot get the search results back.

Example:

 my $results = OpenInteract::ResultsManage->new();
 my $search_id = $results->save({ \@results,
                                  { force_mixed => 1,
                                    extra       => \@extra_info,
                                    extra_name  => [ 'hit_count', 'weight' ] });

The following parameters are set in the object after a successful
results save:

'search_id'

'num_records'

Returns: the ID of the search just saved.

B<retrieve( $search_id, \%params )>

Retrieve previously saved search results using the parameter
'search_id' which should be set on initialization or before this
method is run.

Parameters:

=over 4

=item *

B<min>: Where we should start grabbing the results. Generally used if
you are using a paged results scheme, (page 1 is 1 - 25, page 2 26 -
50, etc.). (Can be set at object creation.)

=item *

B<max>: Where should we stop grabbing the results. See B<min>. (Can be
set at object creation.)

=back

Returns:

=over 4

=item *

B<In list context>: an array with the first element an arrayref of the
results (or IDs of the results), the second element an arrayref of the
classes used in the results, the third element being the total number
of items saved. (The total number of items can be helpful when
creating pagecounts.)

=item *

B<In scalar context>: an arrayref of the results.

=back

Note: The interface for this method may change, and we might split
apart the different return results into two methods (particularly
whether classes are involved).

Also sets the object parameters:

'num_records' - total number of results in the original search

'date' - date the search was run

'num_extra' - number of 'extra' records saved

'extra_name' (\@) - list of fields matching extra values saved

B<retrieve_iterator( $search_id, \%params )>

Retrieves an iterator to walk the results. You can use min/max to
pre-separate or you can simply grab all the results and screen them
out yourself.

Parameters: same as C<retrieve()>

B<find_total_page_count( $records_per_page, [ $num_records ] )>

If 'num_records' is not in the object, then you can pass it as a
second parameter. In this manner the method is a class method.

Returns: Number of pages required to display $num_records at
$records_per_page.

Example:

 my $page_count = $class->find_total_page_count( 289, 25 );
 # $page_count = 11

 my $page_count = $class->find_total_page_count( 289, 75 );
 # $page_count = 4

B<find_page_boundaries( $page_number, $records_per_page )>

Returns: An array with the floor and ceiling values to display the
given page with $records_per_page on the page.

Example: 

 my ( $min, $max ) = $class->find_page_boundaries( 3, 75 );
 # $min is 226, $max is 300

 my ( $min, $max ) = $class->find_page_boundaries( 12, 25 );
 # min is 301, $max is 325

=head1 INTERNAL METHODS

B<build_results_filename()>

B<get_persistent_info()>

B<results_lock()>

B<results_unlock()>

B<results_clear()>

B<retrieve_raw_results()>

=head1 DATA FORMAT

Here is an example of a saved resultset. This one happens to be
generated by the L<OpenInteract::FullText> module.

 Thu Jul 12 17:19:05 2001-->3-->-->1-->fulltext_score
 -->3d5676e0af1f1cc6b539fb08a5ee67b7-->2
 -->c3d72c3c568d99a796b23e8efc75c00f-->1
 -->8f10f3a91c3f10c876805ab1d76e1b94-->1

Here are all the pieces:

B<First>, the separator is C<--E<gt>>. This is configurable in this
module.

B<Second>, the first line has:

=over 4

=item *

C<Thu Jul 12 17:19:05 2001>

The date the search was originally run.

=item *

C<3>

The number of items in the entire search resultset.

=item *

C<> (empty)

If it were filled it would be either a classname (e.g.,
'MySite::User') or the keyword 'MIXED' which tells this class that the
results are of multiple classes.

=item *

C<1>

The number of 'extra' fields.

=item *

C<fulltext_score>

The name of the first 'extra' field. If there wore than one extra
field they would be separated with commas.

=back

B<Third>, the second and remaining line have three pieces:

=over 4

=item *

C<> (empty)

The class name for this result. Since these IDs are not from a class,
there is no class name.

C<3d5676e0af1f1cc6b539fb08a5ee67b7>

The main value returned, also the ID of the object returned that, when
matched with the class name (first item) would be able to define an
object to be fetched.

C<2>

The first 'extra' value. Successive 'extra' values are separated by
'-->' like the other fields.

=back

=head1 BUGS

None known, although the API may change in the near future.

=head1 TO DO

B<Review API>

The API is currently unstable but should solidify quickly as we get
more use out of this module.

 - Keep 'mixed' stuff in there, or maybe always treat the resultset as
 potentially heterogeneous objects?

 - Test with saving different types of non-object data as well as
 objects and see if the usage holds up (including with the
 ResultsIterator).

B<Objectify?>

Think about creating a 'search_results' object that can access the
resultset along with metadata about the results (number of items, time
searched, etc.). This would likely prove easier to work with in the
future. 

What would also be interesting is combine this with the interface for
L<SPOPS::Iterator> and the currently impelemented
L<OpenInteract::ResultsIterator>, so we could do something like:

 my $search = OpenInteract::ResultsManage->new( $search_id );
 print "Search initially run: $search->{search_date}\n",
       "Number of results: $search->{num_records}\n";
 $search->set_min( 10 );
 $search->set_max( 25 );
 while ( my $obj = $search->get_next ) {
   print "Object retrieved is a ", ref $obj, " with ID ", $obj->id, "\n";
 }

=head1 COPYRIGHT

Copyright (c) 2001 intes.net, inc.. All rights reserved.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=head1 AUTHORS

Chris Winters <chris@cwinters.com>

=cut
