#!/usr/bin/perl -w
#-----------------------------------------------------------------------------
#
#	$Id: oofilesearch 0.005 2004-07-06$
#
#-----------------------------------------------------------------------------

=head1	NAME

	oofilesearch 0.005

	Search utility, selecting OpenOffice.org files matching a given
	list of keywords in their content or in their metadata.

=head1	USAGE

	oofilesearch [-options] <files filter> <keyword list>

	oofilesearch "D:\Documents\*.sxw" openoffice desktop XML

	Note:

		The files filter may contain one or more space-separated
		paths. Each path may contain wildcards.

=head1	DESCRIPTION

	The selected files are echoed to the standard output (one file per
	line), so this utility can be used as a filter piping its results
	to another program.

	The files filter may contain one or more space-separated paths.
	Each path may contain wildcards.

	All the arguments that follow the files filter, in any number, are
	processed as search criteria.

=head1	OPTIONS

	-R -recursive
		include the subdirectories of each given search directory
	-verbose -trace -debug
		echo some processing comments
	-warnings
		activate the warning messages of the OpenOffice::OODoc API
	-log <file>
		like -verbose, but then messages are sent to the given file
		and don't pollute the standard output
	-result <file>
		send the list of matching files to the given file and not
		to the standard output
	-criteria <file>
		get search criteria from a file (one per line); the loaded
		search keywords may be combined with additional criteria
		passed with the command line, if any.
	-command <command> -exec <command>
		execute a shell command for each matching file ; if the
		command string contains "%f", this substring is replaced
		with the name of the selected file ; if this option is
		provided, the selection list is not echoed to the standard
		output ; if -verbose is on, the value returned by the
		command is echoed
	-encoding <encoding>
		selects the user's character set ; this option is
		mandatory if one or more search criteria contain some local,
		non-ASCII characters

=cut

#=============================================================================

use	OpenOffice::OODoc	1.107;
use	Getopt::Long;

our	$VERSION		= 0.005;

#=============================================================================


# Command-line settings. Each option variable stays undefined unless the
# matching switch is given on the command line.
my	$recursive	= undef;	# -R, -recursive
my	$verbose	= undef;	# -verbose, -trace, -debug
my	$warnings	= undef;	# -warnings
my	$command	= undef;	# -command, -exec <command>
my	$result		= undef;	# -result, -output <file>
my	$log		= undef;	# -log <file>
my	$list		= undef;	# -criteria <file>
my	$character_set	= undef;	# -encoding <encoding>

# Output handles: both the selection list and the trace messages go to
# the standard output by default.
my	$RESULT		= *STDOUT;
my	$LOG		= *STDOUT;

# GetOptions reports any unknown or malformed switch on STDERR and returns
# false; stop there instead of running a search with unintended settings.
GetOptions
	(
	'R|recursive'			=> \$recursive,
	'verbose|trace|debug'		=> \$verbose,
	'warnings'			=> \$warnings,
	'log=s'				=> \$log,
	'result|output=s'		=> \$result,
	'command|exec=s'		=> \$command,
	'criteria=s'			=> \$list,
	'encoding=s'			=> \$character_set
	)
	or die "Usage: oofilesearch [-options] <filefilter> [keywords]\n";

#=============================================================================

# Search criteria shared by the whole run, and number of selected files.
my	@keywords	= ();
my	$count		= 0;

#=============================================================================

# Build the timestamp that prefixes a log line.
# Takes no argument; returns a string of the form "[DD/MM/YYYY hh:mm:ss] "
# (with a trailing space) computed from the current local time.
sub	horodate
	{
	my ($sec, $min, $hour, $mday, $mon, $year) = localtime();
	return sprintf
		(
		"[%02d/%02d/%04d %02d:%02d:%02d] ",
		# localtime() counts months from 0 and years from 1900,
		# so both need an offset before being printed
		$mday, $mon + 1, $year + 1900, $hour, $min, $sec
		);
	}

# Trace helper: does nothing unless the verbose flag is set.
# The first argument is written to the log handle behind a timestamp;
# every remaining argument is written on its own tab-indented line.
sub	message
	{
	my ($headline, @details) = @_;
	$verbose or return;

	my $stamped = horodate() . "$headline\n";
	print $LOG $stamped;
	foreach my $detail (@details)
		{
		print $LOG "\t$detail\n";
		}
	}

#-----------------------------------------------------------------------------

# Tell whether a document matches every search keyword.
# Arguments: the path of the file, followed by the keywords.
# The searched text is made of the title, keywords, subject and description
# metadata and of the text content of the document. Each keyword is applied
# as a case-insensitive regular expression; a keyword which is not a valid
# regular expression is searched as a literal string instead of aborting
# the program.
# Returns 1 when all the keywords are found, undef otherwise (including
# when ooFile can't open the file or when there is nothing to search in).
sub	matching_file
	{
	my $file	= shift;
	my @words	= @_;
	my @parts	= ();

	my $oof	= ooFile($file);
	unless ($oof)
		{
		message("$file doesn't look like an OpenOffice.org file");
		return undef;
		}

	my $meta	= ooMeta(archive => $oof)
		or message("$file doesn't contain metadata");
	if ($meta)
		{
		my $title = $meta->title;
		if ($title)
			{
			message("Title: \"$title\"");
			push @parts, $title;
			}
		else
			{
			message("Title: <UNTITLED>");
			}
		foreach my $field (qw(keywords subject description))
			{
			# scalar assignment, so that each accessor is
			# called in scalar context
			my $value = $meta->$field;
			push @parts, $value if $value;
			}
		}

	my $content	= ooText(archive => $oof)
		or message("$file doesn't have a regular content");
	if ($content)
		{
		my $body = $content->getTextContent;
		push @parts, $body if $body;
		}

	return undef unless @parts;

	# The pieces are kept on separate lines so that a keyword can't
	# match a string made of the end of one field and the beginning
	# of the next one.
	my $text = join("\n", @parts);

	foreach my $word (@words)
		{
		# empty criteria are ignored, but "0" is a valid keyword
		next unless (defined($word) && length($word));
		my $pattern = do { local $@; eval { qr/$word/i } };
		$pattern = qr/\Q$word\E/i unless defined $pattern;
		return undef unless $text =~ $pattern;
		}
	return 1;
	}

#-----------------------------------------------------------------------------

# Check every path of the given list against the search keywords.
# Unreadable files, symbolic links, empty files and non-regular files are
# skipped (with a trace message). A directory is scanned recursively when
# the recursive option is set. For each matching file, either the user
# command is executed (every "%f" being replaced by the file name) or the
# file name is printed to the result handle; the selection counter is
# incremented in both cases.
sub	file_selection
	{
	my @list = @_;
	my $number = scalar @list;

	message("$number file(s) in the search list");
	FILE: foreach my $file (@list)
		{
		unless (-r $file)
			{
			message("$file : unreadable");
			next FILE;
			}
		if (-l $file)
			{
			message("$file : symbolic link, ignored");
			next FILE;
			}
		if ((-d $file) && $recursive)
			{
			message("Searching in $file");
			# bsd_glob, unlike the core glob, doesn't split its
			# pattern on whitespace, so a directory whose name
			# contains spaces is scanned as a single path
			require File::Glob;
			file_selection(File::Glob::bsd_glob("$file/*"));
			next FILE;
			}
		unless (-s $file)
			{
			message("$file : empty");
			next FILE;
			}
		unless (-f $file)
			{
			message("$file is not a regular file");
			next FILE;
			}
		message("Processing $file");
		if (matching_file($file, @keywords))
			{
			message("OK! $file matches all the criteria");
			if ($command)
				{
				# NOTE(review): the file name is inserted as
				# is in a string passed to the shell; the
				# user command must quote "%f" itself to
				# cope with spaces or shell metacharacters.
				my $cmd = $command;
				$cmd =~ s/\%f/$file/g;
				message("Executing command: $cmd");
				my $r = system $cmd;
				# system() returns a wait status, not the
				# exit code: -1 means the command could not
				# be started, the low 7 bits hold a signal
				# number and the exit code is in the high
				# byte
				if ($r == -1)
					{
					message("Command could not be executed: $!");
					}
				elsif ($r & 127)
					{
					message("Command terminated by signal " . ($r & 127));
					}
				else
					{
					message("Command result is " . ($r >> 8));
					}
				}
			else
				{
				print $RESULT "$file\n";
				}
			$count++;
			}
		else
			{
			message("file $file doesn't match");
			}
		}
	}

#=============================================================================
# main program

# Redirect the selection list to a file if required.
if ($result)
	{
	open my $result_fh, ">", $result
		or die "output file $result is unwritable: $!\n";
	$RESULT = $result_fh;
	}

# Redirect the trace messages to a log file (appended) if required;
# logging implies the verbose mode.
if ($log)
	{
	open my $log_fh, ">>", $log
		or die "log file $log is unwritable: $!\n";
	$LOG = $log_fh;
	$verbose = 1;
	}

# Load the search criteria stored in a file, one per line. An unreadable
# file is reported but is not fatal, as criteria may also be given on the
# command line.
if ($list)
	{
	message("Loading a keyword list from $list");
	if (open my $list_fh, "<", $list)
		{
		while (my $line = <$list_fh>)
			{
			# remove the line ending, including the CR of
			# files written with DOS/Windows line endings
			$line =~ s/\r?\n\z//;
			# a blank line is not a criterion; storing it
			# would also defeat the empty list check below
			next unless length $line;
			push @keywords, $line;
			}
		close $list_fh;
		}
	else
		{
		warn "file $list is unreadable: $!\n";
		}
	}

localEncoding($character_set)	if $character_set;

# The first remaining argument is the files filter (which may be "0").
die "Usage: oofilesearch [-options] <filefilter> [keywords]\n"
		unless (defined($ARGV[0]) && length($ARGV[0]));

message("Starting the search...");

# Every argument after the files filter is an additional criterion.
my $filter = shift @ARGV;
push @keywords, @ARGV;
die "Empty keyword list.\n" unless @keywords;
message("Keyword list:", @keywords);
unless ($warnings)
	{
	# silence the warnings issued while the documents are processed
	$SIG{'__WARN__'} = sub {};
	}
file_selection(glob($filter));

message("Finished - $count file(s) selected");
if ($result)
	{
	# buffered write errors only show up when the file is closed
	close $RESULT
		or die "output file $result could not be written: $!\n";
	}
exit;

#=============================================================================

