#!/usr/local/bin/perl -w

###########################################################################
# Time-stamp: <97/06/05 13:09:44 klassa>
#
# cite.pl -- A minimal "supercite.el" workalike.
#
# This is icky, icky code.  If some clueful person would care to
# clean it up, I'd be most grateful! :-)
###########################################################################

local($format, $attribution, $count, $arg, $date, $from, @files, $attr,
      $fmt, $curr, $tmp, @tmp, $out, $use_raw, $erase, $spcidx, @sline,
      $thisline, $linewidth, $tlw);

#
# The default width of the lines you want in your final output (can be
# overridden on the command line with the -w switch).
#

$linewidth = 80;

#
# Set the attribution to '', initally.  Formatting is on by default.
#

$attribution = '';
$format      = 1;

#
# Shut the compiler up
#

$a = $b = '';

#
# Grab the command-line arguments.
#

while ($arg = shift)
{
    if    ($arg =~ /-a(.*)/) { $attribution = $1;            }
    elsif ($arg eq "-f")     { $format      = 1;             }
    elsif ($arg eq "-nf")    { $format      = 0;             }
    elsif ($arg =~ /^-h/)    { print while (<DATA>); exit 0; }
    elsif ($arg =~ /-w(.*)/) { $linewidth   = $1;            }
    else                     { push (@files, $arg);          }
}

#
# This is what I use to reformat the text...  Par, with a whole lot of
# weird-looking flags.
#

$fmt = 'par -w' . $linewidth . q* -r'TbgqR' -B'=.?_A_a' -Q='_s>|'*;

#
# Set ARGV so that the <> in the while loop (below) does the right
# thing & doesn't attempt to process files named after our command line
# switches. :-)
#

@ARGV = @files;

#
# Grab information out of the header.
#

while (<>)
{
    &chomp;

    #
    # If we've hit the header/body separator, bail.
    #

    last if /^\s*$/;

    #
    # If we've got the "From" line, extract the relevant parts.
    #

    if (/^From: (.*)/)
    {
        ($from, $attr) = &munge($_);
        $attribution = $attr unless $attribution;
    }

    #
    # Pick out the date, if on the "Date" line.
    #

    $date = $1 if /^Date: (.*)/;
}

#
# Get rid of the time at the end of the date, and replace days of the form
# 01,02,03,... with the single-digit form.
#

$date =~ s/ [0-9:]*( ([\+-][0-9]*|[A-Z]*))?( \(.*\))?$//;
$date =~ s/\b0([0-9])\b/$1/;

#
# Grab the body & do the attribution...  Work into a temporary variable
# so that we can check return value from the use of "par" (and take
# appropriate counter measures, if necessary), easily.
#

$out  = "\n>>>>> On $date,\n>>>>> \"$attribution\" == $from wrote:\n";
undef $curr;

while (<>)
{
    #
    # If this is a previous "attribution header", write it out as is:
    #

    if (/^>>>>>/)
    {
	if ((!defined($curr) || $curr ne '>>>>>') && !$count)
	{
	    $curr = '>>>>>';
	    $out .= "\n";        # don't increment $count
	}
	$out .= $_;
	next;
    }

    &chomp;

    #
    # If the line contains a >, assume it's from a previous attribution.
    # Attempt to extract the relevant portion.  For example, in a line
    # like "Bob> Tom> okay, go ahead", we want the attribution to be
    # "Tom" and the text to be attributed to be "okay, go ahead" (i.e.
    # the "Bob" part is irrelevant, since Bob was just quoting Tom; what
    # we want to keep is the notion that Tom said something)...
    #

    $tmp = '';

    if (/>/)
    {
        @tmp = split(/>/);

        #
        # Use the second to last and last items in the array, which will
        # be (hopefully) the last attribution and the actual text to be
        # attributed.
        #

        ($tmp, $_) = ($tmp[$#tmp-1], $tmp[$#tmp]);

        #
        # If the attribution is blank, use a '+'.
        #

        $tmp = '+' unless defined($tmp);
        $tmp =~ s/^\s+|\s+$//g;
        $tmp = '+' if $tmp eq '';

        #
        # To make the -w and 'use strict' happy, make sure $_ has a
        # value in the event that the earlier split caused it to become
        # undefined.
        #

        $_ = '' unless defined($_);
    }

    #
    # Clean up the line by removing leading/trailing space.
    #

    s/^\s+|\s+$//g;

    #
    # If the attribution has changed or there's a blank line in the
    # text, and we haven't emitted a blank line already, do so.  The
    # intent is to separate logical portions of the message without
    # allowing double (or greater) spacing.
    #

    $out .= "\n" if ((!defined($curr) || $tmp ne $curr || $_ eq '')
		     && !$count++);

    #
    # If the line isn't empty, spit it out.  We make sure that there aren't
    # any lines with words too long for par to handle (by splitting them --
    # no, it's not nice).
    #

    @sline = ();
    $tlw = $linewidth - (length(($tmp) ? $tmp : $attribution) + 4);

    while ($format && (length > $tlw))
    {
	$count = 0;
	$spcidx = &max(rindex($_, " ", $tlw-1), rindex($_, "\t", $tlw-1),
		       rindex($_, "-", $tlw-1));
	if ($spcidx < 0) { $spcidx = $tlw-1; }
	push(@sline, substr($_, 0, $spcidx+1));
	$sline[$#sline] =~ s/\s+$//g;
	$_ = substr($_, $spcidx+1);
	s/^\s+//g;
    }

    if ($_ ne '')
    {
        $count = 0;
	push(@sline, $_);
    }

    foreach $thisline (@sline)
    {
        $out .= sprintf("  %s> %s\n", ($tmp) ? $tmp : $attribution, $thisline);
    }

    #
    # Remember the attribution, so that we've got something to compare
    # against on the next pass.
    #

    $curr = $tmp;
}

#
# Do the actual output...  If formatting is desired, use par.  If not,
# or if par fails, just emit the text as it is.
#

$use_raw = 1;
$erase = 0;

if ($format)
{
    open(PAR, "| $fmt > /tmp/$$");  # failure case checked later...
    print PAR $out;
    close(PAR);

    $erase = 1;

    unless ($?)
    {
        if (open(INP, "/tmp/$$"))
        {
            print while(<INP>);
            close(INP);
            $use_raw = 0;
        }
    }
}

print $out if $use_raw;

unlink "/tmp/$$" if ($erase && -e "/tmp/$$");

###########################################################################
# chomp - we don't have Perl5, so this does the same as chomp would.
###########################################################################

sub chomp
{
    if ($/ eq "") { s#(^.*)\n$#$1#o; } else { s#(^.*)$/#$1#o; }
}

###########################################################################
# max - returns the greatest of its args
###########################################################################

sub max
{
    local(@sorted) = sort compare @_;
    return $sorted[0];
}

###########################################################################
# compare - subroutine for the sort command above
###########################################################################

sub compare
{
    $b <=> $a;
}

###########################################################################
# munge -- Rip out the good stuff & format the result.
###########################################################################

sub munge
{
    local($line) = @_;
    local($attr, $addr, $name);

    $line =~ s/^From:\s+//;

    $name = '';
    $addr = $line;

    if ($line =~ /</)
    {
        ($addr) = ($line =~ /<([^>]+)>/);
        ($name = $line) =~ s/<[^>]+>//;
    }
    elsif ($line =~ /\(/)
    {
        ($name) = ($line =~ /\(([^\)]+)\)/);
        ($addr = $line) =~ s/\([^\)]+\)//;
    }

    $addr =~ s/^\s+|\s+$//g;    $addr =~ s/[\(\)<>"]//g;
    $name =~ s/^\s+|\s+$//g;    $name =~ s/[\(\)<>"]//g;

    ($attr = $name || $addr) =~ s/@.*//;
    $attr =~ s/(\b\S)([^\s]+)/$1/g;
    $attr =~ s/\s+//g;

    $name =~ s/^\s+|\s+$//g;

    return ($name, $attr);
}


__END__

usage: supercite.pl [-f] [-nf] [-aATTR] [-h] [file1 [file2 [file...]]]

    use -f  to force formatting (via "par"),
        -nf to force no-formatting,
        -a  to force a particular attribution (e.g. "-aScott"),
        -w  to specify the width of the output (e.g. "-w80"),
        -h  for this option summary

May be run as a filter, or with one or more named input files.
Note that no whitespace is allowed after the "-a" option.  To
use whitespace, use quotes (as in "-a'Joe Smith'").



