#
# $Id: filter_innd.pl,v 1.7 1997/06/13 14:47:53 brister Exp $
#
# Sample Perl filtering file for the innd hooks.
# 

# This file gets loaded at innd process startup, and every time a 'reload
# perl xxx' or a 'reload all xxx' is done.
#
# Before this file is loaded, the perl routine `filter_before_reload' is
# called, and after it's finished loading, the perl routine
# `filter_after_reload' is called. See startup_innd.pl for more details.
#
# The following routines can be defined here for use by innd:
#
#	sub filter_art { ... }
#
#		This routine is called before every article is accepted for
#		posting. It is called with no arguments, but has access to
#		all the non-empty standard headers of the article via the
#		global associative array `%hdr'.  If it returns the empty
#		string ("") then the article is accepted. If it returns any
#		non-null string value, then the article is rejected and the
#		returned string value is logged as the reason why.
#
#		The standard headers are:
#
#			Approved, Control, Date, Distribution, Expires,
#			From, Lines, Message-ID, Newsgroups, Path,
#			Reply-To, Sender, Subject, Supersedes, Bytes,
#			Also-Control, References
#
#	sub filter_mode { ... }
#
#		This routine is called every time `go', `pause',
#		`throttle', or `shutdown' is called. It is called with
#		no arguments and returns no value. The global
#		associative array `%mode' has three keyed values
#		stored in it:
#
#			'Mode'		The current mode
#					   ("running", "paused", "throttled") 
#			'NewMode'	The new mode
#			'reason'	The reason given.
#
#		For example:	%mode =	('Mode', 'running', 
#					'NewMode', 'throttled', 
#					'reason', 'doing nightly backups')
#
#		There is actually no 'shutdown' mode, so when shutdown is
#		called, the value for $mode{'NewMode'} will
#		be "throttled" and the value for $mode{'reason'} will
#		be "shutting down".
#
# If filter_art is not defined when this file is done loading, then
# filtering is disabled. If any syntax error occurs when loading the file,
# then filtering is disabled.


#
# Called on each article innd receives from a peer. Return "" to accept,
# and any other non-null string to reject. If rejecting, the string returned
# will be part of the logged reason.
#

# Jeff Garzik 1997-05-21.
#
# filter_art -- decide whether the current article should be accepted.
#
# Takes no arguments; the article's headers are read from the global
# hash %hdr.  Returns "" to accept the article, or one of the following
# non-empty strings to reject it:
#
#   "EMP rejected"      the From/Subject/Lines key is a remembered EMP
#   "MMF rejected"      the Subject matches a make-money-fast pattern
#   "ECP rejected"      more than $max_crossposts newsgroups are listed
#   "new EMP detected"  this copy took the key past $max_multi_posts
#
# State kept between calls lives in package globals: @history/%history
# (recent article keys and how often each occurs in @history) and
# @EMP_Hist/%EMP (keys currently being rejected as EMP).  All scratch
# values are lexicals so they do not leak into the shared namespace.
#
sub filter_art {
    my $max_crossposts    = 40;    # max number of crossposts per article
    my $max_multi_posts   = 5;     # max article copies kept before EMP filter begins
    my $article_hist_size = 3000;  # keep history of last N article keys
    my $control_passthru  = 1;     # true (non-zero) to exempt cmsgs from filtering
    my $emp_hist_size     = 900;   # number of EMP keys to hold in memory


    ##
    ## allow control messages to be exempted from filtering if
    ## $control_passthru is true
    ##

    # For those who don't want to filter cancels...
    return "" if ($control_passthru && $hdr{"Control"});

    ## Only non-empty headers are present in %hdr; treat a missing one
    ## as the empty string so the checks below never see undef.
    my ($from, $subject, $lines, $newsgroups) =
        map { defined($hdr{$_}) ? $hdr{$_} : "" }
            ("From", "Subject", "Lines", "Newsgroups");


    ##
    ## For bulk spamming, an EMP filter with a longer memory
    ##

    ## build key message info buffer
    my $key = "$from $subject $lines";

    return "EMP rejected" if ($EMP{$key});


    ##
    ## MMF filter
    ##

    ## Filter out strings 'make money fast', 'cash cash cash',
    ## any string that completely matches
    ##   $$... CAPITAL LETTERS AND SPACES AND D-A-S-H-E-S $$...

    return "MMF rejected"
        if (($subject =~ /make\s+money\s+fast/i) ||
            ($subject =~ /cash\s*cash\s*cash/i) ||
            ($subject =~ /^\s*\$+[A-Z\s\-]+\$+\s*$/));


    ##
    ## ECP filter
    ##

    ## Split newsgroups line by "," and reject when the number of
    ## groups exceeds the limit.
    my @groups = split(/,/, $newsgroups);
    return "ECP rejected" if (@groups > $max_crossposts);


    ##
    ## EMP filter
    ##

    ## default value is to accept the article
    my $rval = "";

    ## Store From, Subject, and Lines in history array and hash
    push(@history, $key);
    $history{$key}++;

    ## If post appears more than high limit, save for
    ## continual rejection, outside of history window
    if ($history{$key} > $max_multi_posts) {
        $EMP{$key} = 1;
        push(@EMP_Hist, $key);

        ## Compare the element count (not the last index) so that at
        ## most $emp_hist_size keys are remembered.
        while (@EMP_Hist > $emp_hist_size) {
            my $expired = shift(@EMP_Hist);
            delete $EMP{$expired};
        }

        $rval = "new EMP detected";
    }

    ## Trim old entries from history.  Remember that the data
    ## structure stays around between calls.  Compare the element
    ## count (not the last index) so exactly $article_hist_size
    ## entries are retained.
    while (@history > $article_hist_size) {
        my $expired = shift(@history);
        next unless (exists $history{$expired});

        if ($history{$expired} < 2) {
            delete $history{$expired};
        } else {
            $history{$expired}--;
        }
    }

    ## return EMP value, rejected or "" for accepted
    return $rval;
}





#
# filter_mode -- hook invoked when innd changes operating mode.
#
# Takes no arguments; reads the requested mode and its reason from the
# global hash %mode.  Every branch is currently a placeholder, so the
# routine has no effect.
#
sub filter_mode {
    my $requested = $mode{'NewMode'};
    my $why       = $mode{'reason'};

    if ($requested eq "throttled" and $why eq "shutting down") {
        # innd is shutting down. Time to clean up and get out of here.
    }
    elsif ($requested eq "paused" or $requested eq "throttled") {
        # Going quiet: a real filter might close its database here,
        # e.g. &close_spam_database (that function does not exist).
    }
    else {
        # Any other mode: a real filter might (re)open its database
        # here, e.g. &open_spam_database (that one does not exist either).
    }
}




###########################################################################
##
## Another sample... from Christophe Wolfhugel <wolf@pasteur.fr>.
##


#### Regular expressions we reject.
#### Format : Header => regexp => reason
##%reject = (
##   'Subject'	=> {
##      'make.*money.*fast'		=>	'MMF rejected',
##      'cash.*cash.*cash'		=>	'Cash rejected'
##   },
##);
##
##sub filter_art {
##   my($rval) = '';
##   my(@ng, $i, $j, $k, $l);
##
##   if ($hdr{'From'} !~ /\@/o) {
##      $rval = 'Invalid From';
##   } else {
##      while (($i, $j) = each %reject) {
##         while (($k, $l) = each %{$j}) {
##            if ($hdr{$i} =~ /$k/i) {
##               $rval = $l;
##               goto the_end;
##            }
##         }
##      }
##   }
##   @ng = split(/,/, $hdr{'Newsgroups'});
##   if ($#ng > 10) {
##     $rval = 'ECP rejected';
##   }
##the_end:
##   undef %hdr;
##   return $rval
##}
##
##sub filter_mode {
##}
##
###%hdr = (
###	'Subject'	=>	'Make money fast',
###	'From'		=>	'bozo@gov.org'
###);
###&filter_art;


