#! /bin/sh

# Last-Edit: Sat 9 Jan 88, by Jacob Gore (gore@EECS.NWU.Edu)

# arbitron -- this program produces rating sweeps for USENET.
# This version is for NOTES systems based on the Lou Salkind notes
# that keeps sequencers as editable ASCII in ~/.nfseq.
#
# To participate in the international monthly ratings sweeps, run
# "arbitron" every month, ON OR AFTER the first.
#
# This script is based on one written by Brian Reid at Stanford, and
# updated and bugfixed by Spencer Thomas at Univ. of Utah and Geoff
# Kuenning at SAH Consulting.  It was written by Rich $alz at Mirror
# Systems.
#
# This version was rewritten more-or-less from the ground up by
# Tw Cook at Hewlett-Packard to correspond to the notes version
# used widely within HP.  There is no guarantee that this will
# work under anything but HP-UX (EECS.NWU.Edu runs it on 4.3BSD+NFS),
# although changes (if any) for other systems should be minimal.
#
# This script was modified by Jacob Gore at Northwestern University
# to make it portable enough to run on 4.3BSD with some customization,
# and later to take advantage of the features of the new awk (as described
# in the AWK book).

# This script will likely have to be run by root or daemon so that
# it will be able to read everyone's .nfseq files.
#
# (At EECS.NWU.Edu, it is run from the notes account's crontab, so it is setuid
# to root, but executable by notes group only.)

######## Customization ###########################################

# Name under which this host is reported.  Replace the command substitution
# with a literal name if the hostname command gives the wrong answer.
#
hostname=$(hostname)

# The "new" awk interpreter.  The embedded awk program depends on new-awk
# features and will NOT run under the old awk (a separate, slightly more
# restricted version of arbitron exists for the old awk).
#
AWK=__AWK__

# Lowest and highest uid that belong to real users on this system.
# Pseudo-accounts such as "rje" and "daemon" should fall outside this range.
#
lowUID=10000
highUID=32766

# Awk pattern naming the groups to report on.  For normal Usenet use there
# is no need to change it.
#
newsgroups='^soc|^talk|^comp|^sci|^rec|^misc|^news|^unix-pc|^alt|^bionet'

# Awk pattern naming the local groups.  It replaces the pattern above when
# the -l option is given.
#
localgroups='^eecs|^list|^nwu|^chi'

# Mail addresses that receive the output.  To take part in the normal Usenet
# ratings sweep, netaddress should eventually reach:
#   uucp path: {sun, pyramid, hplabs, bellcore}!decwrl!netsurvey
#   internet: netsurvey@decwrl.DEC.COM
#
# Local reports go to localaddress only; Usenet reports go to both
# netaddress and localaddress.
#
netaddress='netsurvey@decwrl.DEC.Com'
localaddress=notes

# Command patterns used to deliver the data.  They are single-quoted on
# purpose: $address is not defined yet and must be expanded later, when the
# command is actually run.
#
# If you are not running MMDF, your deliver command will probably be:
#	normaldeliver='mail -s Arbitron-data $address'
#
normaldeliver='v6mail -s Arbitron-data -t $address'
debugdeliver=cat	# used with the "-x" (debug) option

# How many days back a group must have been read to count as active.
# The -t option overrides this.
#
defaulttimelimit=14

# Fewest readers a notesgroup needs in order to be reported.
# The -c option overrides this.
#
defaultcutoff=14

# Command search path.  SysV systems need "/usr/bin:/bin"; BSD systems
# should also include "/usr/ucb".
#
PATH=/usr/bin:/bin:/usr/ucb:/usr/local/bin

# Go down to the line after the "CHECK HERE" comment, and fix up the awk
# variable assignments.  After that, this script will be ready.

######## End Customization #######################################

# Scratch files: $arbout receives the finished report and $tmpfile holds the
# unsorted per-group counts written by the awk program.  This script usually
# runs as root, so the files are created by mktemp (private, unpredictable
# names) rather than as /tmp/<prefix>$$, which anyone could pre-create or
# replace with a symbolic link.
arbout=$(mktemp /tmp/arboXXXXXX) || {
    echo "arbitron: cannot create temporary file" >&2
    exit 1
}
tmpfile=$(mktemp /tmp/arbtXXXXXX) || {
    rm -f "$arbout"
    echo "arbitron: cannot create temporary file" >&2
    exit 1
}

# Remove the scratch files however the script ends; turn the usual fatal
# signals into a normal exit so that the exit trap runs for them too.
trap 'rm -f "$arbout" "$tmpfile"' 0
trap 'exit 1' 1 2 15

# Run-time settings start out at the configured defaults; command-line
# options may override them later.
deliverpattern=$normaldeliver
timelimit=$defaulttimelimit
cutoff=$defaultcutoff
address="$netaddress $localaddress"

libdir=__LIBDIR__

# Extract the notes version banner from the inotes binary.
# If the following line does not work, just manually set it
# to the notes version (should look like "Notes 2.7 86/07/02").
# If your version of notes does not tell you this information
# when you run the "notes" command, then odds are this script
# is not going to work anyway.

notesversion=$(strings "$libdir/inotes" | __GREP__ '^Notes 2' | __SED__ 1q)

# usage -- print the command synopsis on stderr and abort the script.
usage() {
    echo "Usage: arbitron [-c #] [-x] [-l] [-t #]" >&2
    exit 1
}

# need_number VALUE -- abort with the usage message unless VALUE is a
# non-empty string of decimal digits.
need_number() {
    case $1 in
        '' | *[!0-9]*) usage ;;
    esac
}

# parse_options ARG... -- interpret the command-line flags.
#
#   -c N | -cN   set $cutoff (groups with fewer readers are not reported)
#   -t N | -tN   set $timelimit (number of days back to check)
#   -x           debug: set $deliverpattern to $debugdeliver (do not mail)
#   -l           local report: $newsgroups becomes $localgroups and
#                $address becomes $localaddress
#
# Anything else, a missing value, or a non-numeric value prints the usage
# message and exits with status 1.
parse_options() {
    while [ $# -gt 0 ]
    do
        case $1 in

        -c)     # Cutoff value below which nf readership is not reported
                [ $# -ge 2 ] || usage
                need_number "$2"
                cutoff=$2
                shift
                shift ;;

        -c*)    # ditto, with the value attached to the flag.  Take
                # everything after "-c", not just the first character.
                # The X prefix keeps expr from reading the leading "-"
                # as one of its own operators.
                value=$(expr "X$1" : 'X-c\(.*\)')
                need_number "$value"
                cutoff=$value
                shift ;;

        -t)     # Time limit, if other than the default
                [ $# -ge 2 ] || usage
                need_number "$2"
                timelimit=$2
                shift
                shift ;;

        -t*)    # ditto, with the value attached to the flag
                value=$(expr "X$1" : 'X-t\(.*\)')
                need_number "$value"
                timelimit=$value
                shift ;;

        -x*)    # Turn on debugging - usually just means do not mail it.
                deliverpattern=$debugdeliver
                shift ;;

        -l*)    # Show local groups only
                newsgroups=$localgroups
                address=$localaddress
                shift ;;

        *)
                usage ;;
        esac
    done
}

parse_options "$@"

# Hand the run-time settings to the awk program through the environment.
export tmpfile hostname notesversion cutoff timelimit \
       lowUID highUID newsgroups

$AWK '
BEGIN {

    # Input is /etc/passwd, whose fields are separated by colons.
    FS = ":";
    white_space = "[ \t]+";

    OK = 1; ERROR = -1;		# getline results: record read / failure

    # months[i] is the name of month i; month_start[i] is the number of days
    # of a non-leap year that precede the first day of month i.
    split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", months,      " ");
    split("  0  31  59  90 120 151 181 212 243 273 304 334", month_start, " ");

    # Invert months[] into month_num[name] = number.  The subscript produced
    # by "for (mon in months)" is a string, so "+ 0" is needed to store a
    # number.  Without it, later tests such as month > february compare
    # strings, and "10" > "2" is false.
    for (mon in months) {
        month_num[months[mon]] = mon + 0;
    }
    february = month_num["Feb"];

    # The settings exported by the surrounding shell script are fetched by
    # letting a subshell echo each environment variable.
    "echo $cutoff"       | getline cutoff;
    "echo $timelimit"    | getline time_limit;
    "echo $tmpfile"      | getline temp_file;
    "echo $hostname"     | getline hostname;
    "echo $notesversion" | getline notes_version;
    "echo $lowUID"       | getline low_uid;
    "echo $highUID"      | getline high_uid;
    "echo $newsgroups"   | getline groups_of_interest;

    # Current date, kept verbatim for the report and split into words so the
    # month, day and year can be picked out below.
    "date" | getline date_line;
    split(date_line, date, white_space);

    ##############  Check the form of the output which your "date" command
    # CHECK HERE #  produces, and set the index numbers accordingly.
    ##############

    # Pattern for BSD 4.3: "Wed Oct 22 14:00:00 CDT 1986"
    date_now = since_1973(date[2], date[3], date[6]);
}

# Visit the home directory of every passwd entry that looks like a real user:
# uid within low_uid..high_uid and a login shell whose name contains "sh".
# Every such entry counts toward total_users.  If the .nfseq file in the home
# directory can be opened, the user also counts as a notes reader, and each
# group of interest read within the last time_limit days gets one more
# active reader.
($3 >= low_uid && $3 <= high_uid && $7 ~ /sh/) {

    ++total_users;
    seq_file = $6 "/.nfseq";

    # Remember the result of the first read: ERROR means there is no readable
    # .nfseq (not a notes user), 0 means the file exists but is empty.
    status = (getline seq_line < seq_file);

    if (status != ERROR) {

        ++notes_users;

        # Only process lines that were really read.  At end of file getline
        # leaves seq_line untouched, so handling it anyway (as a do-while
        # would for an empty file) counts the last line of the previously
        # read .nfseq a second time.
        while (status == OK) {
            # Sample .nfseq entry:
            # hp.graphics: Tue Sep 30 09:50:07 1986

            split(seq_line, seq, white_space);
            if (seq[1] ~ groups_of_interest) {
                # Drop the trailing colon from the group name.
                nf_name = substr(seq[1], 1, length(seq[1]) - 1);
                date_read = since_1973(seq[3], seq[4], seq[6]);

                # If this notesfile has been read recently, then increment
                # the readership count for it.  Otherwise, skip it.
                if (date_read + time_limit >= date_now) {
                    ++active[nf_name];
                }
            }
            status = (getline seq_line < seq_file);
        }

        # close is a function; the parentheses are required by most awks.
        close(seq_file);
    }
}

END {
    # Report header.  "+ 0" makes a counter that was never incremented print
    # as 0 rather than as an empty string.
    print "Host        " hostname
    print "Users       " (total_users + 0)
    print "NetReaders  " (notes_users + 0)
    print "ReportDate  " date_line
    print "SystemType  " notes_version

    # Prepare the summary readership information - ignore any notesfiles
    # with fewer readers than "cutoff" (set by the -c option or by
    # defaultcutoff in the shell script).
    wrote_any = 0;
    for (nf in active) {
        if (active[nf] >= cutoff) {
            printf("%d %s\n", active[nf], nf) > temp_file;
            wrote_any = 1;
        }
    }

    # Print the summary information in sorted order.  Nothing is sorted when
    # no group reached the cutoff, because temp_file may not exist then.
    if (wrote_any) {
        # Flush and close the file first, otherwise sort may read it while
        # part of the data is still sitting in an output buffer.
        close(temp_file);

        sort_cmd = "sort -n -r < " temp_file;
        # getline returns -1 on failure; test for > 0 so that an error ends
        # the loop instead of spinning forever.
        while ((sort_cmd | getline) > 0) {
            print;
        }
        close(sort_cmd);
    }
}

function since_1973(month_name, day, year,    month, period, tally) {
# Computes the number of days between the beginning of 1973 and
# month/day/year.
#
# 1973 was chosen because it is a reasonably distant year that immediately
# follows a leap year (making computations simpler).
#
# Note: every year divisible by 4 is treated as a leap year.  That holds for
# 1973 through 2099, but not for 2100.
#
# month_name: "Jan"-"Dec"; day, year: integers.
# month, period and tally are local variables; callers never pass them.
# Reads the global tables month_num[] and month_start[] and the global
# february, all set up in BEGIN.

        # "+ 0" forces a number.  The month numbers may have been stored as
        # strings, and as strings "10" > "2" is false, which would drop the
        # leap day for October through December.
        month = month_num[month_name] + 0;
        period = year - 1973;
        tally = period * 365;	# at least this many days in previous years

        # Handle leap years between 1972 and this year (non-inclusively)
        tally += int(period / 4);

        # Add in days in this year already passed
        tally += month_start[month];
        # Compensate for long February, if necessary
        if ((year % 4 == 0) && (month > february + 0)) {
                ++tally;
        }

        # Finally, add days in this month already passed.
        tally += day;

        return tally;
}
' < /etc/passwd > $arbout

# Ship off the output to wherever it goes.  $deliverpattern still contains a
# literal "$address", so it goes through eval to be expanded only now.  The
# redirection is appended in single quotes so that eval itself expands and
# quotes the report file name.
eval "$deliverpattern"' < "$arbout"'
status=$?

rm -f "$arbout" "$tmpfile"

# Pass the result of the delivery command on, so that a failed delivery is
# visible to whoever started the script instead of being reported as success.
exit $status

# ------- End of Forwarded Message

