#!/usr/bin/perl

=begin metadata

Name: strings
Description: extract strings
Author: Nathan Scott Thompson, quimby at city-net dot com
License:

=end metadata

=cut


# Copyright 1999 Nathan Scott Thompson <quimby at city-net dot com>

=head1 NAME

strings - extract strings

=head1 SYNOPSIS

	strings [-afo] [-n length] [-t {d|o|x}] [file ...]

=head1 DESCRIPTION

C<strings> prints strings gleaned from the given files
(or from standard input.)  By default strings of less than 4 characters
are ignored.  A string is considered to be any sequence of graphical ASCII
characters (plus space and tab).

Options:

  -f            Print the file name with each string.

  -n length     Set the minimum length of strings to print; 4 by default.

  -o            Print each string with the octal offset in the file.
                (Same as -to).

  -t {d|o|x}    Print each string with the offset in the file;
                specify decimal, octal or hexadecimal respectively.

The -a option is included for compatibility with older versions but does
nothing.

=head1 BUGS

  Things are seldom what they seem,
  Skim milk masquerades as cream;
  Highlows pass as patent leathers;
  Jackdaws strut in peacock's feathers.

=cut

use strict;

use File::Basename qw(basename);
use Getopt::Std qw(getopts);

use vars qw($opt_a $opt_f $opt_o $opt_n $opt_t);

use constant EX_SUCCESS => 0;
use constant EX_FAILURE => 1;

my $Program = basename($0);

sub usage {
	warn "usage: $Program [-afo] [-n length] [-t {d|o|x}] [file ...]\n";
	exit EX_FAILURE;
	}

getopts( 'afon:t:' ) or usage();
if (defined $opt_n) {
    if ($opt_n !~ m/\A[0-9]\Z/ || $opt_n == 0) {
        warn "$Program: invalid minimum string length '$opt_n'\n";
        exit EX_FAILURE;
    }
} else {
    $opt_n = 4;
}

if ($opt_o) {
    $opt_t = 'o';
} elsif (defined $opt_t) {
    my %EXPECT = (
        'd' => 1,
        'o' => 1,
        'x' => 1,
    );
    usage() unless $EXPECT{$opt_t};
}
my $offset_format = "\%07$opt_t ";

# Consider all graphic characters plus space and tab to be printable.
# Escape all punctuation characters out of paranoia.

my $punctuation = join '\\', split //, q/`~!@#$%^&*()-+={}|[]\:";'<>?,.\//;
my $printable = '\w \t' . $punctuation;
my $chunksize = 4096; # whatever

for my $filename ( @ARGV )
{
    next if -d $filename;
    my $in;
    unless (open $in, '<', $filename) {
        warn "$Program: Can't open '$filename': $!\n";
        exit EX_FAILURE;
    }
    binmode $in;
    scanfile($in, $filename);
    close $in;
}
unless (@ARGV) {
    scanfile(*STDIN, '<stdin>');
}
exit EX_SUCCESS;

sub scanfile {
    my ($fh, $filename) = @_;
    my $offset = 0;

    while ($_ or read($fh, $_, $chunksize))
    {
        $offset += length($1) if s/^([^$printable]+)//o;
        my $string = '';

	do {
            $string .= $1 if s/^([$printable]+)//o;
        } until ($_ or !read($fh, $_, $chunksize));

        if ( length($string) >= $opt_n )
        {
            print $filename, ':' if $opt_f;
            printf $offset_format, $offset if $opt_t;
            print $string, "\n";
        }
        $offset += length($string);
    }
}
