#!/opt/bin/perl

=head1 NAME

perl-libextractor - determine perl library subsets for building distributions

=head1 SYNOPSIS

   perl-libextractor ...

General Options:

   -v                verbose
   --version         display version
   --bindir path     perl executable target ("exe")
   --dlldir path     shared library target ("dll")
   --scriptdir path  executable script target ("bin")
   --libdir path     perl library target ("lib")
   -I path           prepend path to @INC
   --no-packlists    do not use packlists

Selection:

   -Mmodule          load and trace module
   --script progname add executable script
   --eval | -e str   trace execution of perl string
   --perl            add perl interpreter itself
   --core-support    add core support
   --unicore         add unicore database
   --core            add perl core library
   --glob glob       add library files select by glob
   --filter pat,...  apply include/exclude patterns
   --runtime-only    remove files not needed for execution

Modes:

   --list            list source and destination paths

   --dstlist         list destination paths

   --srclist         list source paths

   --copy path       copy files to a target path
      --strip           strip .pl and .pm files
      --cache-dir path  cache directory to use
      --binstrip "..."  strip binaries and dlls

=head1 DESCRIPTION

This program can be used to extract a subset of your perl installation,
with the intention of building software distributions. Or in other words,
this module finds all files necessary to run a perl program (library
files, perl executable, scripts).

The resulting set can then be displayed or copied to another directory,
while optionally stripping the perl sources to make them smaller.

=head2 OPTIONS

This manpage gives only rudimentary documentation for options that have an
equivalent in L<Perl::LibExtractor>, so look there for details.

Options are not processed in order specified on the commandline, but in
multiple phases (e.g., C<-I> gets executed before any C<-M> option).

=head3 GENERAL OPTIONS

These options configure basic settings.

=over 4

=item C<--verbose>

=item C<-v>

Increases verbosity - highly recommended for interactive use.

=item C<--version>

Display the version of L<Perl::LibExtractor>.

=item C<--exedir> I<path>

Specifies the subdirectory for binary executables (the perl interpreter itself), instead of
the default F<exe/>.

=item C<--dlldir> I<path>

Specifies the subdirectory for shared libraries, instead of
the default F<dll/>.

=item C<--bindir> I<path>

Specifies the subdirectry for perl scripts, instead of
the default F<bin/>.

=item C<--libdir> I<path>

Specifies the subdirectory for perl library files, instead of
the default F<lib/>.

=item C<-I> I<path>

Prepends the given path to C<@INC> when searching perl library directories
(the last C<-I> option is prepended first).

=item C<--no-packlists>

Packlists allow to package all of a distribution, including resource files
not found through the normal tracing mechanism. This option disaables use
of packlists (normally highly recommended).

Some especially broken perls (Debian GNU/Linux...) have missing files,
so this option doesn't work with them, at least not for any packages
distributed by debian (packages installed through CPAN or any other
non-dpkg-mechanism work fine).

=back

=head3 SELECTION

These options specify and modify module selections. They are executed in
the order stated on the commandline, and when in doubt, you should use
them in the order documented here.

=over 4

=item C<-M>I<module>

Load the named module and trace direct dependencies
(e.g. F<-MCarp>). Same as C<add_mod> in L<Perl::LibExtractor>.

=item C<--script> I<progname>

Compile the (installed) script I<progname> and trace dependencies
(e.g. F<corelist>). Same as C<add_bin> in L<Perl::LibExtractor>.

=item C<--eval> I<string>

=item C<-e str> I<string>

Compile and execute the givne perl code, and trace dependencies (e.g. F<-e
"use AnyEvent; AnyEvent::detect">). Same as C<add_eval> in L<Perl::LibExtractor>.

=item C<--perl>

Adds the perl interpreter itself, including libperl if required to run
perl. Same as C<add_perl> in L<Perl::LibExtractor>.

=item C<--core-support>

Add all support files needed to support built-in features of perl (such
as C<ucfirst>), which is usually the minimum you should add from the core
library. Same as C<add_core_support> in L<Perl::LibExtractor>.

=item C<--unicore>

Add the whole unicore database, which is big, contains many, many files
and is usually not needed to run a program. Same as C<add_unicore> in
L<Perl::LibExtractor>.

=item C<--core>

Add the complete perl core library, which includes everything added
by F<--core-support> and F<--unicore>. Same as C<add_core> in
L<Perl::LibExtractor>.

Some especially broken perls (Debian GNU/Linux...) have missing files, so
this option doesn't work with them.

=item C<--glob glob>

Add all files from the perl library directories thta match the given
extended glob pattern. Same as C<add_glob> in L<Perl::LibExtractor>, also
see there for the syntax of glob patterns.

Example: add AnyEvent.pm and all AnyEvent::xxx modules installed.

   --glob Coro --glob "Coro::*"

=item C<--filter pat,...>

Apply a comma-separated series of extended glob patterns, prefixed by
C<+> (include) or C<-> (for exclude patterns).  Same as C<filter> in
L<Perl::LibExtractor>, also see there for exact semantics and syntax.

Example: remove all F<*.al> files in F<auto/POSIX>, except F<memset.al>.

   --filter "+/lib/auto/POSIX/memset.al,-/lib/auto/POSIX/*.al"

=item C<--runtime-only>

Remove all files not needed to run any scripts, such as debug info
files, link libraries, pod files and so on. Same as C<runtime_only> in
L<Perl::LibExtractor>.

=back

=head3 MODES

These options select a specific work mode. Work modes might have specific
options to control them further.

=over 4

=item C<--list>

Lists all selected files in two columns, first column is destination path,
second column is source path and first line is a header line.

=item C<--dstlist>

Same as C<--list>, but only list destination paths and has no header -
intended for scripts.

=item C<--srclist>

Same as C<--list>, but only list source paths and has no header -
intended for scripts.

=item C<--copy> I<path>

Copy all selected files to the respective position under the directory
I<path> - previous contents of the directory will be list.

This mode has the following suboptions:

=over 4

=item C<--strip>

Strip all C<.pm>, C<.pl>, C<.al> files and perl executables, which
mostly means removal of unecessary whitespace and documentation - see
L<Perl::Strip> which is used.

=item C<--cache-dir> I<path>

Specify the cache dir to use - the default is F<~/.cache/perlstrip> for
cached stripped perl files (compatible to the C<perlstrip> program).

=item C<--binstrip> I<"...">

Use the specified program and arguments to strip executables, shared libraries and shared objects.

This is only necessary when your programs were compiled with debugging
info, but can be used to specify extra treatment for all binary files.

=back

=back

=head1 EXAMPLE

#TODO#

Let's first find out about the choice of paths for the subset. The
Deliantra client binary packages use L<Urlader> nowadays, and there it is
convenient to have F<perl> and any shared libraries directly in the root
of the distribution.

The perl library files are put into a directory named F<pm>, simply
because it's shorter than F<lib>, and in the future, some files might go
into F<lib>.

And finally, the F<deliantra> script itself is put into the perl library
directory, because it is not run directly - the installed client uses the
system fonts and other resources, while the binary package is supposed
to use the files packaged with it. To achieve this, a wrapper script is
created, called F<run>; which displays a splash screen and configures the
environment. A simplified version of it could look like this:

   @INC = ("pm", "."); # "." required by newer AutoLoader grrrr.
   $ENV{PANGO_RC_FILE} = "pango.rc";
   require "bin/deliantra";
   exit 0;

=head1 SEE ALSO

L<App::Staticperl>, L<Perl::Squish>.

=head1 AUTHOR

 Marc Lehmann <schmorp@schmorp.de>
 http://software.schmorp.de/pkg/staticperl.html

=cut

use common::sense;

use Getopt::Long;
use List::Util ();
use File::Path ();
use File::Basename ();
use File::Copy ();

use Perl::LibExtractor;
use Perl::Strip;

our @INC;

my $USE_PACKLISTS = 1;
my $VERBOSE;

my $STRIP;
my $BINSTRIP;
my $CACHE;
my $CACHEDIR;

my %DIR = qw(exe exe dll dll bin bin lib lib);

$|=1;

sub usage {
   require Pod::Usage;

   Pod::Usage::pod2usage (-output => *STDOUT, -verbose => 1, -exitval => 1, -noperldoc => 1);
}

@ARGV
   or usage;

Getopt::Long::Configure ("bundling", "no_auto_abbrev", "no_ignore_case");

my $ex;
my $set;
my (@phase0, @phase1, @phase2);

GetOptions
   "verbose|v"     => sub { ++$VERBOSE },
   "version"       => sub {
      warn "This is perl-libextractor version $Perl::LibExtractor::VERSION\n";
   },

   "exedir=s"      => \$DIR{exe},
   "dlldir=s"      => \$DIR{dll},
   "bindir=s"      => \$DIR{bin},
   "libdir=s"      => \$DIR{lib},

   "I=s"           => sub {
      my $arg = $_[1];
      unshift @phase0, sub { unshift @INC, $arg };
   },
   "no-packlists"  => sub { $USE_PACKLISTS = 0 },

   "M=s"           => sub {
      my $arg = $_[1];
      push @phase1, sub { $ex->add_mod ($arg) };
   },
   "script=s"      => sub {
      my $arg = $_[1];
      push @phase1, sub { $ex->add_bin ($arg) };
   },
   "eval|e=s"      => sub {
      my $arg = $_[1];
      push @phase1, sub { $ex->add_eval ($arg) };
   },
   "perl"          => sub {
      push @phase1, sub { $ex->add_perl };
   },
   "core-support"  => sub {
      push @phase1, sub { $ex->add_core_support };
   },
   "unicore"       => sub {
      push @phase1, sub { $ex->add_unicore };
   },
   "core"          => sub {
      push @phase1, sub { $ex->add_core };
   },
   "glob=s"        => sub {
      my $arg = $_[1];
      push @phase1, sub { $ex->add_glob ($arg) };
   },
   "filter=s"      => sub {
      my $arg = $_[1];
      push @phase1, sub { $ex->filter (split /,/, $arg) };
   },
   "runtime-only"  => sub {
      push @phase1, sub { $ex->runtime_only };
   },

   "list"          => sub {
      push @phase2, \&mode_list;
   },
   "dstlist"       => sub {
      push @phase2, \&mode_dstlist;
   },
   "srclist"       => sub {
      push @phase2, \&mode_srclist;
   },
   "copy=s"        => sub {
      my $arg = $_[1];
      push @phase2, sub { &mode_copy ($arg) };
   },

   "strip"         => \$STRIP,
   "cache-dir=s"   => \$CACHEDIR,
   "binstrip=s"    => \$BINSTRIP,

   or die "try $0 --help\n";

@phase2
   or usage;

# for strip_copy
my $STRIPPER = do {
   my @cache;

   unless (defined $CACHEDIR) {
      mkdir "$ENV{HOME}/.cache";
      $CACHEDIR = "$ENV{HOME}/.cache/perlstrip";
   }

   new Perl::Strip cache => $CACHEDIR, optimise_size => 1
};

sub strip_copy($$) {
   my ($src, $dst) = @_;

   my $text = do {
      open my $fh, "<:perlio", $src
         or die "$src: $!\n";

      local $/;
      <$fh>
   };

   printf "$dst: %d ", length $text if $VERBOSE >= 2;

   $text = $STRIPPER->strip ($text);

   printf "to %d bytes... ", length $text if $VERBOSE >= 2;

   open my $fh, ">:perlio", $dst
      or die "$dst: $!\n";
   length $text == syswrite $fh, $text
      or die "$dst: $!\n";
   close $fh;

   print "ok\n" if $VERBOSE >= 2;
}

sub mode_list {
   my @keys = sort keys %$set;
   my $width = List::Util::max map length, @keys;

   printf "%-*.*s %s\n", $width, $width, "SRC", "DST";
   for (@keys) {
      printf "%-*.*s %s\n", $width, $width, $_, $set->{$_}[0];
   }
}

sub mode_dstlist {
   print map "$_\n", sort keys %$set;
}

sub mode_srclist {
   print map "$set->{$_}[0]\n", sort keys %$set;
}

sub mode_copy {
   my $dst = shift;
   print "deleting $dst.\n" if $VERBOSE;
   File::Path::rmtree $dst, $VERBOSE >= 2, 0;

   print "populating $dst...\n" if $VERBOSE;

   my %mkdir;

   while (my ($path, $info) = each %$set) {
      $path =~ m%^([^/]+)(.*)/([^/]+)$%
         or die "$path: malformed destination path, please report.\n";

      my ($p1, $p2, $f) = ($1, $2, $3);

      my $dstdir = "$dst/$DIR{$p1}$p2";

      File::Path::mkpath $dstdir, $VERBOSE >= 2, 0777
         unless $mkdir{$dstdir}++;

      my $src = $info->[0];

      if (
         $STRIP
         && ($f =~ /\.(?:pm|pl|ix|al)$/
             || $p1 eq "bin")
      ) {
         strip_copy $src, "$dstdir/$f";

      } else {
         File::Copy::cp $src, "$dstdir/$f"
            or die "$src => $dstdir/$f: $!";

         if (
            $f =~ /\.(?:bundle|dyld|so|dll|sl)$/ # TODO
            || $p1 eq "exe"
            || $p1 eq "dll"
         ) {
            if (defined $BINSTRIP) {
               system "$BINSTRIP \Q$dstdir/$f";
            }

            chmod 0777 & ~umask, "$dstdir/$f";
         }
      }
   }
}

$ex = new Perl::LibExtractor use_packlists => $USE_PACKLISTS;

$_->() for @phase0;
$_->() for @phase1;

$set = $ex->set;

$_->() for @phase2;

__END__

      print "$file... " if $VERBOSE;

      my $output = defined $OUTPUT ? $OUTPUT : $file;

      my $src = do {
         open my $fh, "<:perlio", $file
            or die "$file: $!\n";

         local $/;
         <$fh>
      };

      printf "%d ", length $src if $VERBOSE;

      $src = (new Perl::Strip @cache, optimise_size => $OPTIMISE_SIZE)->strip ($src);

      printf "to %d bytes... ", length $src if $VERBOSE;
      print $output eq $file ? "writing... " : "saving as $output... " if $VERBOSE;

      open my $fh, ">:perlio", "$output~"
         or die "$output~: $!\n";
      length $src == syswrite $fh, $src
         or die "$output~: $!\n";
      close $fh;
      rename "$output~", $output;

      print "ok\n" if $VERBOSE;
   };

   if ($@) {
      print STDERR "$@\n";
      exit 2;
   }
}

__END__


die "cannot specify both --app and --perl\n"
   if $PERL and defined $APP;

# required for @INC loading, unfortunately
trace_module "PerlIO::scalar";

#############################################################################
# apply include/exclude

{
   my %pmi;

   for (@incext) {
      my ($inc, $glob) = @$_;

      my @match = grep /$glob/, keys %pm;

      if ($inc) {
         # include
         @pmi{@match} = delete @pm{@match};

         print "applying include $glob - protected ", (scalar @match), " files.\n"
            if $VERBOSE >= 5;
      } else {
         # exclude
         delete @pm{@match};

         print "applying exclude $glob - removed ", (scalar @match), " files.\n"
            if $VERBOSE >= 5;
      }
   }

   my @pmi = keys %pmi;
   @pm{@pmi} = delete @pmi{@pmi};
}

#############################################################################
# scan for AutoLoader, static archives and other dependencies

sub scan_al {
   my ($auto, $autodir) = @_;

   my $ix = "$autodir/autosplit.ix";

   print "processing autoload index for '$auto'\n"
      if $VERBOSE >= 6;

   $pm{"$auto/autosplit.ix"} = $ix;

   open my $fh, "<:perlio", $ix
      or die "$ix: $!";

   my $package;

   while (<$fh>) {
      if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
         my $al = "auto/$package/$1.al";
         my $inc = find_inc $al;

         defined $inc or die "$al: autoload file not found, but should be there.\n";

         $pm{$al} = $inc;
         print "found autoload function '$al'\n"
            if $VERBOSE >= 6;

      } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
         ($package = $1) =~ s/::/\//g;
      } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
         # nop
      } else {
         warn "WARNING: $ix: unparsable line, please report: $_";
      }
   }
}

for my $pm (keys %pm) {
   if ($pm =~ /^(.*)\.pm$/) {
      my $auto    = "auto/$1";
      my $autodir = find_inc $auto;

      if (defined $autodir && -d $autodir) {
         # AutoLoader
         scan_al $auto, $autodir
            if -f "$autodir/autosplit.ix";

         # extralibs.ld
         if (open my $fh, "<:perlio", "$autodir/extralibs.ld") {
            print "found extralibs for $pm\n"
               if $VERBOSE >= 6;

            local $/;
            $extralibs .= " " . <$fh>;
         }

         $pm =~ /([^\/]+).pm$/ or die "$pm: unable to match last component";

         my $base = $1;

         # static ext
         if (-f "$autodir/$base$Config{_a}") {
            print "found static archive for $pm\n"
               if $VERBOSE >= 3;

            push @libs, "$autodir/$base$Config{_a}";
            push @static_ext, $pm;
         }

         # dynamic object
         die "ERROR: found shared object - can't link statically ($_)\n"
            if -f "$autodir/$base.$Config{dlext}";

         if ($PACKLIST && open my $fh, "<:perlio", "$autodir/.packlist") {
            print "found .packlist for $pm\n"
               if $VERBOSE >= 3;

            while (<$fh>) {
               chomp;
               s/ .*$//; # newer-style .packlists might contain key=value pairs

               # only include certain files (.al, .ix, .pm, .pl)
               if (/\.(pm|pl|al|ix)$/) {
                  for my $inc (@INC) {
                     # in addition, we only add files that are below some @INC path
                     $inc =~ s/\/*$/\//;

                     if ($inc eq substr $_, 0, length $inc) {
                        my $base = substr $_, length $inc;
                        $pm{$base} = $_;

                        print "+ added .packlist dependency $base\n"
                           if $VERBOSE >= 3;
                     }

                     last;
                  }
               }
            }
         }
      }
   }
}

#############################################################################

print "processing bundle files (try more -v power if you get bored waiting here)...\n"
   if $VERBOSE >= 1;

my $data;
my @index;
my @order = sort {
   length $a <=> length $b
      or $a cmp $b
} keys %pm;

# sorting by name - better compression, but needs more metadata
# sorting by length - faster lookup
# usually, the metadata overhead beats the loss through compression

for my $pm (@order) {
   my $path = $pm{$pm};

   128 > length $pm
      or die "ERROR: $pm: path too long (only 128 octets supported)\n";

   my $src = ref $path
           ? $$path
           : do {
              open my $pm, "<", $path
                 or die "$path: $!";

              local $/;
              
              <$pm>
           };

   my $size = length $src;

   unless ($pmbin{$pm}) { # only do this unless the file is binary
      if ($pm =~ /^auto\/POSIX\/[^\/]+\.al$/) {
         if ($src =~ /^    unimpl \"/m) {
            print "$pm: skipping (raises runtime error only).\n"
               if $VERBOSE >= 3;
            next;
         }
      }

      $src = cache +($STRIP eq "ppi" ? "$UNISTRIP,$OPTIMISE_SIZE" : undef), $src, sub {
         if ($UNISTRIP && $pm =~ /^unicore\/.*\.pl$/) {
            print "applying unicore stripping $pm\n"
               if $VERBOSE >= 6;

            # special stripping for unicore swashes and properties
            # much more could be done by going binary
            $src =~ s{
               (^return\ <<'END';\n) (.*?\n) (END(?:\n|\Z))
            }{
               my ($pre, $data, $post) = ($1, $2, $3);

               for ($data) {
                  s/^([0-9a-fA-F]+)\t([0-9a-fA-F]+)\t/sprintf "%X\t%X", hex $1, hex $2/gem
                     if $OPTIMISE_SIZE;

#                  s{
#                     ^([0-9a-fA-F]+)\t([0-9a-fA-F]*)\t
#                  }{
#                     # ww - smaller filesize, UU - compress better
#                     pack "C0UU",
#                          hex $1,
#                          length $2 ? (hex $2) - (hex $1) : 0
#                  }gemx;

                  s/#.*\n/\n/mg;
                  s/\s+\n/\n/mg;
               }

               "$pre$data$post"
            }smex;
         }

         if ($STRIP =~ /ppi/i) {
            require PPI;

            if (my $ppi = PPI::Document->new (\$src)) {
               $ppi->prune ("PPI::Token::Comment");
               $ppi->prune ("PPI::Token::Pod");

               # prune END stuff
               for (my $last = $ppi->last_element; $last; ) {
                  my $prev = $last->previous_token;

                  if ($last->isa (PPI::Token::Whitespace::)) {
                     $last->delete;
                  } elsif ($last->isa (PPI::Statement::End::)) {
                     $last->delete;
                     last;
                  } elsif ($last->isa (PPI::Token::Pod::)) {
                     $last->delete;
                  } else {
                     last;
                  }

                  $last = $prev;
               }

               # prune some but not all insignificant whitespace
               for my $ws (@{ $ppi->find (PPI::Token::Whitespace::) }) {
                  my $prev = $ws->previous_token;
                  my $next = $ws->next_token;

                  if (!$prev || !$next) {
                     $ws->delete;
                  } else {
                     if (
                        $next->isa (PPI::Token::Operator::) && $next->{content} =~ /^(?:,|=|!|!=|==|=>)$/ # no ., because of digits. == float
                        or $prev->isa (PPI::Token::Operator::) && $prev->{content} =~ /^(?:,|=|\.|!|!=|==|=>)$/
                        or $prev->isa (PPI::Token::Structure::)
                        or ($OPTIMISE_SIZE &&
                            ($prev->isa (PPI::Token::Word::)
                               && (PPI::Token::Symbol:: eq ref $next
                                   || $next->isa (PPI::Structure::Block::)
                                   || $next->isa (PPI::Structure::List::)
                                   || $next->isa (PPI::Structure::Condition::)))
                           )
                     ) {
                        $ws->delete;
                     } elsif ($prev->isa (PPI::Token::Whitespace::)) {
                        $ws->{content} = ' ';
                        $prev->delete;
                     } else {
                        $ws->{content} = ' ';
                     }
                  }
               }

               # prune whitespace around blocks
               if ($OPTIMISE_SIZE) {
                  # these usually decrease size, but decrease compressability more
                  for my $struct (PPI::Structure::Block::, PPI::Structure::Condition::) {
                     for my $node (@{ $ppi->find ($struct) }) {
                        my $n1 = $node->first_token;
                        my $n2 = $n1->previous_token;
                        $n1->delete if $n1->isa (PPI::Token::Whitespace::);
                        $n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::);
                        my $n1 = $node->last_token;
                        my $n2 = $n1->next_token;
                        $n1->delete if $n1->isa (PPI::Token::Whitespace::);
                        $n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::);
                     }
                  }

                  for my $node (@{ $ppi->find (PPI::Structure::List::) }) {
                     my $n1 = $node->first_token;
                     $n1->delete if $n1->isa (PPI::Token::Whitespace::);
                     my $n1 = $node->last_token;
                     $n1->delete if $n1->isa (PPI::Token::Whitespace::);
                  }
               }

               # reformat qw() lists which often have lots of whitespace
               for my $node (@{ $ppi->find (PPI::Token::QuoteLike::Words::) }) {
                  if ($node->{content} =~ /^qw(.)(.*)(.)$/s) {
                     my ($a, $qw, $b) = ($1, $2, $3);
                     $qw =~ s/^\s+//;
                     $qw =~ s/\s+$//;
                     $qw =~ s/\s+/ /g;
                     $node->{content} = "qw$a$qw$b";
                  }
               }

               $src = $ppi->serialize;
            } else {
               warn "WARNING: $pm{$pm}: PPI failed to parse this file\n";
            }
         } elsif ($STRIP =~ /pod/i && $pm ne "Opcode.pm") { # opcode parses its own pod
            require Pod::Strip;

            my $stripper = Pod::Strip->new;

            my $out;
            $stripper->output_string (\$out);
            $stripper->parse_string_document ($src)
               or die;
            $src = $out;
         }

         if ($VERIFY && $pm =~ /\.pm$/ && $pm ne "Opcode.pm") {
            if (open my $fh, "-|") {
               <$fh>;
            } else {
               eval "#line 1 \"$pm\"\n$src" or warn "\n\n\n$pm\n\n$src\n$@\n\n\n";
               exit 0;
            }
         }

         $src
      };

#      if ($pm eq "Opcode.pm") {
#         open my $fh, ">x" or die; print $fh $src;#d#
#         exit 1;
#      }
   }

   print "adding $pm (original size $size, stored size ", length $src, ")\n"
      if $VERBOSE >= 2;

   push @index, ((length $pm) << 25) | length $data;
   $data .= $pm . $src;
}

length $data < 2**25
   or die "ERROR: bundle too large (only 32MB supported)\n";

my $varpfx = "bundle_" . substr +(Digest::MD5::md5_hex $data), 0, 16;

#############################################################################
# output

print "generating $PREFIX.h... "
   if $VERBOSE >= 1;

{
   open my $fh, ">", "$PREFIX.h"
      or die "$PREFIX.h: $!\n";

   print $fh <<EOF;
/* do not edit, automatically created by mkstaticbundle */

#include <EXTERN.h>
#include <perl.h>
#include <XSUB.h>

/* public API */
EXTERN_C PerlInterpreter *staticperl;
EXTERN_C void staticperl_xs_init (pTHX);
EXTERN_C void staticperl_init (void);
EXTERN_C void staticperl_cleanup (void);

EOF
}

print "\n"
   if $VERBOSE >= 1;

#############################################################################
# output

print "generating $PREFIX.c... "
   if $VERBOSE >= 1;

open my $fh, ">", "$PREFIX.c"
   or die "$PREFIX.c: $!\n";

print $fh <<EOF;
/* do not edit, automatically created by mkstaticbundle */

#include "bundle.h"

/* public API */
PerlInterpreter *staticperl;

EOF

#############################################################################
# bundle data

my $count = @index;

print $fh <<EOF;
#include "bundle.h"

/* bundle data */

static const U32 $varpfx\_count = $count;
static const U32 $varpfx\_index [$count + 1] = {
EOF

my $col;
for (@index) {
   printf $fh "0x%08x,", $_;
   print $fh "\n" unless ++$col % 10;

}
printf $fh "0x%08x\n};\n", (length $data);

print $fh "static const char $varpfx\_data [] =\n";
dump_string $fh, $data;

print $fh ";\n\n";

#############################################################################
# bootstrap

# boot file for staticperl
# this file will be eval'ed at initialisation time

my $bootstrap = '
BEGIN {
   package ' . $PACKAGE . ';

   PerlIO::scalar->bootstrap;

   @INC = sub {
      my $data = find "$_[1]"
         or return;

      $INC{$_[1]} = $_[1];

      open my $fh, "<", \$data;
      $fh
   };
}
';

$bootstrap .= "require '//boot';"
   if exists $pm{"//boot"};

$bootstrap =~ s/\s+/ /g;
$bootstrap =~ s/(\W) /$1/g;
$bootstrap =~ s/ (\W)/$1/g;

print $fh "const char bootstrap [] = ";
dump_string $fh, $bootstrap;
print $fh ";\n\n";

print $fh <<EOF;
/* search all bundles for the given file, using binary search */
XS(find)
{
  dXSARGS;

  if (items != 1)
    Perl_croak (aTHX_ "Usage: $PACKAGE\::find (\$path)");

  {
    STRLEN namelen;
    char *name = SvPV (ST (0), namelen);
    SV *res = 0;

    int l = 0, r = $varpfx\_count;

    while (l <= r)
      {
        int m = (l + r) >> 1;
        U32 idx = $varpfx\_index [m];
        int comp = namelen - (idx >> 25);

        if (!comp)
          {
            int ofs = idx & 0x1FFFFFFU;
            comp = memcmp (name, $varpfx\_data + ofs, namelen);

            if (!comp)
              {
                /* found */
                int ofs2 =  $varpfx\_index [m + 1] & 0x1FFFFFFU;

                ofs += namelen;
                res = newSVpvn ($varpfx\_data + ofs, ofs2 - ofs);
                goto found;
              }
          }

        if (comp < 0)
          r = m - 1;
        else
          l = m + 1;
      }

    XSRETURN (0);

  found:
    ST (0) = res;
    sv_2mortal (ST (0));
  }

  XSRETURN (1);
}

/* list all files in the bundle */
XS(list)
{
  dXSARGS;

  if (items != 0)
    Perl_croak (aTHX_ "Usage: $PACKAGE\::list");

  {
    int i;

    EXTEND (SP, $varpfx\_count);

    for (i = 0; i < $varpfx\_count; ++i)
      {
        U32 idx = $varpfx\_index [i];

        PUSHs (newSVpvn ($varpfx\_data + (idx & 0x1FFFFFFU), idx >> 25));
      }
  }

  XSRETURN ($varpfx\_count);
}

EOF

#############################################################################
# xs_init

print $fh <<EOF;
void
staticperl_xs_init (pTHX)
{
EOF

@static_ext = ("DynaLoader", sort @static_ext);

# prototypes
for (@static_ext) {
   s/\.pm$//;
   (my $cname = $_) =~ s/\//__/g;
   print $fh "  EXTERN_C void boot_$cname (pTHX_ CV* cv);\n";
}

print $fh <<EOF;
  char *file = __FILE__;
  dXSUB_SYS;

  newXSproto ("$PACKAGE\::find", find, file, "\$");
  newXSproto ("$PACKAGE\::list", list, file, "");
EOF

# calls
for (@static_ext) {
   s/\.pm$//;

   (my $cname = $_) =~ s/\//__/g;
   (my $pname = $_) =~ s/\//::/g;

   my $bootstrap = $pname eq "DynaLoader" ? "boot" : "bootstrap";

   print $fh "  newXS (\"$pname\::$bootstrap\", boot_$cname, file);\n";
}

print $fh <<EOF;
  Perl_av_create_and_unshift_one (&PL_preambleav, newSVpv (bootstrap, sizeof (bootstrap) - 1));
}
EOF

#############################################################################
# optional perl_init/perl_destroy

if ($APP) {
   print $fh <<EOF;

int
main (int argc, char *argv [])
{
  extern char **environ;
  int exitstatus;

  static char *args[] = {
    "staticperl",
    "-e",
    "0"
  };

  PERL_SYS_INIT3 (&argc, &argv, &environ);
  staticperl = perl_alloc ();
  perl_construct (staticperl);

  PL_exit_flags |= PERL_EXIT_DESTRUCT_END;

  exitstatus = perl_parse (staticperl, staticperl_xs_init, sizeof (args) / sizeof (*args), args, environ);
  if (!exitstatus)
    perl_run (staticperl);

  exitstatus = perl_destruct (staticperl);
  perl_free (staticperl);
  PERL_SYS_TERM ();

  return exitstatus;
}
EOF
} elsif ($PERL) {
   print $fh <<EOF;

int
main (int argc, char *argv [])
{
  extern char **environ;
  int exitstatus;

  PERL_SYS_INIT3 (&argc, &argv, &environ);
  staticperl = perl_alloc ();
  perl_construct (staticperl);

  PL_exit_flags |= PERL_EXIT_DESTRUCT_END;

  exitstatus = perl_parse (staticperl, staticperl_xs_init, argc, argv, environ);
  if (!exitstatus)
    perl_run (staticperl);

  exitstatus = perl_destruct (staticperl);
  perl_free (staticperl);
  PERL_SYS_TERM ();

  return exitstatus;
}
EOF
} else {
   print $fh <<EOF;

EXTERN_C void
staticperl_init (void)
{
  extern char **environ;
  int argc = sizeof (args) / sizeof (args [0]);
  char **argv = args;

  static char *args[] = {
    "staticperl",
    "-e",
    "0"
  };

  PERL_SYS_INIT3 (&argc, &argv, &environ);
  staticperl = perl_alloc ();
  perl_construct (staticperl);
  PL_origalen = 1;
  PL_exit_flags |= PERL_EXIT_DESTRUCT_END;
  perl_parse (staticperl, staticperl_xs_init, argc, argv, environ);

  perl_run (staticperl);
}

EXTERN_C void
staticperl_cleanup (void)
{
  perl_destruct (staticperl);
  perl_free (staticperl);
  staticperl = 0;
  PERL_SYS_TERM ();
}
EOF
}

print -s "$PREFIX.c", " octets (", (length $data) , " data octets).\n\n"
   if $VERBOSE >= 1;

#############################################################################
# libs, cflags

{
   print "generating $PREFIX.ccopts... "
      if $VERBOSE >= 1;

   my $str = "$Config{ccflags} $Config{optimize} $Config{cppflags} -I$Config{archlibexp}/CORE";
   $str =~ s/([\(\)])/\\$1/g;

   open my $fh, ">$PREFIX.ccopts"
      or die "$PREFIX.ccopts: $!";
   print $fh $str;

   print "$str\n\n"
      if $VERBOSE >= 1;
}

{
   print "generating $PREFIX.ldopts... ";

   my $str = $STATIC ? "-static " : "";

   $str .= "$Config{ccdlflags} $Config{ldflags} @libs $Config{archlibexp}/CORE/$Config{libperl} $Config{perllibs}";

   my %seen;
   $str .= " $_" for grep !$seen{$_}++, ($extralibs =~ /(\S+)/g);

   for (@staticlibs) {
      $str =~ s/(^|\s) (-l\Q$_\E) ($|\s)/$1-Wl,-Bstatic $2 -Wl,-Bdynamic$3/gx;
   }

   $str =~ s/([\(\)])/\\$1/g;

   open my $fh, ">$PREFIX.ldopts"
      or die "$PREFIX.ldopts: $!";
   print $fh $str;

   print "$str\n\n"
      if $VERBOSE >= 1;
}

if ($PERL or defined $APP) {
   $APP = "perl" unless defined $APP;

   print "building $APP...\n"
      if $VERBOSE >= 1;

   system "$Config{cc} \$(cat bundle.ccopts\) -o \Q$APP\E bundle.c \$(cat bundle.ldopts\)";

   unlink "$PREFIX.$_"
      for qw(ccopts ldopts c h);

   print "\n"
      if $VERBOSE >= 1;
}

