use strict;
use warnings;
package Archive::BagIt::Role::Manifest;

use Moo::Role;
with 'Archive::BagIt::Role::Plugin';

use namespace::autoclean;

has 'algorithm' => (
    is => 'rw',
    #isa=>'HashRef',
);

has 'manifest_file' => (
    is      => 'rw',
    lazy    => 1,
    builder => '_build_manifest_file',
);

sub _build_manifest_file {
    my $self = shift;
    my $algorithm = $self->algorithm()->name;
    my $file = $self->bagit->metadata_path."/manifest-$algorithm.txt";
    if (-f $file) {
        return $file;
    }
    return;
}


has 'tagmanifest_file' => (
    is      => 'rw',
    lazy    => 1,
    builder => '_build_tagmanifest_file'
);

sub _build_tagmanifest_file {
    my $self = shift;
    my $algorithm = $self->algorithm()->name;
    my $file = $self->bagit->metadata_path."/tagmanifest-$algorithm.txt";
    if (-f $file) {
        return $file;
    }
    return;
}

sub BUILD {}

after BUILD => sub {
    my $self = shift;
    my $algorithm = $self->algorithm->name;
    $self->{bagit}->{manifests}->{$algorithm} = $self;
};

sub verify_file {
}

sub verify {
}

sub manifest {
}


has 'manifest_entries' => (
    is => 'ro',
    lazy => 1,
    builder => '_build_manifest_entries',
);

has 'tagmanifest_entries' => (
    is => 'ro',
    lazy => 1,
    builder => '_build_tagmanifest_entries',
);

sub __build_xxxmanifest_entries {
    my ($self, $xxmanifest_file) = @_;
    my $xxmanifest_entries = {};
    my $bag_path = $self->bagit->bag_path();
    die("Cannot open $xxmanifest_file: $!") unless (open(my $XXMANIFEST, "<:encoding(utf8)", $xxmanifest_file));
    my $algo = $self->algorithm()->name;
    while (my $line = <$XXMANIFEST>) {
        chomp($line);
        my ($digest, $file) = split(/\s+/, $line, 2);
        next unless ((defined $digest) && (defined $file)); # empty lines!
        $xxmanifest_entries->{$algo}->{$file} = $digest;
    }
    close($XXMANIFEST);
    return $xxmanifest_entries;
}

sub _build_tagmanifest_entries {
    my ($self) = @_;
    my $tm_file = $self->tagmanifest_file();
    if (defined $tm_file) {
        return $self->__build_xxxmanifest_entries($tm_file);
    }
    return;
}

sub _build_manifest_entries {
    my ($self) = @_;
    my $m_file = $self->manifest_file();
    if (defined $m_file) {
        return $self->__build_xxxmanifest_entries($m_file);
    }
    return;
}

sub normalize_payload_filepath {
    my $filename = shift;
    $filename =~ s#[\\](?![/])#/#g; # normalize Windows Backslashes, but only if they are no escape sequences
    $filename =~ s#%#%25#g; # normalize percent
    $filename =~ s#\x{0a}#%0A#g; #normalize NEWLINE
    $filename =~ s#\x{0d}#%0D#g; #normlize CARRIAGE RETURN
    $filename =~ s# #%20#g; # space
    $filename =~ s#"##g; # quotes
    return $filename;
}


sub check_if_payload_filepath_violates{
    my $local_name = shift;
    # HINT: there is no guarantuee *not* to escape!
    return
        ($local_name =~ m/^~/) # Unix Home
            || ($local_name =~ m#\./#) # Unix, parent dir escape
            || ($local_name =~ m#^[A-Z]:[\\/]#) # Windows Drive
            || ($local_name =~ m#^/#) # Unix absolute path
            || ($local_name =~ m#^$#) # Unix Env
            || ($local_name =~ m#^\\#) # Windows absolute path
            || ($local_name =~ m#^%[^%]*%#) # Windows ENV
            || ($local_name =~ m#^\*#) # Artifact of md5sum-Tool, where ' *' is allowed to separate checksum and file in fixity line
            || ($local_name =~ m#[<>:"?|]#) # Windows reserved chars
            || ($local_name =~ m#(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])#) # Windows reserved filenames
    ;
}

sub __calc_digest {
    my ($digestobj, $filename) = @_;
    open(my $fh, '<', $filename) || die("Can't open '$filename', $!");
    binmode($fh);
    my $digest = $digestobj->get_hash_string($fh);
    close $fh || die("could not close file '$filename', $!");
    return $digest;
}



# calc digest
# expects digestobj, expected_ref, array_ref of filenames
# returns arrayref of hashes where each entry has
# $tmp->{calculated_digest} = $digest;
# $tmp->{expected_digest} = $expected_digest;
# $tmp->{filename} = $filename;
sub calc_digests {
    my ($self, $bagit, $digestobj, $filenames_ref) = @_;
    my @digest_hashes;
    # check if we could use parallel
    my $is_parallelizeable;
    if (($self->bagit->has_parallel()) && (defined $self->bagit->parallel)) {
        my $err;
        ($is_parallelizeable, $err) = Class::Load::try_load_class("Parallel::Iterator");
        if (!$is_parallelizeable) {
            warn "Class 'Parallel::Iterator' could not be loaded…, $err\n";
            $self->{parallel} = undef;
        }
    }
    if ($is_parallelizeable) {
        my $class = Class::Load::load_class("Parallel::Iterator");
        $class->import( qw(iterate_as_array));
        @digest_hashes = iterate_as_array(
            sub {
                my ($idx, $localname) = @_;
                my $fullname = $bagit ."/". $localname;
                my $tmp;
                $tmp->{calculated_digest} = __calc_digest($digestobj, $fullname);
                $tmp->{local_name} = $localname;
                $tmp->{full_name} = $fullname;
                $tmp;
            }, $filenames_ref);
    }
    else { # fallback to serial processing
        @digest_hashes = map {
            my $localname = $_;
            my $fullname = $bagit ."/". $localname;
            my $tmp;
            $tmp->{calculated_digest} = __calc_digest($digestobj, $fullname);
            $tmp->{local_name} = $localname;
            $tmp->{full_name} = $fullname;
            $tmp;
        } @{$filenames_ref};
    }
    return \@digest_hashes;
}



sub _verify_XXX_manifests {
    my ($self, $xxprefix, $xxmanifest_entries, $files, $return_all_errors) =@_;
    # Read the manifest file
    my @files = grep {$_ !~ m/$xxprefix-[0-9a-zA-Z]+\.txt/}@{ $files };
    my @invalid_messages;
    my $bagit = $self->bagit->bag_path;
    my $version = $self->bagit->bag_version();
    my $alg = $self->algorithm()->name;
    my $subref_invalid_report_or_die = sub {
        my $message = shift;
        if (defined $return_all_errors) {
            push @invalid_messages, $message;
        } else {
            die($message);
        }
        return;
    };
    foreach my $local_name (@files) {
        # local_name is relative to bagit base
        unless (-r $bagit."/".$local_name) {
            &$subref_invalid_report_or_die(
                "cannot read $local_name (bag-path:$bagit)",
            );
        }
    }
    # Evaluate each file against the manifest

        my $digestobj = $self->algorithm();
        my $local_xxfilename = "${xxprefix}-${alg}.txt";
        my $xxfilename = "${bagit}/$local_xxfilename";

        # first check if each file from payload exists in manifest_entries for given alg
        foreach my $local_name (@files) {
            my $normalized_local_name = normalize_payload_filepath($local_name);
            # local_name is relative to bagit base
            unless (exists $xxmanifest_entries->{$alg}->{$normalized_local_name}) { # localname as value should exist!
                &$subref_invalid_report_or_die(
                    "file '$local_name' (normalized='$normalized_local_name') found, which is not in '$local_xxfilename' (bag-path:'$bagit')!"
                #"DEBUG: \n".join("\n", keys %{$xxmanifest_entries->{$alg}}),
                );
            }
        }
        # second check if each file from manifest_entries for given alg exists in payload
        foreach my $local_name (keys %{$xxmanifest_entries->{$alg}}) {
            if ( # to avoid escapes via manifest-files
                check_if_payload_filepath_violates( $local_name )
            ) {
                &$subref_invalid_report_or_die("file '$local_name' not allowed in '$local_xxfilename' (bag-path:'$bagit'")
            }
            else {
                unless (List::Util::any {normalize_payload_filepath($_) eq $local_name} @files) {
                    &$subref_invalid_report_or_die(
                        "file '$local_name' NOT found, but expected via '$local_xxfilename' (bag-path:'$bagit')!"
                    );
                }
            }
        }
        # all preconditions full filled, now calc all digests
        my $digest_hashes_ref = $self->calc_digests($bagit, $digestobj, \@files);
        # compare digests
        if (defined $digest_hashes_ref && (ref $digest_hashes_ref eq 'ARRAY')) {
            foreach my $digest_entry (@{$digest_hashes_ref}) {
                my $normalized = normalize_payload_filepath($digest_entry->{local_name});
                $digest_entry->{expected_digest} = $xxmanifest_entries->{$alg}->{$normalized};
                if ($digest_entry->{calculated_digest} ne $digest_entry->{expected_digest}) {
                    &$subref_invalid_report_or_die(
                        sprintf("file '%s' (normalized='$normalized') invalid, digest (%s) calculated=%s, but expected=%s in file '%s'",
                            $digest_entry->{local_name},
                            $alg,
                            $digest_entry->{calculated_digest},
                            $digest_entry->{expected_digest},
                            $xxfilename
                        )
                    );
                }
            }
        }

    if($return_all_errors && (scalar @invalid_messages > 0)) {
        push @{ $self->bagit->{errors} },
            join("\n\t",
                sort @invalid_messages
            );
        return;
    }
    return 1;
}

sub verify_manifest {
    my ($self, $payload_files, $return_all_errors) = @_;
    if ($self->manifest_file()) {
        return $self->_verify_XXX_manifests(
            "manifest",
            $self->manifest_entries(),
            $payload_files,
            $return_all_errors
        );
    }
    return;
}

sub verify_tagmanifest {
    my ($self, $non_payload_files, $return_all_errors) = @_;
    if ($self->tagmanifest_file()) {
        return $self->_verify_XXX_manifests(
            "tagmanifest",
            $self->tagmanifest_entries(),
            $non_payload_files,
            $return_all_errors
        );
    }
    return;
}

sub create_manifest {
    my ($self) = @_;
    my $algo = $self->algorithm->name;
    my $manifest_file = $self->bagit->metadata_path."/manifest-${algo}.txt";
    # Generate digests for all of the files under ./data
    open(my $fh, ">",$manifest_file) or die("Cannot create manifest-${algo}.txt: $!\n");
    foreach my $rel_payload_file (@{$self->bagit->payload_files}) {
        #print "rel_payload_file: ".$rel_payload_file;
        my $payload_file = File::Spec->catdir($self->bagit->bag_path, $rel_payload_file);
        my $digest = $self->algorithm->verify_file( $payload_file );
        my $normalized_payload_file = normalize_payload_filepath($rel_payload_file);
        print($fh "$digest  $normalized_payload_file\n");
        #print "lineout: $digest $filename\n";
    }
    close($fh);
    return 1;

}

sub create_tagmanifest {
    my ($self) = @_;
    my $algo = $self->algorithm->name;
    my $tagmanifest_file = $self->bagit->metadata_path . "/tagmanifest-${algo}.txt";
    open(my $fh, ">", $tagmanifest_file) or die("Cannot create tagmanifest-${algo}.txt: $! \n");
    foreach my $rel_nonpayload_file (@{$self->bagit->non_payload_files}) {
        my $nonpayload_file = File::Spec->catdir($self->bagit->bag_path, $rel_nonpayload_file);
        if ($rel_nonpayload_file =~ m/tagmanifest-.*\.txt$/) {
            # Ignore, we can't take digest from ourselves
        }
        elsif (-f $nonpayload_file) {
            # non-payload is all which is not payload, this allows user to define and handle own subdirs
            my $digest = $self->algorithm->verify_file($nonpayload_file);
            my $normalized_nonpayload_file = normalize_payload_filepath($rel_nonpayload_file);
            print($fh "$digest  $normalized_nonpayload_file\n");
        }
        else {
            die("A file or directory that doesn't match: $rel_nonpayload_file");
        }
    }
    close($fh);
    return 1;
}

no Moo;
1;

__END__

=pod

=encoding UTF-8

=head1 NAME

Archive::BagIt::Role::Manifest

=head1 VERSION

version 0.060

=head1 NAME

Archive::BagIt::Role::Manifest

=head1 VERSION

version 0.060

=head2 calc_digests($bagit, $digestobj, $filenames_ref, $opts)

Method to calculate and return all digests for a a list of files using a Digest-object. This method will be overwritten by C<Archive::BagIt::Fast>.

If object ist build with option C<parallel>, the digests will be build in parallel.

=head1 AVAILABILITY

The latest version of this module is available from the Comprehensive Perl
Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
site near you, or see L<https://metacpan.org/module/Archive::BagIt/>.

=head1 SOURCE

The development version is on github at L<https://github.com/Archive-BagIt>
and may be cloned from L<git://github.com/Archive-BagIt.git>

=head1 BUGS AND LIMITATIONS

You can make new bug reports, and view existing ones, through the
web interface at L<http://rt.cpan.org>.

=head1 AUTHOR

Rob Schmidt <rjeschmi@gmail.com>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2020 by Rob Schmidt and William Wueppelmann and Andreas Romeyke.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=head1 AUTHOR

Rob Schmidt <rjeschmi@gmail.com>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2020 by Rob Schmidt and William Wueppelmann and Andreas Romeyke.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut
