#!/home/ben/software/install/bin/perl
use warnings;
use strict;
use utf8;
use FindBin '$Bin';
use JSON::Create 'write_json';
use lib "$Bin/lib";
use Unicode::Confuse::Parse 'parse_confusables';
use Getopt::Long;
use Convert::Moji 'make_regex';
use List::Util 'uniq';
use File::Slurper qw!read_text write_text!;
use feature 'signatures';
no warnings 'experimental::signatures';

# Default location of the Unicode confusables data file. It can be
# overridden with --file.
my $file = '/home/ben/data/unicode/confusables/confusables.txt';
# --file takes a string argument, --verbose is a boolean flag.
my $ok = GetOptions (
    "file=s" => \$file,
    verbose => \my $verbose,
);
# Either the options could not be parsed, or the input file is not
# there. In both cases show the usage message and stop.
if (! $ok || ! -f $file) {
    if ($ok) {
        # The options were fine, so say what the actual problem is.
        warn "$0: cannot find the confusables file '$file'\n";
    }
    print <<EOF;
This script is part of the Unicode::Confuse Perl distribution.

It regenerates the JSON file distributed with this module. Run it as
follows:

    $0 --file </path/to/confusables.txt>

Download the confusables file from

    https://www.unicode.org/Public/security/latest/confusables.txt

EOF
    # Nothing was generated, so exit with a failure status, so that
    # anything which runs this script can tell that it did not work.
    exit 1;
}
if ($verbose) {
    print "Parsing the file '$file':\n";
}
# The parsed data. The rest of this script treats it as a hash
# reference whose keys and values are both strings.
my $con = parse_confusables ($file, $verbose);
# The directory where the generated files go. This variable is also
# read by write_regex_module, which additionally reads "$dir.pm".
my $dir = "$Bin/lib/Unicode/Confuse";
if (! -d $dir) {
    # Make the directory, and any missing parents, without going via
    # the shell, so that spaces or shell metacharacters in the path
    # cannot break or alter the command.
    require File::Path;
    File::Path::make_path ($dir);
    if (! -d $dir) {
        die "Could not create the directory '$dir'";
    }
}
# The JSON output file.
my $out = "$dir/confusables.json";
if ($verbose) {
    print "Writing the data as JSON to $out\n";
}
# Every string which takes part in the table: all of the keys of
# $con, followed by each distinct value of $con. These strings are
# what goes into the regex module.
my @strings = (keys %$con, uniq (values %$con));

# The inverse of $con: each value of $con is mapped to the list of
# the keys of $con which have that value.
my %inverse;
for my $from (keys %$con) {
    push @{$inverse{$con->{$from}}}, $from;
}
# Put each of the lists into sorted order without repetitions.
for my $members (values %inverse) {
    @$members = uniq sort @$members;
}

# The top-level structure of the JSON file holds the table under
# "confusables" and its inverse under "reverse".
my %data = (
    confusables => $con,
    reverse => \%inverse,
);
write_json ($out, \%data, indent => 1, sort => 1);
write_regex_module (\@strings);
exit;

# Write the Perl module Unicode::Confuse::Regex into "$dir/Regex.pm".
# The module contains one regex, $re, which is an alternation of all
# of the strings in the array reference $keys. The version number of
# the generated module is copied out of "$dir.pm". This relies on the
# file-level variable $dir. It dies if no version can be found or if
# $keys contains no strings. There is no meaningful return value.

sub write_regex_module ($keys)
{
    # This looks clunky but we can't "use" this module, because it
    # "uses" the thing we are about to write out, and that will be
    # circular, so we read it and grab the version using a regex.
    my $pm = read_text ("$dir.pm");
    my $version;
    if ($pm =~ /\$VERSION\s*=\s*(\S+);/) {
        # This keeps any quotes around the version, so it can be
        # pasted into the generated code as it is.
        $version = $1;
    }
    if (! defined $version) {
        # The file itself was read successfully if we got here.
        die "Failed to read \$VERSION from $dir.pm";
    }
    # The same string may be in the list more than once, and there is
    # no point in having identical branches in the alternation.
    my @inputs = uniq (@$keys);
    if (! @inputs) {
        # An empty alternation would match every possible input.
        die "No strings to put into the regex";
    }
    # Escaping is necessary both for regex metacharacters and because
    # the regex is written using the "x" flag and the delimiter "!".
    @inputs = map {quotemeta} @inputs;
    # Longest first, so that a string which begins with another string
    # in the list is tried before the shorter one. The string
    # comparison makes the order, and so the output, reproducible.
    @inputs = sort {
        length ($b) <=> length ($a) || $a cmp $b
    } @inputs;
    # One alternative per line. The newlines are ignored under "x".
    my $re = join ('|', map ("$_\n", @inputs));
    my $out = "$dir/Regex.pm";
    # The generated module needs "use utf8;" because its regex
    # contains non-ASCII characters written out as UTF-8, and without
    # the pragma Perl reads them as bytes rather than as characters.
    my $reout = <<EOF;
# This module was generated by $0

package Unicode::Confuse::Regex;
use warnings;
use strict;
use utf8;
our \$VERSION = $version;

# Some people, when faced with a problem, think "I know, I'll use a
# regular expression". Now they have two problems.

our \$re = qr!
$re!x;

1;
EOF
    # The previous run left the file read-only, so make it writeable
    # again before overwriting it.
    if (-f $out) {
        chmod 0644, $out;
    }
    write_text ($out, $reout);
    # Make the output read-only, presumably to discourage editing a
    # generated file by hand.
    chmod 0444, $out;
}


