#!perl
# gen-zh2 : auxiliary script for Chinese
#
# input files
#
#   zh.txt   (generated by gen-zh)
#   loc_zh.t (generated by gen-zh)
#   ../Collate/CJK/Big5.pm
#   ../Collate/CJK/GB2312.pm
#   ../Collate/CJK/Pinyin.pm
#   ../Collate/CJK/Stroke.pm
#
# output files
#
#   zh_big5.txt (equal to data/zh_big5.txt)
#   zh_gb.txt   (equal to data/zh_gb.txt)
#   zh_pin.txt  (equal to data/zh_pin.txt)
#   zh_strk.txt (equal to data/zh_strk.txt)
#   loc_zhb5.t  (a main part of t/loc_zhb5.t)
#   loc_zhgb.t  (a main part of t/loc_zhgb.t)
#   loc_zhpy.t  (a main part of t/loc_zhpy.t)
#   loc_zhst.t  (a main part of t/loc_zhst.t)
#
use strict;
use warnings;
use File::Spec;

# Namespace of the CJK ordering modules; "${pkg}::<name>" is written into
# each generated data file.
my $pkg  = 'Unicode::Collate::CJK';

# Per-module tables, all keyed by the module base name listed in @file:
#   %type - suffix appended (as "__<type>") to the 'zh' locale strings
#           in the generated test
#   %txt  - infix of the generated data file name:  zh_<txt>.txt
#   %t    - infix of the generated test file name:  loc_zh<t>.t
#   %test - number of tests added to the "plan tests => N" count taken
#           from loc_zh.t
my @file = qw(Big5          GB2312            Pinyin         Stroke       );
my %type = qw(Big5 big5han  GB2312 gb2312han  Pinyin pinyin  Stroke stroke);
my %txt  = qw(Big5 big5     GB2312 gb         Pinyin pin     Stroke strk  );
my %t    = qw(Big5 b5       GB2312 gb         Pinyin py      Stroke st    );
my %test = qw(Big5 21       GB2312 18         Pinyin 11      Stroke 19    );

# Slurp the common data template; it is copied into every zh_*.txt.
# Three-argument open keeps the mode separate from the file name, and the
# die message carries $! so the reason for a failure is reported.
open my $zh, '<', 'zh.txt' or die "zh.txt: $!";
my $zhtxt = join '', <$zh>;
close $zh;

# Slurp the common test template; it is rewritten into every loc_zh*.t.
open my $zt, '<', 'loc_zh.t' or die "loc_zh.t: $!";
my $zhtest = join '', <$zt>;
close $zt;

# checkdata($file)
#
# Generates the data file and the test file for one CJK ordering module.
#
#   $file - module base name (a key of %txt, %t, %type and %test), e.g.
#           'Big5'.  Any further arguments are ignored.
#
# Reads ../Collate/CJK/$file.pm, taking every whitespace-separated token
# between the __DATA__ and __END__ lines as a hexadecimal code point.
# Tokens are numbered consecutively starting at weight 0x8000.
#
# Writes two files into the current directory:
#   zh_$txt{$file}.txt - a "use"/"overrideCJK" header, the contents of
#                        zh.txt, then one explicit entry per code point
#                        that lies outside the three ranges tested below
#   loc_zh$t{$file}.t  - the contents of loc_zh.t with the locale name,
#                        the objZh identifier and the test plan adjusted
#
# Dies with the file name and the OS error if a file cannot be opened or
# an output file cannot be flushed on close.
sub checkdata {
    my $file = shift;
    my $init = substr($file, 0, 1);

    my $d = File::Spec->updir();
    my $f = File::Spec->catfile($d, 'Collate', 'CJK', "$file.pm");
    open my $fh, '<', $f or die "$f: $!";

    my @out;
    my $wt = 0x8000;
    my $isdata;
    # Read into a lexical rather than $_: a bare while (<$fh>) assigns to
    # the global $_ without localizing it, which would overwrite whatever
    # the caller currently has aliased to $_.
    while (my $line = <$fh>) {
        if (!$isdata) {
            # Everything up to and including the __DATA__ line is skipped.
            $isdata = 1 if $line =~ /^__DATA__/;
            next;
        }
        last if $line =~ /^__END__/;
        for my $c (split ' ', $line) {
            next if !$c;
            my $u = hex $c;
            my $h = sprintf('%X', $wt);
            # Code points inside these ranges get no explicit entry
            # (presumably they are weighted through the overrideCJK
            # callback named in the header -- confirm), but they still
            # consume a weight so the numbering follows the position in
            # the data section.
            push @out, [$c, $h] unless
                (0x4E00 <= $u && $u <= 0x9FA5 ||
                 0x3400 <= $u && $u <= 0x4DB5 ||
                0x20000 <= $u && $u <= 0x2A6D6);
            # FA1F in DUCET is not overridable.
            $wt++;
        }
    }
    close $fh;

    ### WRITE DATA ###
    my $textf = "zh_$txt{$file}.txt";
    open my $dh, '>', $textf or die "$textf: $!";
    binmode $dh;
    print $dh "use ${pkg}::${file}\n";
    print $dh "overrideCJK \\&${pkg}::${file}::weight${file}\n";
    print $dh $zhtxt;
    print $dh "$_->[0];[.$_->[1].20.2.$_->[0]]\n" for @out;
    # Buffered write errors only surface at close, so check it.
    close $dh or die "$textf: $!";

    ### WRITE TEST ###
    my $testf = "loc_zh$t{$file}.t";
    open my $th, '>', $testf or die "$testf: $!";
    binmode $th;
    my $test = $zhtest;
       # 'zh' (any letter case, in single quotes) => 'zh__<type>'
       $test =~ s/('[Zz][Hh])(')/$1__$type{$file}$2/g;
       # objZh => objZh<first letter of the module name>
       $test =~ s/(objZh)/$1$init/g;
       # raise the first "plan tests => N" by this module's extra tests
       $test =~ s/(plan tests => )(\d+)/$1.($2+$test{$file})/e;
    print $th $test;
    close $th or die "$testf: $!";
}

# Generate the data/test pair for every module.  A named lexical loop
# variable is used instead of the postfix "for" so that $_ is not aliased
# to the elements of @file while checkdata runs (code that assigns to the
# global $_, such as a bare while (<$fh>), would otherwise overwrite them).
for my $name (@file) {
    checkdata($name);
}
