#!/usr/local/bin/perl
# $File: //member/autrijus/Encode-HanConvert/bin/g2b.pl $ $Author: autrijus $
# $Revision: #2 $ $Change: 3578 $ $DateTime: 2003/01/16 17:39:16 $

# Script version.  This is a plain package global: it is assigned before the
# lexically scoped "use strict" further down takes effect, so no "our" is used.
$VERSION = '0.05';

=head1 NAME

g2b.pl - Convert from GBK (CP936) to Big5

=head1 SYNOPSIS

B<g2b.pl> [ -p ] [ I<inputfile> ...] > I<outputfile>

=head1 DESCRIPTION

The B<b2g.pl>/B<g2b.pl> utility reads files sequentially, converts them
between GBK and Big5, then writes them to the standard output.  The
file operands are processed in command-line order.  If a file operand is
a single dash (C<->) or absent, this program reads from the standard input.

The C<-p> switch enables rudimentary phrase-oriented substitution via a
small built-in lexicon.

Example usage:

    % g2b.pl -p < gbk.txt > big5.txt

=cut

use strict;

# Main program: convert every input line from GBK to Big5 and print it.
# With -p as the first argument, phrases found in the MAP lexicon are
# additionally replaced after the character-level conversion.

# Show the embedded documentation and quit when any argument starts with -h.
(system("perldoc", $0), exit) if (grep /^-h/i, @ARGV);

# Deliberately discard every warning so that only converted text is emitted.
$SIG{__WARN__} = sub {};

require Encode::HanConvert;

# Forward declaration: MAP is created by a "use constant" elsewhere in this
# file; declaring it here lets the code below refer to it as a constant.
sub MAP ();

# -p is only recognised as the very first argument.  The defined() guard
# keeps the comparison from touching an undefined value when no arguments
# were given at all.
my $use_phrases = (defined($ARGV[0]) && $ARGV[0] eq '-p');
shift @ARGV if $use_phrases;

my ($KEYS, $MAP);
if ($use_phrases) {
    $MAP  = +MAP;
    # Longest phrases first, so the alternation prefers the longest match.
    $KEYS = join('|', map quotemeta, sort { length($b) <=> length($a) } keys %$MAP);
}

while (<>) {
    # Called in void context; the print below relies on $_ having been
    # converted in place.
    Encode::HanConvert::gb_to_big5($_);

    if ($use_phrases) {
        use bytes;
        # Walk the line one token at a time: a lexicon phrase, a single
        # 7-bit byte, or a lead byte plus its trail byte.  Because every
        # match consumes whole characters, a phrase can only match at a
        # character boundary; a plain byte-level search could start a match
        # on the trail byte of one double-byte character and garble the text.
        s/((?:$KEYS)|[\x00-\x7F]|[\x80-\xFF][\x00-\xFF]?)/exists $MAP->{$1} ? $MAP->{$1} : $1/ge;
    }

    print;
}

# Phrase lexicon consulted by the -p switch.
#
# MAP is a constant holding a hash reference.  The pairs below are passed
# through reverse(), which flips the whole list: an entry written as
# 'X' => 'Y' therefore ends up with 'Y' as the hash key and 'X' as the value.
# The -p code path looks for the hash keys in text that has already been
# through gb_to_big5() and replaces each one with its value.  When two
# entries share the same right-hand string, the one listed first wins,
# because it comes last in the reversed list.
#
# "use constant" runs at compile time, so MAP is available to the main
# program even though this definition appears after it in the file.
#
# NOTE(review): the string literals are raw bytes in a legacy double-byte
# encoding (presumably Big5) -- do not re-save this file in another encoding.
use constant MAP => { reverse (
'A' => 'H',
'AӺ' => 'HӺ',
'EEk' => 'EE',
'K' => 'K`',
'XC' => 'C',
'_I' => '_I',
'H' => '`',
'' => 'f',
'' => 'ե',
'ƥ]' => ']',
'ɵҸ' => 'ɯŦҸ',
'ɾ' => 'ɧ',
'̧x' => 'ˮx',
'ӪŤH' => 'ѭ',
'ӪŦ' => 'tzA',
'Ӫű' => 'ѭ',
'ӪŲ' => 'tz',
'ګ' => 'ຸڳ',
'fWï' => 'fï',
'䴩' => '',
'' => '',
'麸ҥ' => 'հҥ',
'ı׶' => '',
'' => '',
'y' => '',
'\' => '',
']' => 'c',
'vFL' => 'jL',
'~Ӫ' => '~hŶ',
'' => 'e',
'' => '',
'@~' => 'æާ@',
'' => 'æݤf',
'u' => 'æu',
'X' => '',
'' => '',
'' => 'ƾ',
'hy' => '𴵴',
'' => '',
'о' => 'X',
'' => '',
'@' => '@',
'B' => 'ΦB',
'CL' => 'L',
'L' => 'L',
'V' => 'Vۥ',
']' => ']l',
'j' => '`',
'^' => 'T',
'h[' => 'h̥[',
'r' => 'rŦ',
'r' => 'e',
's' => 'sL',
'Ⱦ' => 'ھ',
'жq' => 'q',
'' => 'JX',
'褸' => '',
'}' => 'a}',
'C' => 'C',
'' => '',
'A' => 'AȾ',
'@~t' => 'ާ@t',
'Bu' => 'B',
'Cy' => 'CŻy',
'Y' => '',
'l' => 'lۥ',
'Դ' => 'xԴ',
'ǦC' => 'ݤf',
'˰' => '',
'FԪo' => 'Ԫo',
'FQaԧB' => 'FSԧB',
'HA' => 'IA',
'M' => '^M',
'n' => 'n',
'g' => 'gO@',
'Ӻֽu' => 'ӽƽu',
'L' => '䥦',
'w' => 'ĳ',
'b' => 'uʱ',
'P' => '~',
'}' => '}ۥ',
'ݸO' => 'ݭO',
'`}' => '}`',
'`' => '`',
'' => '§',
'' => 'H',
'' => '',
'ѥl' => 'Ѥl',
'' => 'ֺ',
'' => 'ֺ',
'q' => 'ֺ',
'ť' => 'Ů',
'N' => 'CN',
'w' => '|',
'i' => 'iS',
'F' => 'qǪL',
'cM' => 'KM',
'n' => 'f',
'˳H^' => '˳H',
'' => 'w',
'A' => 'A',
'HѤ' => 'Ѥ',
'Mg' => 'M',
'ȥ' => 'ȱۥ',
'ۮe' => 'ݮe',
'Cϸ`' => 'tϸ`',
'I' => 'Iۥ',
'^o' => '^T',
'pɾ' => 'wɾ',
'p{' => 'X',
'p' => 'p⾹',
'}i' => '}',
'' => '^',
'' => '\',
'˷' => '˾`',
'Ծ' => 'Ծ',
'M˳n' => 'n]',
'y' => '',
'z' => 'zۥ',
'}' => 'ݵ}',
'' => 'ݳ',
'b' => 'ѽb',
'' => '@',
'BL' => 'L',
'QC' => 'Q',
'QHH' => 'QII',
'}r' => 'hr',
'D' => 'y',
'Q' => '',
'ż' => 'Qż',
'޻K' => 'vK',
'㰩' => 'm',
'`p' => '`p',
'' => 'g',
'O' => 's',
'sf' => 'st',
'}C' => 'Ʋ',
'' => '',
'Ƶ{' => 'l{',
'ɦW' => 'XiW',
'հն' => 'ԩԶ',
'Ū' => 'uŪ',
'|' => '',
'M' => '',
'`' => 'ҵ{',
'dDf' => 'dIfJ',
'|' => 'ֱ覡',
'y' => 'y',
'Ұ' => 'E',
'wz' => 'Īѩwz',
'd' => '[',
'˻' => '',
'S' => 'S',
'J' => '',
'n' => '',
'qT' => 'qTï',
'qD' => 'HD',
'su' => 'p',
't' => '\\',
'Z' => '`',
'HH' => 'II',
'KK' => '',
'¤l' => 'l',
'¶Ѷ' => 'ĽѶ',
'·' => 'F',
'ټM' => '~M',
'' => 'ݴ',
'' => '',
'' => '',
'' => '',
'z' => '',
'' => '',
'Lka' => '^ka',
'fX' => 'U',
'nX' => '`P',
'w' => 'w',
'{' => '{',
'{Ǳ' => 'L{',
'' => '',
'' => '',
'' => 'o',
'' => '',
'y' => 'Sy',
'Ҭr' => 'Ϭr',
'' => 'f',
'Ws' => 'W챵',
'lϸ' => 'lFsX',
'¨HH' => '¨II',
'ü' => 'H',
'׬y' => '`u',
'콦' => '',
'JԲ{' => 'XJԲ{',
'|' => 'e|',
'P' => 'Pۥ',
'suL' => 'su',
'tHH' => 'tII',
'' => 'B',
'ƹ' => '',
'qjQ' => 'NjQ',
'ta' => 'taȭ',
'ѪR' => 'v',
'ѽX' => 'ĶX',
'պ' => 'ql',
'T' => 'H',
'J' => '[',
'B⤸' => 'ާ@',
'B⦡' => 'F',
'O~' => 'Ʀ~',
'P@' => 'Q@',
'dI' => 'I',
'hD' => '',
'pgL' => 'EL',
'p' => '',
'q' => '',
'qǵT' => 'Ϥq',
'q' => 'p',
'q{' => 'p{',
'q' => 'q',
'ù' => 'o',
'ϥ' => 'ϼ',
'GF' => 'GH',
'PX' => 'X',
'jf' => 'f',
'tk' => 'k',
'E' => '',
'' => '詥',
'Ϥ' => 'ϽL',
'ϭy' => 'ϹD',
'ϰ' => '',
'Ϻ' => 'ϽL',
'Ϻо' => 'ϽLXʾ',
'Ϻ' => 'ϽL',
'үX' => '',
'ְL' => 'ֺL',
'' => '_',
'' => '',
'éégg' => 'XX',
'ég' => 'X',
'`' => 'j',
'h' => 'b',
'|' => '|',
'' => '{',
'' => '',
'f' => '\f',
'۰' => '\\۰',
'x' => '',
'ƾھ' => 'ըѽվ',
'Ȧs' => 'ws',
'ñ' => '',
'Ҳ' => 'Ҷ',
'' => 'u',
'p' => 'up⾹',
'Y' => 'L',
'd' => 'ҪO',
'kk' => 'JJ',
'k' => 'J',
'uW@~' => 'pާ@',
'' => '',
'R' => 'ľR',
'A~' => 'A',
'Bn' => '̽',
'K' => 'K',
'HH' => 'II',
'' => '',
'p' => 'Ap',
'nq' => 'q',
'ù' => '̹',
'Ը' => 't',
'հҦa' => 'հҩ',
'հҲ' => 'հҩ',
'v' => '',
'J' => '箫',
'z' => 'zB',
'V' => '',
'ͥ' => 'ͱۥ',
'' => 'ҿ',
'鿤' => '',
'NO' => 'BNO',
'n' => 'H',
'²' => 'tܤZ',
'Ǿ' => 'ɾ',
'Ȥ' => '',
'I' => 'I',
'T' => 'n',
']]' => 'VV',
'ùLd' => 'ùd',
'ùù' => 'oo',
'ùwq' => 'ùoq',
'ùù' => 'oo',
'ùù۶' => 'oo۶',
'ѧOr' => 'Ѳ',
'߽' => 'k',
'' => '',
'r' => '',
'' => '',
'T' => '',
'a' => 'K⦡',
'' => 'rq',
'Iܹ' => 'Tܹ',
'Jg' => 'E',
'v' => 'OP',
'Ūd' => 'd\Ū',
'B' => '',
'eӧg' => 'ڮӧg',
'Ʀ' => '',
'Y' => 'AY',
'带' => '¦',
'' => '',
'P' => 'J',
'' => 'o',
'￦X' => 'X',
) }
__END__

=head1 SEE ALSO

L<b2g.pl>, L<Encode::HanConvert>

=head1 AUTHORS

Autrijus Tang E<lt>autrijus@autrijus.orgE<gt>

=head1 COPYRIGHT

Copyright 2002 by Autrijus Tang E<lt>autrijus@autrijus.orgE<gt>.

This program is free software; you can redistribute it and/or 
modify it under the same terms as Perl itself.

See L<http://www.perl.com/perl/misc/Artistic.html>

=cut
