#!/usr/local/bin/perl
# $File: //member/autrijus/Encode-HanConvert/bin/b2g.pl $ $Author: autrijus $
# $Revision: #5 $ $Change: 3801 $ $DateTime: 2003/01/24 21:18:22 $

$VERSION = '0.07';

=head1 NAME

b2g.pl - Convert from Big5 to GBK (CP936)

=head1 SYNOPSIS

B<b2g.pl> [ B<-p> ] [ B<-u> ] [ I<inputfile> ...] > I<outputfile>

=head1 DESCRIPTION

The B<b2g.pl>/B<g2b.pl> utilities read files sequentially, convert them
between GBK and Big5, and write the result to the standard output.  The
file operands are processed in command-line order.  If a file operand is
a single dash (C<->) or absent, the program reads from the standard input.

The C<-p> switch enables rudimentary phrase-oriented substitution via a
small built-in lexicon.  The C<-u> switch specifies that both the
input and output streams should be UTF-8 encoded.  The C<-h> switch
displays this documentation via L<perldoc> and exits.

Example usage:

    % b2g.pl -p < big5.txt > gbk.txt
    % b2g.pl -pu < trad.txt > simp.txt

=cut

use strict;
use Getopt::Std;

# Forward declaration: MAP is created by a `use constant` further down the
# file.  Declaring it with an empty prototype here lets the bareword MAP
# in the code below compile under `use strict`.
sub MAP ();

# Command-line switches.  They are parsed inside BEGIN so that the
# compile-time `use constant` declarations below can read them.
#   -h  show the documentation through perldoc and exit
#   -u  treat input and output as UTF-8
#   -p  enable the phrase-lexicon substitution pass
my %opts;
BEGIN {
    getopts('hup', \%opts);
    if ($opts{h}) { system("perldoc", $0); exit }

    # Every warning raised from this point on is deliberately discarded.
    $SIG{__WARN__} = sub {};
}

use constant UTF8 => $opts{u};

# The phrase pass is driven by -p, the switch getopts() accepts and the
# documentation describes.  (It used to read $opts{d}, which getopts()
# never sets, so the lexicon could not be enabled at all.)
use constant DICT => $opts{p};

use Encode::HanConvert;

# NOTE(review): only STDIN/STDOUT receive the :utf8 layer here; files named
# on the command line are read through <> -- confirm that they are decoded
# as intended when -u is combined with file operands.
if (UTF8 and $] >= 5.008) { binmode(STDIN, ':utf8'); binmode(STDOUT, ':utf8') }

# Build the phrase-matching alternation once, before the main loop.
#   * a plain declaration plus an if-block replaces `my $x = ... if COND`,
#     whose behaviour perl documents as undefined;
#   * empty keys are skipped, because an empty alternative would match at
#     every position of every line;
#   * longer keys are listed first so the longest phrase wins;
#   * keys are escaped with quotemeta so bytes that happen to be regex
#     metacharacters are matched literally.
my ($KEYS, $MAP);
if (DICT) {
    $MAP  = MAP;
    $KEYS = join '|',
        map  { quotemeta }
        sort { length($b) <=> length($a) }
        grep { length }
        keys %$MAP;
}

while (<>) {
    # The converters are called for their effect on $_; their return
    # values are not used.
    if (UTF8) { Encode::HanConvert::trad_to_simp($_) }
    else      { Encode::HanConvert::big5_to_gb($_) }

    # Phrase pass over the converted line, matching with byte semantics.
    # Skipped when the lexicon yielded no usable keys.
    if (DICT and length $KEYS) { use bytes; s/($KEYS)/$MAP->{$1}/g }

    print;
}

# MAP: phrase lexicon (source phrase => replacement) used by the DICT
# substitution pass, which runs on each line after the character-level
# conversion.  `DICT && {...}` short-circuits, so MAP is simply DICT's
# false value when the dictionary is disabled.
# NOTE(review): the keys and values are raw byte strings; many entries
# show up empty or garbled when this file is viewed as UTF-8.  Confirm
# the file's on-disk encoding before editing or re-saving this table.
use constant MAP => DICT && {
'̫' => '̫',
'̫·' => '̫',
'žų˷' => 'žű',
'жϵ' => 'ϵ',
'' => 'ӿ',
'Ԫ' => '',
'°' => 'İ',
'ȿ' => '',
'' => '',
'̫' => 'Ա',
'̫' => 'з',
'̫' => 'ɻ',
'̫մ' => 'ɴ',
'' => 'Ƿ',
'' => 'ڲ',
'֧Ԯ' => '֧',
'ļ' => 'ҳ',
'ն' => 'ն',
'б' => '',
'ƴ' => '',
'Ƭ' => '',
'ܱ' => '˵',
'' => '',
'ʷ' => '˹',
'̫' => 'ռ',
'ϣ' => 'ʲ',
'ƽҵ' => 'в',
'ƽв' => 'ж˿',
'ƽ' => '',
'԰' => '׶԰',
'ĸ' => 'Ԫ',
'' => '',
'ʿ' => '˹',
'ٽ' => 'н',
'' => '',
'ȫ' => 'ȫ',
'' => '',
'' => '',
'ӡ' => 'ӡ',
'ӡ' => 'ӡ',
'' => '',
'' => '',
'Ȧ' => 'ѭ',
'Ӧ' => 'Ӧ',
'' => '',
'ִ' => 'ַ',
'' => 'ǰ׺',
'浵' => '',
'' => '',
'' => 'տ',
'' => '',
'' => '',
'Ԫ' => 'Ԫ',
'λַ' => 'ַ',
'' => '',
'֤' => '֤',
'ŷ' => '',
'ҵϵͳ' => 'ϵͳ',
'' => '',
'ͽ' => 'ͼ',
'ʱ' => 'ʵʱ',
'' => '',
'' => '',
'ȥ' => 'ȥ',
'' => '',
'궼˹' => '鶼˹',
'в' => 'ж˿',
'ɳ' => 'ɫ',
'ɳڵذ' => 'ɳذ',
'' => '',
'' => '',
'д' => 'д',
'' => '',
'' => '',
'Э' => 'Э',
'' => '',
'ܱ' => 'Χ',
'' => '',
'ע' => 'ע',
'ע' => 'ע',
'' => '',
'֪ʶ' => '֪ʶ',
'ʯ' => 'ʯ',
'Ƭ' => 'оƬ',
'羧' => '辧',
'հ׼' => 'ո',
'񼪶' => '𼪶',
'' => 'ž',
'ķ˹׳' => 'ķ˹',
'˹ƥ' => '˾ƥ',
'Ϲ' => '',
'ָ' => 'ָ',
'' => '',
'ӳ' => 'ӳ',
'η' => 'η',
'' => '',
'Ү' => 'ʥ',
'' => '',
'ʱ' => 'ʱ',
'Ƴ̳' => '⳵',
'' => '',
'˹' => '˹',
'ز' => 'ط',
'ʳ' => ';',
'' => 'ù',
'' => '',
'' => '¥',
'װ' => '',
'' => '',
'' => '',
'ˮӡ' => 'ˮӡ',
'' => 'ڳ',
'' => '',
'' => 'ؼ',
'' => '',
'̫' => '',
'C' => '',
'' => 'ڴ',
'O' => '',
'' => '',
'' => '',
'ʽ' => 'ӳ',
'' => 'չ',
'' => '',
'Ψ' => 'ֻ',
'ѵ' => 'ջ',
'ר' => 'Ŀ',
'ʽ' => '',
'˵Ҹ' => 'ҿ',
'ݾ' => 'ݷʽ',
'ɨ' => 'ɨ',
'' => '',
'' => '',
'϶' => 'ɶ',
'Ͽ' => 'ϼ',
'Ī' => 'Ī',
'Ƭ' => '',
'ͨѶ¼' => 'ͨѶ',
'ͨ' => 'ŵ',
'' => '',
'ʳ' => '',
'' => '',
'' => '',
'' => 'ι',
'' => '',
'Բ' => 'Բ',
'Ƭ' => 'оƬ',
'ǻ' => '',
'α' => '',
'' => '',
'ǳ' => 'ע',
'Ӳ' => 'Ӳ',
'ʽ' => '',
'' => '̿',
'¼' => '¼',
'' => '',
'' => '',
'ζ' => '̶',
'Ӵ' => '',
'ģ' => 'ģ',
'' => '',
'ʵ' => '',
'' => 'ڳ',
'' => '',
'' => '',
'ܽ' => '',
'¿' => '',
'»' => 'ί',
'й' => '',
'º' => 'ºʲ',
'' => '',
'' => '',
'' => '',
'ʥ' => 'ʥǸ',
'' => 'ֱ',
'' => '',
'' => 'ӱ',
'Ѷ' => 'Ϣ',
'' => '',
'Ԫ' => '',
'ʽ' => 'ʽ',
'Ҹ' => '޸',
'բ' => '',
'ӡ' => 'ӡ',
'׸' => '',
'羧' => '',
'紫Ѷ' => 'ͼĵ',
'' => '',
'Գʽ' => '',
'' => '緹',
'ͼʾ' => 'ͼ',
'' => 'ҵ',
'ߡ' => 'ۺ',
'㷨' => '㷨',
'Ƭ' => '',
'Ź' => 'ŵ',
'' => '',
'ŵ' => '',
'ŵ' => '',
'ŵ' => 'ļ',
'̼ϻ' => 'īۺ',
'' => '',
'' => 'Կ',
'·' => '',
'L' => 'ѹ',
'ʿ' => '',
'³ѩ' => '³',
'Զ' => 'Զ',
'' => '',
'' => '',
'ݻ' => 'ƽ',
'ݴ' => '',
'ǩ' => '',
'ģ' => 'ģ',
'ģ' => '',
'ģ' => '',
'ʯ' => 'ʯ',
'' => 'ģ',
'Ϳ' => 'Ϳ',
'Ϳ' => 'Ϳ',
'ҵ' => '',
'՞' => '',
'' => '',
'޹޹' => 'ѬѬ',
'ī' => 'ī',
'ڱ' => 'ǽ',
'' => '',
'·' => 'ɵ·',
'өĻ' => 'Ļ',
'ѯ' => 'ѯ',
'ŵ' => 'ŵ',
'ŵ' => 'ŵ',
'¼Ӱ' => '¼',
'' => '',
'倸' => '',
'' => '',
'˪' => '',
'' => '',
'' => 'ʾĸ',
'' => '',
'޹' => 'Ѭ',
'ת' => 'ת۵',
'Ѷ' => '',
'' => '',
'޲ֿ' => '޲',
'޵µ' => '޵õ',
'ʶ' => 'ʶ',
'ο' => 'ӿ',
'ǻ' => 'ʻ',
'ؼ' => 'ؼ',
'' => 'ģ',
'' => '',
'Я' => 'Яʽ',
'λ' => 'ֶ',
'' => '',
'Ȩ' => '',
'' => 'ƬĶ',
'̫' => '̫',
'ţ' => 'ţ',
'kͬ' => 'ͬ',
'F' => '',
};

__END__

=head1 SEE ALSO

L<g2b.pl>, L<Encode::HanConvert>

=head1 AUTHORS

Autrijus Tang E<lt>autrijus@autrijus.orgE<gt>

=head1 COPYRIGHT

Copyright 2002 by Autrijus Tang E<lt>autrijus@autrijus.orgE<gt>.

This program is free software; you can redistribute it and/or 
modify it under the same terms as Perl itself.

See L<http://www.perl.com/perl/misc/Artistic.html>

=cut
