package ShiftJIS::String;

use Carp;
use strict;
use vars qw($VERSION $PACKAGE @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

$VERSION = '0.11';
$PACKAGE = 'ShiftJIS::String'; # __PACKAGE__

require Exporter;
@ISA = qw(Exporter);

%EXPORT_TAGS = (
    'issjis' => [ qw/issjis/ ],
    'string' => [ qw/length index rindex strspn strcspn strrev substr/ ],
    'ctype'  => [ qw/toupper tolower/ ],
    'tr'     => [ qw/mkrange strtr trclosure/ ],
    'kana'   => [ qw/hi2ka ka2hi hiXka/ ],
    'H2Z'    => [ qw/kataH2Z kanaH2Z spaceH2Z/ ],
    'Z2H'    => [ qw/kataZ2H kanaZ2H spaceZ2H/ ],
);

$EXPORT_TAGS{all}  = [ map @$_, values %EXPORT_TAGS ];
$EXPORT_TAGS{core} = [ map @$_, @EXPORT_TAGS{ qw/issjis string ctype tr/ } ];

@EXPORT_OK = @{ $EXPORT_TAGS{all} };
@EXPORT = ();

my $Char = '(?:[\x81-\x9F\xE0-\xFC][\x00-\xFF]|[\x00-\xFF])';

##
## issjis(LIST)
##
sub issjis
{
  for (@_) {
    my $str = $_;
    $str =~ s/[\x00-\x7F\xA1-\xDF]|[\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC]//g;
    return '' if CORE::length($str);
  }
  return 1;
}

##
## length(STRING)
##
sub length($) {
  my $str = shift;
  return 0 + $str =~ s/$Char//go;
}

##
## strrev(STRING)
## 
sub strrev($) {
  my $str = shift;
  join '', reverse $str =~ /$Char/go;
}

##
## index(STRING, SUBSTR; POSITION)
## 
sub index($$;$)
{
  my $cnt = 0;
  my($str, $sub) = @_;
  my $len = &length($str);
  my $pos = @_ == 3 ? $_[2] : 0;
  if ($sub eq "") {
    return $pos <= 0 ? 0 : $len <= $pos ? $len : $pos;
  }
  return -1 if $len < $pos;
  my $pat = quotemeta($sub);
  $str =~ s/^$Char//o ? $cnt++ : croak
    while CORE::length($str) && $cnt < $pos;
  $str =~ s/^$Char//o ? $cnt++ : croak
    while CORE::length($str) && $str !~ /^$pat/;
  return CORE::length($str) ? $cnt : -1;
}

##
## rindex(STRING, SUBSTR; POSITION)
##
sub rindex($$;$)
{
  my $cnt = 0;
  my($str, $sub) = @_;
  my $len = &length($str);
  my $pos = @_ == 3 ? $_[2] : $len;
  if ($sub eq "") {
    return $pos <= 0 ? 0 : $len <= $pos ? $len : $pos;
  }
  return -1 if $pos < 0;
  my $pat = quotemeta($sub);
  my $ret = -1;
  while ($cnt <= $pos && CORE::length($str)) {
    $ret = $cnt if $str =~ /^$pat/;
    $str =~ s/^$Char//o ? $cnt++ : croak;
  }
  return $ret;
}

##
## strspn(STRING, SEARCHLIST)
##
sub strspn($$)
{
  my($str, $lst) = @_;
  my $ret = 0;
  my(%lst);
  @lst{ $lst=~ /$Char/go } = ();
  while ($str =~ /($Char)/go) {
    last if ! exists $lst{$1};
    $ret++;
  }
  return $ret;
}

##
## strcspn(STRING, SEARCHLIST)
##
sub strcspn($$)
{
  my($str, $lst) = @_;
  my $ret = 0;
  my(%lst);
  @lst{ $lst=~ /$Char/go } = ();
  while ($str =~ /($Char)/go) {
    last if exists $lst{$1};
    $ret++;
  }
  return $ret;
}

##
## substr(STRING or SCALAR REF, OFFSET; LENGTH)
## substr(SCALAR, OFFSET, LENGTH, REPLACEMENT)
##
sub substr($$;$$)
{
  my($ini, $fin, $except);
  my($arg, $off, $len, $rep) = @_;
  my $str = ref $arg ? $$arg : $arg;

  my $slen = &length($str);
  $except = 1 if $slen < $off;
  if (@_ == 2) {$len = $slen - $off }
  else {
    $except = 1 if $off + $slen < 0 && $len + $slen < 0;
    $except = 1 if 0 <= $len && $off + $len + $slen < 0;
  }
  if ($except) {
    if (@_ > 3) {
      croak "$PACKAGE outside of string in substr";
    } else { return }
  }
  $ini = $off < 0 ? $slen + $off : $off;
  $fin = $len < 0 ? $slen + $len : $ini + $len;
  $ini = 0     if $ini < 0;
  $fin = $ini  if $ini > $fin;
  $ini = $slen if $slen < $ini;
  $fin = $slen if $slen < $fin;

  my $cnt  = 0;
  my $plen = 0;
  my $clen = 0;
  while ($str =~ /($Char)/go) {
    if   ($cnt < $ini) { $plen += CORE::length($1) }
    elsif($cnt < $fin) { $clen += CORE::length($1) }
    else               { last }
    $cnt++;
  }
  if (@_ > 3) {
    $_[0] = CORE::substr($str, 0,      $plen) .$rep.
            CORE::substr($str, $plen + $clen);
  }
  return ref $arg ? \ CORE::substr($$arg, $plen, $clen)
                  :   CORE::substr($str,  $plen, $clen);
}

##
## strtr(STRING or SCALAR REF, SEARCHLIST, REPLACEMENTLIST; 
##       MODIFIER, PATTERN, TOPATTERN)
##
my %Cache;

sub strtr($$$;$$$)
{
    my $str = shift;
    my $coderef = (defined $_[2] && $_[2] =~ /o/) 
	? ( $Cache{ join "\xFF", @_ } ||= trclosure(@_) )
	: trclosure(@_);
    &$coderef($str);
}


##
## trclosure(SEARCHLIST, REPLACEMENTLIST; MODIFIER, PATTERN, TOPATTERN)
##
sub trclosure($$;$$$)
{
  my(@fr, @to, $r, $R, $c, $d, $s, $i, %hash);
  my($fr, $to, $mod, $re, $tore) = @_;

  $r = defined $mod && $mod =~ /r/;
  $R = defined $mod && $mod =~ /R/;

  if (ref $fr) {
    @fr = @$fr;
    $re = defined $re ? "$re|$Char" :
       join('|', map(quotemeta($_), @$fr), $Char);
  } else {
    $fr = scalar mkrange($fr, $r) unless $R;
    $re = defined $re ? "$re|$Char" : $Char;
    @fr = $fr =~ /\G$re/g;
  }
  if (ref $to) {
    @to = @$to;
    $tore = defined $tore ? "$tore|$Char" :
       join('|', map(quotemeta($_), @$to), $Char);
  } else {
    $to = scalar mkrange($to, $r) unless $R;
    $tore = defined $tore ? "$tore|$Char" : $re;
    @to = $to =~ /\G$tore/g;
  }

  $c = defined $mod && $mod =~ /c/;
  $d = defined $mod && $mod =~ /d/;
  $s = defined $mod && $mod =~ /s/;
  $mod = $s * 4 + $d * 2 + $c;

  for ($i = 0; $i < @fr; $i++) {
    next if exists $hash{ $fr[$i] };
    $hash{ $fr[$i] } =
    @to ? defined $to[$i] ? $to[$i] : $d ? '' : $to[-1]
        : $d && !$c ? '' : $fr[$i];
  }
  return
    $mod == 3 || $mod == 7 ?
      sub { # $c: true, $d: true, $s: true/false, $mod: 3 or 7
        my $str = shift;
        my $cnt = 0;
        (ref $str ? $$str : $str) =~ s{($re)}{
          exists $hash{$1} ? $1 : (++$cnt, '');
        }ge;
        return ref $str ? $cnt : $str;
      } :
    $mod == 5 ?
      sub { # $c: true, $d: false, $s: true, $mod: 5
        my $str = shift;
        my $cnt = 0;
        my $pre = '';
        my $now;
        (ref $str ? $$str : $str) =~ s{($re)}{
          exists $hash{$1} ? ($pre = '', $1) : (++$cnt, 
            $now = @to ? $to[-1] : $1, 
            $now eq $pre ? '' : ($pre = $now) 
          );
        }ge;
        ref $str ? $cnt : $str;
      } :
    $mod == 4 || $mod == 6 ?
      sub { # $c: false, $d: true/false, $s: true, $mod: 4 or 6
        my $str = shift;
        my $cnt = 0;
        my $pre = '';
        (ref $str ? $$str : $str) =~ s{($re)}{
          exists $hash{$1} ? (++$cnt, 
             $hash{$1} eq '' || $hash{$1} eq $pre ? '' : ($pre = $hash{$1})
          ) : ($pre = '', $1);
        }ge;
        ref $str ? $cnt : $str;
      } :
    $mod == 1 ?
      sub { # $c: true, $d: false, $s: false, $mod: 1
        my $str = shift;
        my $cnt = 0;
        (ref $str ? $$str : $str) =~ s{($re)}{
          exists $hash{$1} ? $1 : (++$cnt, @to) ? $to[-1] : $1;
        }ge;
        ref $str ? $cnt : $str;
      } :
    $mod == 0 || $mod == 2 ?
      sub { # $c: false, $d: true/false, $s: false, $mod:  0 or 2
        my $str = shift;
        my $cnt = 0;
        (ref $str ? $$str : $str) =~ s{($re)}{
          exists $hash{$1} ? (++$cnt, $hash{$1}) : $1;
        }ge;
        ref $str ? $cnt : $str;
      } :
    sub { croak "$PACKAGE Panic! Invalid closure in trclosure!\n" }
}

##
## mkrange(STRING, BOOL)
##
sub mkrange($;$)
{
  my($s, @retv, $range, $min, $max);
  my($self,$rev) = @_;
  $self =~ s/^-/\\-/;
  $range = 0;
  foreach $s ($self =~ /\G(?:\\\\|\\-|$Char)/go) {
    if($range){
      if   ($s eq '\\-') {$s = '-'}
      elsif($s eq '\\\\'){$s = '\\'}
      $min = @retv ? __ord(pop(@retv)) : 1;
      $max = __ord($s);
      push @retv, __expand($min,$max,$rev);
      $range = 0;
    } else {
      if($s eq '-'){$range = 1}
      elsif($s eq '\\-') {push @retv, '-' }
      elsif($s eq '\\\\'){push @retv, '\\'}
      else		 {push @retv, $s }
    }
  }
  push @retv, '-' if $range;
  wantarray ? @retv : @retv ? join('', @retv) : '';
}

sub __ord($)
{
  my $c = shift;
  CORE::length($c) > 1 ? unpack('n', $c) : ord($c);
}

sub vidualize_char($) # for err-msg
{
  my $c = shift;
  $c == 0 ? '\0' :
  $c < 0x20 || $c == 0x7F ? sprintf("\\x%02x", $c) :
  $c > 0xFF ? pack('n', $c) : chr($c);
}


sub __expand
{
  my($ini, $fin, $i, $ch, @retv);
  my($fin_f,$fin_t,$ini_f,$ini_t);
  my($fr, $to, $rev) = @_;
  if ($fr > $to) {
    if($rev){ ($fr,$to) = ($to,$fr) }
    else {
      croak sprintf "$PACKAGE Invalid character range %s-%s",
	vidualize_char($fr), vidualize_char($to);
    }
  } else { $rev = 0 }
  if ($fr <= 0x7F) {
    $ini = $fr < 0x00 ? 0x00 : $fr;
    $fin = $to > 0x7F ? 0x7F : $to;
    for ($i = $ini; $i <= $fin; $i++) { push @retv, chr($i) }
  }
  if ($fr <= 0xDF) {
    $ini = $fr < 0xA1 ? 0xA1 : $fr;
    $fin = $to > 0xDF ? 0xDF : $to;
    for ($i = $ini; $i <= $fin; $i++) { push @retv, chr($i) }
  }
  $ini = $fr < 0x8140 ? 0x8140 : $fr;
  $fin = $to > 0xFCFC ? 0xFCFC : $to;
  if ($ini <= $fin) {
    ($ini_f,$ini_t) = unpack 'C*', pack 'n', $ini;
    ($fin_f,$fin_t) = unpack 'C*', pack 'n', $fin;
    $ini_t = 0x40 if $ini_t < 0x40;
    $fin_t = 0xFC if $fin_t > 0xFC;
    if ($ini_f == $fin_f) {
      $ch = chr $ini_f;
      for ($i = $ini_t; $i <= $fin_t; $i++) {
        next if $i == 0x7F;
        push @retv, $ch.chr($i);
      }
    } else {
      $ch = chr($ini_f);
      for ($i = $ini_t; $i <= 0xFC; $i++) {
        next if $i == 0x7F;
        push @retv, $ch.chr($i);
      }
      for ($i = $ini_f+1; $i < $fin_f; $i++) {
        next if 0xA0 <= $i && $i <= 0xDF;
        $ch = chr($i);
        push @retv, map $ch.chr, 0x40..0x7E, 0x80..0xFC;
      }
      $ch = chr($fin_f);
      for ($i = 0x40; $i <=  $fin_t; $i++) {
        next if $i == 0x7F;
        push @retv, $ch.chr($i);
      }
    }
  }
  return $rev ? reverse(@retv) : @retv;
}

##
## spaceH2Z(STRING)
##
sub spaceH2Z($) {
  my $str = shift;
  my $len = CORE::length(ref $str ? $$str : $str);
  (ref $str ? $$str : $str) =~ s/ /\x81\x40/g;
  ref $str ? abs($len - CORE::length $$str) : $str;
};

##
## spaceZ2H(STRING)
##
## tolower(STRING)  and toupper(STRING)
##
my $spaceZ2H = trclosure('@', ' ');
my $toupper  = trclosure('a-z', 'A-Z');
my $tolower  = trclosure('A-Z', 'a-z');

sub spaceZ2H($) { &$spaceZ2H(@_) }
sub toupper($)  { &$toupper(@_) }
sub tolower($)  { &$tolower(@_) }

##
## Kana Letters
##
my $kataTRE = '(?:[\xB3\xB6-\xC4\xCA-\xCE]\xDE|[\xCA-\xCE]\xDF)';
my $hiraTRE = '(?:\x82\xA4\x81\x4A)';
my $kanaTRE = "(?:$hiraTRE|$kataTRE)";

my $kataH
 = ''
 . ''
 . '޷޸޹޺޻޼޽޾޿'
 . '޲ܶ';

my $kataZH
 = 'BuvAE@BDFHb[ACEGIJLNPRTVXZ\^'
 . '`cegijklmnqtwz}~JK'
 . 'KMOQSUWY[]_adfhorux{psvy|'
 . '';

my $hiraZH
 = 'BuvAE['
 . 'ĂƂȂɂʂ˂̂͂Ђӂւق܂݂ނ߂JK'
 . 'Âłǂ΂тԂׂڂς҂Ղ؂'
 . 'J삩';


my $kataH2Z = trclosure($kataH, $kataZH, 'R', $kanaTRE);
my $kataZ2H = trclosure($kataZH, $kataH, 'R', $kanaTRE);
my $kanaZ2H = trclosure($hiraZH.$kataZH, $kataH.$kataH, 'R', $kanaTRE);

my $kataZ
 = '@BDFHbACEGIJLNPRTVXZ\^'
 . '`cegijklmnqtwz}~'
 . 'KMOQSUWY[]_adfhorux{psvy|'
 . 'RS';

my $hiraZ
 = ''
 . 'ĂƂȂɂʂ˂̂͂Ђӂւق܂݂ނ߂'
 . 'Âłǂ΂тԂׂڂς҂Ղ؂'
 . 'J삩TU';

my $hiXka = trclosure($kataZ.$hiraZ, $hiraZ.$kataZ, 'R', $hiraTRE);
my $hi2ka = trclosure($hiraZ, $kataZ, 'R', $hiraTRE);
my $ka2hi = trclosure($kataZ, $hiraZ, 'R', $hiraTRE);

sub kataH2Z ($) { &$kataH2Z(@_) }
sub kanaH2Z ($) { &$kataH2Z(@_) }
sub kataZ2H ($) { &$kataZ2H(@_) }
sub kanaZ2H ($) { &$kanaZ2H(@_) }
sub hiXka   ($) { &$hiXka(@_) }
sub hi2ka   ($) { &$hi2ka(@_) }
sub ka2hi   ($) { &$ka2hi(@_) }

1;
__END__
