| File | /usr/share/perl5/MARC/Charset/Code.pm |
| Statements Executed | 20 |
| Total Time | 0.0009115 seconds |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::BEGIN |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::char_value |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::charset_name |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::charset_value |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::default_charset_group |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::get_escape |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::marc8_hash_code |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::marc_value |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::to_string |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::utf8_hash_code |
| Line | Stmts. | Exclusive Time | Avg. | Code |
|---|---|---|---|---|
| 1 | package MARC::Charset::Code; | |||
| 2 | ||||
| 3 | 3 | 27µs | 9µs | use strict; # spent 10µs making 1 call to strict::import |
| 4 | 3 | 32µs | 10µs | use warnings; # spent 28µs making 1 call to warnings::import |
| 5 | 3 | 42µs | 14µs | use base qw(Class::Accessor); # spent 1.73ms making 1 call to base::import |
| 6 | 3 | 35µs | 12µs | use Carp qw(croak); # spent 44µs making 1 call to Exporter::import |
| 7 | 3 | 33µs | 11µs | use Encode qw(encode_utf8); # spent 44µs making 1 call to Exporter::import |
| 8 | 3 | 720µs | 240µs | use MARC::Charset::Constants qw(:all); # spent 278µs making 1 call to Exporter::import |
| 9 | ||||
| 10 | 1 | 14µs | 14µs | MARC::Charset::Code # spent 288µs making 1 call to Class::Accessor::mk_accessors |
| 11 | ->mk_accessors(qw(marc ucs name charset is_combining alt)); | |||
| 12 | ||||
| 13 | =head1 NAME | |||
| 14 | ||||
| 15 | MARC::Charset::Code - represents a MARC-8/UTF-8 mapping | |||
| 16 | ||||
| 17 | =head1 SYNOPSIS | |||
| 18 | ||||
| 19 | =head1 DESCRIPTION | |||
| 20 | ||||
| 21 | Each mapping from a MARC-8 value to a UTF-8 value is represented by | |||
| 22 | a MARC::Charset::Code object in a MARC::Charset::Table. | |||
| 23 | ||||
| 24 | =head1 METHODS | |||
| 25 | ||||
| 26 | =head2 new() | |||
| 27 | ||||
| 28 | The constructor. | |||
| 29 | ||||
| 30 | =head2 name() | |||
| 31 | ||||
| 32 | A descriptive name for the code point. | |||
| 33 | ||||
| 34 | =head2 marc() | |||
| 35 | ||||
| 36 | A string representing the MARC-8 bytes codes. | |||
| 37 | ||||
| 38 | =head2 ucs() | |||
| 39 | ||||
| 40 | A string representing the UCS code point in hex. | |||
| 41 | ||||
| 42 | =head2 charset_code() | |||
| 43 | ||||
| 44 | The MARC-8 character set code. | |||
| 45 | ||||
| 46 | =head2 is_combining() | |||
| 47 | ||||
| 48 | Returns true/false to tell if the character is a combining character. | |||
| 49 | ||||
| 50 | =head2 to_string() | |||
| 51 | ||||
| 52 | A stringified version of the object suitable for pretty printing. | |||
| 53 | ||||
| 54 | =head2 char_value() | |||
| 55 | ||||
| 56 | Returns the unicode character. Essentially just a helper around | |||
| 57 | ucs(). | |||
| 58 | ||||
| 59 | =cut | |||
| 60 | ||||
| 61 | sub char_value() | |||
| 62 | { | |||
| 63 | return chr(hex(shift->ucs())); | |||
| 64 | } | |||
| 65 | ||||
| 66 | =head2 marc_value() | |||
| 67 | ||||
| 68 | The string representing the MARC-8 encoding. | |||
| 69 | ||||
| 70 | =cut | |||
| 71 | ||||
| 72 | sub marc_value | |||
| 73 | { | |||
| 74 | my $code = shift; | |||
| 75 | my $marc = $code->marc(); | |||
| 76 | return chr(hex($marc)) unless $code->charset_name eq 'CJK'; | |||
| 77 | return | |||
| 78 | chr(hex(substr($marc,0,2))) . | |||
| 79 | chr(hex(substr($marc,2,2))) . | |||
| 80 | chr(hex(substr($marc,4,2))); | |||
| 81 | } | |||
| 82 | ||||
| 83 | ||||
| 84 | =head2 charset_name() | |||
| 85 | ||||
| 86 | Returns the name of the character set, instead of the code. | |||
| 87 | ||||
| 88 | =cut | |||
| 89 | ||||
| 90 | sub charset_name() | |||
| 91 | { | |||
| 92 | return MARC::Charset::Constants::charset_name(shift->charset_value()); | |||
| 93 | } | |||
| 94 | ||||
| 95 | =head2 to_string() | |||
| 96 | ||||
| 97 | Returns a stringified version of the object. | |||
| 98 | ||||
| 99 | =cut | |||
| 100 | ||||
| 101 | sub to_string | |||
| 102 | { | |||
| 103 | my $self = shift; | |||
| 104 | my $str = | |||
| 105 | $self->name() . ': ' . | |||
| 106 | 'charset_code=' . $self->charset() . ' ' . | |||
| 107 | 'marc=' . $self->marc() . ' ' . | |||
| 108 | 'ucs=' . $self->ucs() . ' '; | |||
| 109 | ||||
| 110 | $str .= ' combining' if $self->is_combining(); | |||
| 111 | return $str; | |||
| 112 | } | |||
| 113 | ||||
| 114 | ||||
| 115 | =head2 marc8_hash_code() | |||
| 116 | ||||
| 117 | Returns a hash code for this Code object for looking up the object using | |||
| 118 | MARC8. First portion is the character set code and the second is the | |||
| 119 | MARC-8 value. | |||
| 120 | ||||
| 121 | =cut | |||
| 122 | ||||
| 123 | sub marc8_hash_code | |||
| 124 | { | |||
| 125 | my $self = shift; | |||
| 126 | return sprintf('%s:%s', $self->charset_value(), $self->marc_value()); | |||
| 127 | } | |||
| 128 | ||||
| 129 | ||||
| 130 | =head2 utf8_hash_code() | |||
| 131 | ||||
| 132 | Returns a hash code for uniquely identifying a Code by it's UCS value. | |||
| 133 | ||||
| 134 | =cut | |||
| 135 | ||||
| 136 | sub utf8_hash_code | |||
| 137 | { | |||
| 138 | return int(hex(shift->ucs())); | |||
| 139 | } | |||
| 140 | ||||
| 141 | ||||
| 142 | =head2 default_charset_group | |||
| 143 | ||||
| 144 | Returns 'G0' or 'G1' indicating where the character is typicalling used | |||
| 145 | in the MARC-8 environment. | |||
| 146 | ||||
| 147 | =cut | |||
| 148 | ||||
| 149 | sub default_charset_group | |||
| 150 | { | |||
| 151 | my $charset = shift->charset_value(); | |||
| 152 | ||||
| 153 | return 'G0' | |||
| 154 | if $charset eq ASCII_DEFAULT | |||
| 155 | or $charset eq GREEK_SYMBOLS | |||
| 156 | or $charset eq SUBSCRIPTS | |||
| 157 | or $charset eq SUPERSCRIPTS | |||
| 158 | or $charset eq BASIC_LATIN | |||
| 159 | or $charset eq BASIC_ARABIC | |||
| 160 | or $charset eq BASIC_CYRILLIC | |||
| 161 | or $charset eq BASIC_GREEK | |||
| 162 | or $charset eq BASIC_HEBREW | |||
| 163 | or $charset eq CJK; | |||
| 164 | ||||
| 165 | return 'G1'; | |||
| 166 | } | |||
| 167 | ||||
| 168 | ||||
| 169 | =head2 get_marc8_escape | |||
| 170 | ||||
| 171 | Returns an escape sequence to move to the Code from another marc-8 character | |||
| 172 | set. | |||
| 173 | ||||
| 174 | =cut | |||
| 175 | ||||
| 176 | sub get_escape | |||
| 177 | { | |||
| 178 | my $charset = shift->charset_value(); | |||
| 179 | ||||
| 180 | return ESCAPE . $charset | |||
| 181 | if $charset eq ASCII_DEFAULT | |||
| 182 | or $charset eq GREEK_SYMBOLS | |||
| 183 | or $charset eq SUBSCRIPTS | |||
| 184 | or $charset eq SUPERSCRIPTS; | |||
| 185 | ||||
| 186 | return ESCAPE . SINGLE_G0_A . $charset | |||
| 187 | if $charset eq ASCII_DEFAULT | |||
| 188 | or $charset eq BASIC_LATIN | |||
| 189 | or $charset eq BASIC_ARABIC | |||
| 190 | or $charset eq BASIC_CYRILLIC | |||
| 191 | or $charset eq BASIC_GREEK | |||
| 192 | or $charset eq BASIC_HEBREW; | |||
| 193 | ||||
| 194 | return ESCAPE . SINGLE_G1_A . $charset | |||
| 195 | if $charset eq EXTENDED_ARABIC | |||
| 196 | or $charset eq EXTENDED_LATIN | |||
| 197 | or $charset eq EXTENDED_CYRILLIC; | |||
| 198 | ||||
| 199 | return ESCAPE . MULTI_G0_A . CJK | |||
| 200 | if $charset eq CJK; | |||
| 201 | } | |||
| 202 | ||||
| 203 | =head2 charset_value | |||
| 204 | ||||
| 205 | Returns the charset value, not the hex sequence. | |||
| 206 | ||||
| 207 | =cut | |||
| 208 | ||||
| 209 | sub charset_value | |||
| 210 | { | |||
| 211 | return chr(hex(shift->charset())); | |||
| 212 | } | |||
| 213 | ||||
| 214 | ||||
| 215 | ||||
| 216 | 1 | 9µs | 9µs | 1; |