| Filename | /Users/ap13/perl5/lib/perl5/Text/CSV_PP.pm |
| Statements | Executed 3140358 statements in 4.52s |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 18216 | 1 | 1 | 3.60s | 4.56s | Text::CSV_PP::_combine |
| 673202 | 4 | 1 | 516ms | 516ms | Text::CSV_PP::CORE:regcomp (opcode) |
| 18216 | 2 | 1 | 264ms | 7.02s | Text::CSV_PP::print |
| 673200 | 2 | 1 | 224ms | 224ms | Text::CSV_PP::CORE:subst (opcode) |
| 673206 | 4 | 1 | 218ms | 218ms | Text::CSV_PP::CORE:match (opcode) |
| 18216 | 1 | 1 | 51.2ms | 51.2ms | Text::CSV_PP::_string |
| 1 | 1 | 1 | 568µs | 568µs | Text::CSV_PP::BEGIN@970 |
| 1 | 1 | 1 | 128µs | 176µs | Text::CSV_PP::new |
| 1 | 1 | 1 | 27µs | 30µs | Text::CSV_PP::BEGIN@328 |
| 1 | 1 | 1 | 23µs | 23µs | Text::CSV_PP::BEGIN@111 |
| 1 | 1 | 1 | 23µs | 82µs | Text::CSV::ErrorDiag::BEGIN@1088 |
| 1 | 1 | 1 | 14µs | 24µs | Text::CSV_PP::_check_sanity |
| 1 | 1 | 1 | 14µs | 39µs | Text::CSV::ErrorDiag::BEGIN@1087 |
| 1 | 1 | 1 | 13µs | 13µs | Text::CSV_PP::types |
| 1 | 1 | 1 | 13µs | 25µs | Text::CSV_PP::BEGIN@10 |
| 1 | 1 | 1 | 10µs | 33µs | Text::CSV_PP::BEGIN@11 |
| 2 | 2 | 1 | 9µs | 9µs | Text::CSV_PP::CORE:qr (opcode) |
| 1 | 1 | 1 | 7µs | 21µs | Text::CSV_PP::BEGIN@114 |
| 1 | 1 | 1 | 7µs | 16µs | Text::CSV_PP::BEGIN@119 |
| 1 | 1 | 1 | 3µs | 3µs | Text::CSV_PP::BEGIN@12 |
| 0 | 0 | 0 | 0s | 0s | Text::CSV::ErrorDiag::numeric |
| 0 | 0 | 0 | 0s | 0s | Text::CSV::ErrorDiag::stringify |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::IV |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::NV |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::PV |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::SetDiag |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::__ANON__[:115] |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::__ANON__[:116] |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::__ANON__[:120] |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::__ANON__[:121] |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::__ANON__[:138] |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::_check_type |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::_fields |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::_is_valid_utf8 |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::_make_regexp_split_column |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::_make_regexp_split_column_allow_sp |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::_make_regexp_split_column_allow_unqout_esc |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::_parse |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::_return_getline_result |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::_set_error_diag |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::allow_whitespace |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::auto_diag |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::bind_columns |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::column_names |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::diag_verbose |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::eof |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::eol |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::error_diag |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::error_input |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::escape_char |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::getline |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::getline_all |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::getline_hr |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::getline_hr_all |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::is_binary |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::is_missing |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::is_quoted |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::meta_info |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::print_hr |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::quote_char |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::record_number |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::sep_char |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::status |
| 0 | 0 | 0 | 0s | 0s | Text::CSV_PP::version |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | package Text::CSV_PP; | ||||
| 2 | |||||
| 3 | ################################################################################ | ||||
| 4 | # | ||||
| 5 | # Text::CSV_PP - Text::CSV_XS compatible pure-Perl module | ||||
| 6 | # | ||||
| 7 | ################################################################################ | ||||
| 8 | 1 | 43µs | require 5.005; | ||
| 9 | |||||
| 10 | 2 | 25µs | 2 | 38µs | # spent 25µs (13+12) within Text::CSV_PP::BEGIN@10 which was called:
# once (13µs+12µs) by Text::CSV::_load_pp at line 10 # spent 25µs making 1 call to Text::CSV_PP::BEGIN@10
# spent 12µs making 1 call to strict::import |
| 11 | 2 | 22µs | 2 | 56µs | # spent 33µs (10+23) within Text::CSV_PP::BEGIN@11 which was called:
# once (10µs+23µs) by Text::CSV::_load_pp at line 11 # spent 33µs making 1 call to Text::CSV_PP::BEGIN@11
# spent 23µs making 1 call to vars::import |
| 12 | 2 | 225µs | 1 | 3µs | # spent 3µs within Text::CSV_PP::BEGIN@12 which was called:
# once (3µs+0s) by Text::CSV::_load_pp at line 12 # spent 3µs making 1 call to Text::CSV_PP::BEGIN@12 |
| 13 | |||||
| 14 | 1 | 900ns | $VERSION = '1.31'; | ||
| 15 | |||||
| 16 | sub PV { 0 } | ||||
| 17 | sub IV { 1 } | ||||
| 18 | sub NV { 2 } | ||||
| 19 | |||||
| 20 | sub IS_QUOTED () { 0x0001; } | ||||
| 21 | sub IS_BINARY () { 0x0002; } | ||||
| 22 | sub IS_MISSING () { 0x0010; } | ||||
| 23 | |||||
| 24 | |||||
| 25 | 1 | 27µs | my $ERRORS = { | ||
| 26 | # PP and XS | ||||
| 27 | 1000 => "INI - constructor failed", | ||||
| 28 | 1001 => "sep_char is equal to quote_char or escape_char", | ||||
| 29 | 1002 => "INI - allow_whitespace with escape_char or quote_char SP or TAB", | ||||
| 30 | 1003 => "INI - \r or \n in main attr not allowed", | ||||
| 31 | |||||
| 32 | 2010 => "ECR - QUO char inside quotes followed by CR not part of EOL", | ||||
| 33 | 2011 => "ECR - Characters after end of quoted field", | ||||
| 34 | |||||
| 35 | 2021 => "EIQ - NL char inside quotes, binary off", | ||||
| 36 | 2022 => "EIQ - CR char inside quotes, binary off", | ||||
| 37 | 2025 => "EIQ - Loose unescaped escape", | ||||
| 38 | 2026 => "EIQ - Binary character inside quoted field, binary off", | ||||
| 39 | 2027 => "EIQ - Quoted field not terminated", | ||||
| 40 | |||||
| 41 | 2030 => "EIF - NL char inside unquoted verbatim, binary off", | ||||
| 42 | 2031 => "EIF - CR char is first char of field, not part of EOL", | ||||
| 43 | 2032 => "EIF - CR char inside unquoted, not part of EOL", | ||||
| 44 | 2034 => "EIF - Loose unescaped quote", | ||||
| 45 | 2037 => "EIF - Binary character in unquoted field, binary off", | ||||
| 46 | |||||
| 47 | 2110 => "ECB - Binary character in Combine, binary off", | ||||
| 48 | |||||
| 49 | 2200 => "EIO - print to IO failed. See errno", | ||||
| 50 | |||||
| 51 | # PP Only Error | ||||
| 52 | 4002 => "EIQ - Unescaped ESC in quoted field", | ||||
| 53 | 4003 => "EIF - ESC CR", | ||||
| 54 | 4004 => "EUF - ", | ||||
| 55 | |||||
| 56 | # Hash-Ref errors | ||||
| 57 | 3001 => "EHR - Unsupported syntax for column_names ()", | ||||
| 58 | 3002 => "EHR - getline_hr () called before column_names ()", | ||||
| 59 | 3003 => "EHR - bind_columns () and column_names () fields count mismatch", | ||||
| 60 | 3004 => "EHR - bind_columns () only accepts refs to scalars", | ||||
| 61 | 3006 => "EHR - bind_columns () did not pass enough refs for parsed fields", | ||||
| 62 | 3007 => "EHR - bind_columns needs refs to writable scalars", | ||||
| 63 | 3008 => "EHR - unexpected error in bound fields", | ||||
| 64 | 3009 => "EHR - print_hr () called before column_names ()", | ||||
| 65 | 3010 => "EHR - print_hr () called with invalid arguments", | ||||
| 66 | |||||
| 67 | 0 => "", | ||||
| 68 | }; | ||||
| 69 | |||||
| 70 | |||||
| 71 | 1 | 300ns | my $last_new_error = ''; | ||
| 72 | 1 | 100ns | my $last_new_err_num; | ||
| 73 | |||||
| 74 | 1 | 13µs | my %def_attr = ( | ||
| 75 | quote_char => '"', | ||||
| 76 | escape_char => '"', | ||||
| 77 | sep_char => ',', | ||||
| 78 | eol => defined $\ ? $\ : '', | ||||
| 79 | always_quote => 0, | ||||
| 80 | binary => 0, | ||||
| 81 | keep_meta_info => 0, | ||||
| 82 | allow_loose_quotes => 0, | ||||
| 83 | allow_loose_escapes => 0, | ||||
| 84 | allow_unquoted_escape => 0, | ||||
| 85 | allow_whitespace => 0, | ||||
| 86 | chomp_verbatim => 0, | ||||
| 87 | types => undef, | ||||
| 88 | verbatim => 0, | ||||
| 89 | blank_is_undef => 0, | ||||
| 90 | empty_is_undef => 0, | ||||
| 91 | auto_diag => 0, | ||||
| 92 | quote_space => 1, | ||||
| 93 | quote_null => 1, | ||||
| 94 | quote_binary => 1, | ||||
| 95 | diag_verbose => 0, | ||||
| 96 | |||||
| 97 | _EOF => 0, | ||||
| 98 | _RECNO => 0, | ||||
| 99 | _STATUS => undef, | ||||
| 100 | _FIELDS => undef, | ||||
| 101 | _FFLAGS => undef, | ||||
| 102 | _STRING => undef, | ||||
| 103 | _ERROR_INPUT => undef, | ||||
| 104 | _ERROR_DIAG => undef, | ||||
| 105 | |||||
| 106 | _COLUMN_NAMES => undef, | ||||
| 107 | _BOUND_COLUMNS => undef, | ||||
| 108 | ); | ||||
| 109 | |||||
| 110 | |||||
| 111 | # spent 23µs within Text::CSV_PP::BEGIN@111 which was called:
# once (23µs+0s) by Text::CSV::_load_pp at line 141 | ||||
| 112 | 3 | 21µs | if ( $] < 5.006 ) { | ||
| 113 | $INC{'bytes.pm'} = 1 unless $INC{'bytes.pm'}; # dummy | ||||
| 114 | 2 | 68µs | 2 | 34µs | # spent 21µs (7+13) within Text::CSV_PP::BEGIN@114 which was called:
# once (7µs+13µs) by Text::CSV::_load_pp at line 114 # spent 21µs making 1 call to Text::CSV_PP::BEGIN@114
# spent 13µs making 1 call to strict::unimport |
| 115 | *{"utf8::is_utf8"} = sub { 0; }; | ||||
| 116 | *{"utf8::decode"} = sub { }; | ||||
| 117 | } | ||||
| 118 | elsif ( $] < 5.008 ) { | ||||
| 119 | 2 | 133µs | 2 | 25µs | # spent 16µs (7+9) within Text::CSV_PP::BEGIN@119 which was called:
# once (7µs+9µs) by Text::CSV::_load_pp at line 119 # spent 16µs making 1 call to Text::CSV_PP::BEGIN@119
# spent 9µs making 1 call to strict::unimport |
| 120 | *{"utf8::is_utf8"} = sub { 0; }; | ||||
| 121 | *{"utf8::decode"} = sub { }; | ||||
| 122 | } | ||||
| 123 | elsif ( !defined &utf8::is_utf8 ) { | ||||
| 124 | require Encode; | ||||
| 125 | *utf8::is_utf8 = *Encode::is_utf8; | ||||
| 126 | } | ||||
| 127 | |||||
| 128 | eval q| require Scalar::Util |; # spent 3µs executing statements in string eval | ||||
| 129 | if ( $@ ) { | ||||
| 130 | eval q| require B |; | ||||
| 131 | if ( $@ ) { | ||||
| 132 | Carp::croak $@; | ||||
| 133 | } | ||||
| 134 | else { | ||||
| 135 | *Scalar::Util::readonly = sub (\$) { | ||||
| 136 | my $b = B::svref_2object( $_[0] ); | ||||
| 137 | $b->FLAGS & 0x00800000; # SVf_READONLY? | ||||
| 138 | } | ||||
| 139 | } | ||||
| 140 | } | ||||
| 141 | 1 | 1.33ms | 1 | 23µs | } # spent 23µs making 1 call to Text::CSV_PP::BEGIN@111 |
| 142 | |||||
| 143 | ################################################################################ | ||||
| 144 | # version | ||||
| 145 | ################################################################################ | ||||
| 146 | sub version { | ||||
| 147 | return $VERSION; | ||||
| 148 | } | ||||
| 149 | ################################################################################ | ||||
| 150 | # new | ||||
| 151 | ################################################################################ | ||||
| 152 | |||||
| 153 | # spent 24µs (14+10) within Text::CSV_PP::_check_sanity which was called:
# once (14µs+10µs) by Text::CSV_PP::new at line 195 | ||||
| 154 | 7 | 24µs | my ( $self ) = @_; | ||
| 155 | |||||
| 156 | for ( qw( sep_char quote_char escape_char ) ) { | ||||
| 157 | 3 | 10µs | ( exists $self->{$_} && defined $self->{$_} && $self->{$_} =~ m/[\r\n]/ ) and return 1003; # spent 10µs making 3 calls to Text::CSV_PP::CORE:match, avg 3µs/call | ||
| 158 | } | ||||
| 159 | |||||
| 160 | if ( $self->{allow_whitespace} and | ||||
| 161 | ( defined $self->{quote_char} && $self->{quote_char} =~ m/^[ \t]$/ ) | ||||
| 162 | || | ||||
| 163 | ( defined $self->{escape_char} && $self->{escape_char} =~ m/^[ \t]$/ ) | ||||
| 164 | ) { | ||||
| 165 | #$last_new_error = "INI - allow_whitespace with escape_char or quote_char SP or TAB"; | ||||
| 166 | #$last_new_err_num = 1002; | ||||
| 167 | return 1002; | ||||
| 168 | } | ||||
| 169 | |||||
| 170 | return 0; | ||||
| 171 | } | ||||
| 172 | |||||
| 173 | |||||
| 174 | # spent 176µs (128+47) within Text::CSV_PP::new which was called:
# once (128µs+47µs) by Text::CSV::new at line 84 of Text/CSV.pm | ||||
| 175 | 21 | 128µs | my $proto = shift; | ||
| 176 | my $attr = @_ > 0 ? shift : {}; | ||||
| 177 | |||||
| 178 | $last_new_error = 'usage: my $csv = Text::CSV_PP->new ([{ option => value, ... }]);'; | ||||
| 179 | $last_new_err_num = 1000; | ||||
| 180 | |||||
| 181 | return unless ( defined $attr and ref($attr) eq 'HASH' ); | ||||
| 182 | |||||
| 183 | my $class = ref($proto) || $proto or return; | ||||
| 184 | my $self = { %def_attr }; | ||||
| 185 | |||||
| 186 | for my $prop (keys %$attr) { # if invalid attr, return undef | ||||
| 187 | 3 | 10µs | unless ($prop =~ /^[a-z]/ && exists $def_attr{$prop}) { # spent 10µs making 3 calls to Text::CSV_PP::CORE:match, avg 3µs/call | ||
| 188 | $last_new_error = "INI - Unknown attribute '$prop'"; | ||||
| 189 | error_diag() if $attr->{ auto_diag }; | ||||
| 190 | return; | ||||
| 191 | } | ||||
| 192 | $self->{$prop} = $attr->{$prop}; | ||||
| 193 | } | ||||
| 194 | |||||
| 195 | 1 | 24µs | my $ec = _check_sanity( $self ); # spent 24µs making 1 call to Text::CSV_PP::_check_sanity | ||
| 196 | |||||
| 197 | if ( $ec ) { | ||||
| 198 | $last_new_error = $ERRORS->{ $ec }; | ||||
| 199 | $last_new_err_num = $ec; | ||||
| 200 | return; | ||||
| 201 | #$class->SetDiag ($ec); | ||||
| 202 | } | ||||
| 203 | |||||
| 204 | $last_new_error = ''; | ||||
| 205 | |||||
| 206 | defined $\ and $self->{eol} = $\; | ||||
| 207 | |||||
| 208 | bless $self, $class; | ||||
| 209 | |||||
| 210 | 1 | 13µs | $self->types( $self->{types} ) if( exists( $self->{types} ) ); # spent 13µs making 1 call to Text::CSV_PP::types | ||
| 211 | |||||
| 212 | return $self; | ||||
| 213 | } | ||||
| 214 | ################################################################################ | ||||
| 215 | # status | ||||
| 216 | ################################################################################ | ||||
| 217 | sub status { | ||||
| 218 | $_[0]->{_STATUS}; | ||||
| 219 | } | ||||
| 220 | ################################################################################ | ||||
| 221 | # error_input | ||||
| 222 | ################################################################################ | ||||
| 223 | sub error_input { | ||||
| 224 | $_[0]->{_ERROR_INPUT}; | ||||
| 225 | } | ||||
| 226 | ################################################################################ | ||||
| 227 | # error_diag | ||||
| 228 | ################################################################################ | ||||
| 229 | sub error_diag { | ||||
| 230 | my $self = shift; | ||||
| 231 | my @diag = (0, $last_new_error, 0); | ||||
| 232 | |||||
| 233 | unless ($self and ref $self) { # Class method or direct call | ||||
| 234 | $last_new_error and $diag[0] = defined $last_new_err_num ? $last_new_err_num : 1000; | ||||
| 235 | } | ||||
| 236 | elsif ( $self->isa (__PACKAGE__) and defined $self->{_ERROR_DIAG} ) { | ||||
| 237 | @diag = ( 0 + $self->{_ERROR_DIAG}, $ERRORS->{ $self->{_ERROR_DIAG} } ); | ||||
| 238 | exists $self->{_ERROR_POS} and $diag[2] = 1 + $self->{_ERROR_POS}; | ||||
| 239 | } | ||||
| 240 | |||||
| 241 | my $context = wantarray; | ||||
| 242 | |||||
| 243 | my $diagobj = bless \@diag, 'Text::CSV::ErrorDiag'; | ||||
| 244 | |||||
| 245 | unless (defined $context) { # Void context | ||||
| 246 | if ( $diag[0] ) { | ||||
| 247 | my $msg = "# CSV_PP ERROR: " . $diag[0] . " - $diag[1]\n"; | ||||
| 248 | ref $self ? ( $self->{auto_diag} > 1 ? die $msg : warn $msg ) | ||||
| 249 | : warn $msg; | ||||
| 250 | } | ||||
| 251 | return; | ||||
| 252 | } | ||||
| 253 | |||||
| 254 | return $context ? @diag : $diagobj; | ||||
| 255 | } | ||||
| 256 | |||||
| 257 | sub record_number { | ||||
| 258 | return shift->{_RECNO}; | ||||
| 259 | } | ||||
| 260 | |||||
| 261 | ################################################################################ | ||||
| 262 | # string | ||||
| 263 | ################################################################################ | ||||
| 264 | 1 | 2µs | *string = \&_string; | ||
| 265 | # spent 51.2ms within Text::CSV_PP::_string which was called 18216 times, avg 3µs/call:
# 18216 times (51.2ms+0s) by Text::CSV_PP::print at line 662, avg 3µs/call | ||||
| 266 | 18216 | 60.0ms | defined $_[0]->{_STRING} ? ${ $_[0]->{_STRING} } : undef; | ||
| 267 | } | ||||
| 268 | ################################################################################ | ||||
| 269 | # fields | ||||
| 270 | ################################################################################ | ||||
| 271 | 1 | 500ns | *fields = \&_fields; | ||
| 272 | sub _fields { | ||||
| 273 | ref($_[0]->{_FIELDS}) ? @{$_[0]->{_FIELDS}} : undef; | ||||
| 274 | } | ||||
| 275 | ################################################################################ | ||||
| 276 | # combine | ||||
| 277 | ################################################################################ | ||||
| 278 | 1 | 500ns | *combine = \&_combine; | ||
| 279 | # spent 4.56s (3.60+958ms) within Text::CSV_PP::_combine which was called 18216 times, avg 250µs/call:
# 18216 times (3.60s+958ms) by Text::CSV_PP::print at line 658, avg 250µs/call | ||||
| 280 | 3012768 | 4.22s | my ($self, @part) = @_; | ||
| 281 | |||||
| 282 | # at least one argument was given for "combining"... | ||||
| 283 | return $self->{_STATUS} = 0 unless(@part); | ||||
| 284 | |||||
| 285 | $self->{_FIELDS} = \@part; | ||||
| 286 | $self->{_ERROR_INPUT} = undef; | ||||
| 287 | $self->{_STRING} = ''; | ||||
| 288 | $self->{_STATUS} = 0; | ||||
| 289 | |||||
| 290 | my ($always_quote, $binary, $quot, $sep, $esc, $empty_is_undef, $quote_space, $quote_null, $quote_binary ) | ||||
| 291 | = @{$self}{qw/always_quote binary quote_char sep_char escape_char empty_is_undef quote_space quote_null quote_binary/}; | ||||
| 292 | |||||
| 293 | if(!defined $quot){ $quot = ''; } | ||||
| 294 | |||||
| 295 | return $self->_set_error_diag(1001) if ($sep eq $esc or $sep eq $quot); | ||||
| 296 | |||||
| 297 | 2 | 32µs | my $re_esc = $self->{_re_comb_escape}->{$quot}->{$esc} ||= qr/(\Q$quot\E|\Q$esc\E)/; # spent 25µs making 1 call to Text::CSV_PP::CORE:regcomp
# spent 7µs making 1 call to Text::CSV_PP::CORE:qr | ||
| 298 | 2 | 13µs | my $re_sp = $self->{_re_comb_sp}->{$sep}->{$quote_space} ||= ( $quote_space ? qr/[\s\Q$sep\E]/ : qr/[\Q$sep\E]/ ); # spent 11µs making 1 call to Text::CSV_PP::CORE:regcomp
# spent 2µs making 1 call to Text::CSV_PP::CORE:qr | ||
| 299 | |||||
| 300 | my $must_be_quoted; | ||||
| 301 | for my $column (@part) { | ||||
| 302 | |||||
| 303 | unless (defined $column) { | ||||
| 304 | $column = ''; | ||||
| 305 | next; | ||||
| 306 | } | ||||
| 307 | elsif ( !$binary ) { | ||||
| 308 | $binary = 1 if utf8::is_utf8 $column; | ||||
| 309 | } | ||||
| 310 | |||||
| 311 | if (!$binary and $column =~ /[^\x09\x20-\x7E]/) { | ||||
| 312 | # an argument contained an invalid character... | ||||
| 313 | $self->{_ERROR_INPUT} = $column; | ||||
| 314 | $self->_set_error_diag(2110); | ||||
| 315 | return $self->{_STATUS}; | ||||
| 316 | } | ||||
| 317 | |||||
| 318 | $must_be_quoted = 0; | ||||
| 319 | |||||
| 320 | 673200 | 434ms | if($quot ne '' and $column =~ s/$re_esc/$esc$1/g){ # spent 292ms making 336600 calls to Text::CSV_PP::CORE:regcomp, avg 869ns/call
# spent 141ms making 336600 calls to Text::CSV_PP::CORE:subst, avg 420ns/call | ||
| 321 | $must_be_quoted++; | ||||
| 322 | } | ||||
| 323 | 673200 | 365ms | if($column =~ /$re_sp/){ # spent 224ms making 336600 calls to Text::CSV_PP::CORE:regcomp, avg 664ns/call
# spent 141ms making 336600 calls to Text::CSV_PP::CORE:match, avg 419ns/call | ||
| 324 | $must_be_quoted++; | ||||
| 325 | } | ||||
| 326 | |||||
| 327 | if( $binary and $quote_null ){ | ||||
| 328 | 2 | 3.66ms | 2 | 34µs | # spent 30µs (27+4) within Text::CSV_PP::BEGIN@328 which was called:
# once (27µs+4µs) by Text::CSV::_load_pp at line 328 # spent 30µs making 1 call to Text::CSV_PP::BEGIN@328
# spent 4µs making 1 call to bytes::import |
| 329 | 673200 | 159ms | $must_be_quoted++ if ( $column =~ s/\0/${esc}0/g || ($quote_binary && $column =~ /[\x00-\x1f\x7f-\xa0]/) ); # spent 82.3ms making 336600 calls to Text::CSV_PP::CORE:subst, avg 245ns/call
# spent 76.9ms making 336600 calls to Text::CSV_PP::CORE:match, avg 229ns/call | ||
| 330 | } | ||||
| 331 | |||||
| 332 | if($always_quote or $must_be_quoted){ | ||||
| 333 | $column = $quot . $column . $quot; | ||||
| 334 | } | ||||
| 335 | |||||
| 336 | } | ||||
| 337 | |||||
| 338 | $self->{_STRING} = \do { join($sep, @part) . ( defined $self->{eol} ? $self->{eol} : '' ) }; | ||||
| 339 | $self->{_STATUS} = 1; | ||||
| 340 | |||||
| 341 | return $self->{_STATUS}; | ||||
| 342 | } | ||||
| 343 | ################################################################################ | ||||
| 344 | # parse | ||||
| 345 | ################################################################################ | ||||
| 346 | 1 | 2µs | my %allow_eol = ("\r" => 1, "\r\n" => 1, "\n" => 1, "" => 1); | ||
| 347 | |||||
| 348 | 1 | 400ns | *parse = \&_parse; | ||
| 349 | |||||
| 350 | sub _parse { | ||||
| 351 | my ($self, $line) = @_; | ||||
| 352 | |||||
| 353 | @{$self}{qw/_STRING _FIELDS _STATUS _ERROR_INPUT/} = ( \do{ defined $line ? "$line" : undef }, undef, 0, $line ); | ||||
| 354 | |||||
| 355 | return 0 if(!defined $line); | ||||
| 356 | |||||
| 357 | my ($binary, $quot, $sep, $esc, $types, $keep_meta_info, $allow_whitespace, $eol, $blank_is_undef, $empty_is_undef, $unquot_esc) | ||||
| 358 | = @{$self}{ | ||||
| 359 | qw/binary quote_char sep_char escape_char types keep_meta_info allow_whitespace eol blank_is_undef empty_is_undef allow_unquoted_escape/ | ||||
| 360 | }; | ||||
| 361 | |||||
| 362 | $sep = ',' unless (defined $sep); | ||||
| 363 | $esc = "\0" unless (defined $esc); | ||||
| 364 | $quot = "\0" unless (defined $quot); | ||||
| 365 | |||||
| 366 | my $quot_is_null = $quot eq "\0"; # in this case, any fields are not interpreted as quoted data. | ||||
| 367 | |||||
| 368 | return $self->_set_error_diag(1001) if (($sep eq $esc or $sep eq $quot) and $sep ne "\0"); | ||||
| 369 | |||||
| 370 | my $meta_flag = $keep_meta_info ? [] : undef; | ||||
| 371 | my $re_split = $self->{_re_split}->{$quot}->{$esc}->{$sep} ||= _make_regexp_split_column($esc, $quot, $sep); | ||||
| 372 | my $re_quoted = $self->{_re_quoted}->{$quot} ||= qr/^\Q$quot\E(.*)\Q$quot\E$/s; | ||||
| 373 | my $re_in_quot_esp1 = $self->{_re_in_quot_esp1}->{$esc} ||= qr/\Q$esc\E(.)/; | ||||
| 374 | my $re_in_quot_esp2 = $self->{_re_in_quot_esp2}->{$quot}->{$esc} ||= qr/[\Q$quot$esc$sep\E0]/; | ||||
| 375 | my $re_quot_char = $self->{_re_quot_char}->{$quot} ||= qr/\Q$quot\E/; | ||||
| 376 | my $re_esc = $self->{_re_esc}->{$quot}->{$esc} ||= qr/\Q$esc\E(\Q$quot\E|\Q$esc\E|\Q$sep\E|0)/; | ||||
| 377 | my $re_invalid_quot = $self->{_re_invalid_quot}->{$quot}->{$esc} ||= qr/^$re_quot_char|[^\Q$re_esc\E]$re_quot_char/; | ||||
| 378 | |||||
| 379 | if ($allow_whitespace) { | ||||
| 380 | $re_split = $self->{_re_split_allow_sp}->{$quot}->{$esc}->{$sep} | ||||
| 381 | ||= _make_regexp_split_column_allow_sp($esc, $quot, $sep); | ||||
| 382 | } | ||||
| 383 | if ($unquot_esc) { | ||||
| 384 | $re_split = $self->{_re_split_allow_unqout_esc}->{$quot}->{$esc}->{$sep} | ||||
| 385 | ||= _make_regexp_split_column_allow_unqout_esc($esc, $quot, $sep); | ||||
| 386 | } | ||||
| 387 | |||||
| 388 | my $palatable = 1; | ||||
| 389 | my @part = (); | ||||
| 390 | |||||
| 391 | my $i = 0; | ||||
| 392 | my $flag; | ||||
| 393 | |||||
| 394 | if (defined $eol and $eol eq "\r") { | ||||
| 395 | $line =~ s/[\r ]*\r[ ]*$//; | ||||
| 396 | } | ||||
| 397 | |||||
| 398 | if ($self->{verbatim}) { | ||||
| 399 | $line .= $sep; | ||||
| 400 | } | ||||
| 401 | else { | ||||
| 402 | if (defined $eol and !$allow_eol{$eol}) { | ||||
| 403 | $line .= $sep; | ||||
| 404 | } | ||||
| 405 | else { | ||||
| 406 | $line =~ s/(?:\x0D\x0A|\x0A)?$|(?:\x0D\x0A|\x0A)[ ]*$/$sep/; | ||||
| 407 | } | ||||
| 408 | } | ||||
| 409 | |||||
| 410 | my $pos = 0; | ||||
| 411 | |||||
| 412 | my $utf8 = 1 if utf8::is_utf8( $line ); # if UTF8 marked, flag on. | ||||
| 413 | |||||
| 414 | for my $col ( $line =~ /$re_split/g ) { | ||||
| 415 | |||||
| 416 | if ($keep_meta_info) { | ||||
| 417 | $flag = 0x0000; | ||||
| 418 | $flag |= IS_BINARY if ($col =~ /[^\x09\x20-\x7E]/); | ||||
| 419 | } | ||||
| 420 | |||||
| 421 | $pos += length $col; | ||||
| 422 | |||||
| 423 | if ( ( !$binary and !$utf8 ) and $col =~ /[^\x09\x20-\x7E]/) { # Binary character, binary off | ||||
| 424 | if ( not $quot_is_null and $col =~ $re_quoted ) { | ||||
| 425 | $self->_set_error_diag( | ||||
| 426 | $col =~ /\n([^\n]*)/ ? (2021, $pos - 1 - length $1) | ||||
| 427 | : $col =~ /\r([^\r]*)/ ? (2022, $pos - 1 - length $1) | ||||
| 428 | : (2026, $pos -2) # Binary character inside quoted field, binary off | ||||
| 429 | ); | ||||
| 430 | } | ||||
| 431 | else { | ||||
| 432 | $self->_set_error_diag( | ||||
| 433 | $col =~ /\Q$quot\E(.*)\Q$quot\E\r$/ ? (2010, $pos - 2) | ||||
| 434 | : $col =~ /\n/ ? (2030, $pos - length $col) | ||||
| 435 | : $col =~ /^\r/ ? (2031, $pos - length $col) | ||||
| 436 | : $col =~ /\r([^\r]*)/ ? (2032, $pos - 1 - length $1) | ||||
| 437 | : (2037, $pos - length $col) # Binary character in unquoted field, binary off | ||||
| 438 | ); | ||||
| 439 | } | ||||
| 440 | $palatable = 0; | ||||
| 441 | last; | ||||
| 442 | } | ||||
| 443 | |||||
| 444 | if ( ($utf8 and !$binary) and $col =~ /\n|\0/ ) { # \n still needs binary (Text::CSV_XS 0.51 compat) | ||||
| 445 | $self->_set_error_diag(2021, $pos); | ||||
| 446 | $palatable = 0; | ||||
| 447 | last; | ||||
| 448 | } | ||||
| 449 | |||||
| 450 | if ( not $quot_is_null and $col =~ $re_quoted ) { | ||||
| 451 | $flag |= IS_QUOTED if ($keep_meta_info); | ||||
| 452 | $col = $1; | ||||
| 453 | |||||
| 454 | my $flag_in_quot_esp; | ||||
| 455 | while ( $col =~ /$re_in_quot_esp1/g ) { | ||||
| 456 | my $str = $1; | ||||
| 457 | $flag_in_quot_esp = 1; | ||||
| 458 | |||||
| 459 | if ($str !~ $re_in_quot_esp2) { | ||||
| 460 | |||||
| 461 | unless ($self->{allow_loose_escapes}) { | ||||
| 462 | $self->_set_error_diag( 2025, $pos - 2 ); # Needless ESC in quoted field | ||||
| 463 | $palatable = 0; | ||||
| 464 | last; | ||||
| 465 | } | ||||
| 466 | |||||
| 467 | unless ($self->{allow_loose_quotes}) { | ||||
| 468 | $col =~ s/\Q$esc\E(.)/$1/g; | ||||
| 469 | } | ||||
| 470 | } | ||||
| 471 | |||||
| 472 | } | ||||
| 473 | |||||
| 474 | last unless ( $palatable ); | ||||
| 475 | |||||
| 476 | unless ( $flag_in_quot_esp ) { | ||||
| 477 | if ($col =~ /(?<!\Q$esc\E)\Q$esc\E/) { | ||||
| 478 | $self->_set_error_diag( 4002, $pos - 1 ); # No escaped ESC in quoted field | ||||
| 479 | $palatable = 0; | ||||
| 480 | last; | ||||
| 481 | } | ||||
| 482 | } | ||||
| 483 | |||||
| 484 | $col =~ s{$re_esc}{$1 eq '0' ? "\0" : $1}eg; | ||||
| 485 | |||||
| 486 | if ( $empty_is_undef and length($col) == 0 ) { | ||||
| 487 | $col = undef; | ||||
| 488 | } | ||||
| 489 | |||||
| 490 | if ($types and $types->[$i]) { # IV or NV | ||||
| 491 | _check_type(\$col, $types->[$i]); | ||||
| 492 | } | ||||
| 493 | |||||
| 494 | } | ||||
| 495 | |||||
| 496 | # quoted but invalid | ||||
| 497 | |||||
| 498 | elsif ( not $quot_is_null and $col =~ $re_invalid_quot ) { | ||||
| 499 | |||||
| 500 | unless ($self->{allow_loose_quotes} and $col =~ /$re_quot_char/) { | ||||
| 501 | $self->_set_error_diag( | ||||
| 502 | $col =~ /^\Q$quot\E(.*)\Q$quot\E.$/s ? (2011, $pos - 2) | ||||
| 503 | : $col =~ /^$re_quot_char/ ? (2027, $pos - 1) | ||||
| 504 | : (2034, $pos - length $col) # Loose unescaped quote | ||||
| 505 | ); | ||||
| 506 | $palatable = 0; | ||||
| 507 | last; | ||||
| 508 | } | ||||
| 509 | |||||
| 510 | } | ||||
| 511 | |||||
| 512 | elsif ($types and $types->[$i]) { # IV or NV | ||||
| 513 | _check_type(\$col, $types->[$i]); | ||||
| 514 | } | ||||
| 515 | |||||
| 516 | # unquoted | ||||
| 517 | |||||
| 518 | else { | ||||
| 519 | |||||
| 520 | if (!$self->{verbatim} and $col =~ /\r\n|\n/) { | ||||
| 521 | $col =~ s/(?:\r\n|\n).*$//sm; | ||||
| 522 | } | ||||
| 523 | |||||
| 524 | if ($col =~ /\Q$esc\E\r$/) { # for t/15_flags : test 165 'ESC CR' at line 203 | ||||
| 525 | $self->_set_error_diag( 4003, $pos ); | ||||
| 526 | $palatable = 0; | ||||
| 527 | last; | ||||
| 528 | } | ||||
| 529 | |||||
| 530 | if ($col =~ /.\Q$esc\E$/) { # for t/65_allow : test 53-54 parse('foo\') at line 62, 65 | ||||
| 531 | $self->_set_error_diag( 4004, $pos ); | ||||
| 532 | $palatable = 0; | ||||
| 533 | last; | ||||
| 534 | } | ||||
| 535 | |||||
| 536 | if ( $col eq '' and $blank_is_undef ) { | ||||
| 537 | $col = undef; | ||||
| 538 | } | ||||
| 539 | |||||
| 540 | if ( $empty_is_undef and length($col) == 0 ) { | ||||
| 541 | $col = undef; | ||||
| 542 | } | ||||
| 543 | |||||
| 544 | if ( $unquot_esc ) { | ||||
| 545 | $col =~ s/\Q$esc\E(.)/$1/g; | ||||
| 546 | } | ||||
| 547 | |||||
| 548 | } | ||||
| 549 | |||||
| 550 | utf8::encode($col) if $utf8; | ||||
| 551 | if ( defined $col && _is_valid_utf8($col) ) { | ||||
| 552 | utf8::decode($col); | ||||
| 553 | } | ||||
| 554 | |||||
| 555 | push @part,$col; | ||||
| 556 | push @{$meta_flag}, $flag if ($keep_meta_info); | ||||
| 557 | $self->{ _RECNO }++; | ||||
| 558 | |||||
| 559 | $i++; | ||||
| 560 | } | ||||
| 561 | |||||
| 562 | if ($palatable and ! @part) { | ||||
| 563 | $palatable = 0; | ||||
| 564 | } | ||||
| 565 | |||||
| 566 | if ($palatable) { | ||||
| 567 | $self->{_ERROR_INPUT} = undef; | ||||
| 568 | $self->{_FIELDS} = \@part; | ||||
| 569 | } | ||||
| 570 | |||||
| 571 | $self->{_FFLAGS} = $keep_meta_info ? $meta_flag : []; | ||||
| 572 | |||||
| 573 | return $self->{_STATUS} = $palatable; | ||||
| 574 | } | ||||
| 575 | |||||
| 576 | |||||
| 577 | sub _make_regexp_split_column { | ||||
| 578 | my ($esc, $quot, $sep) = @_; | ||||
| 579 | |||||
| 580 | if ( $quot eq '' ) { | ||||
| 581 | return qr/([^\Q$sep\E]*)\Q$sep\E/s; | ||||
| 582 | } | ||||
| 583 | |||||
| 584 | return qr/( | ||||
| 585 | \Q$quot\E | ||||
| 586 | [^\Q$quot$esc\E]*(?:\Q$esc\E[\Q$quot$esc\E0][^\Q$quot$esc\E]*)* | ||||
| 587 | \Q$quot\E | ||||
| 588 | | # or | ||||
| 589 | \Q$quot\E | ||||
| 590 | (?:\Q$esc\E[\Q$quot$esc$sep\E0]|[^\Q$quot$esc$sep\E])* | ||||
| 591 | \Q$quot\E | ||||
| 592 | | # or | ||||
| 593 | [^\Q$sep\E]* | ||||
| 594 | ) | ||||
| 595 | \Q$sep\E | ||||
| 596 | /xs; | ||||
| 597 | } | ||||
| 598 | |||||
| 599 | |||||
| 600 | sub _make_regexp_split_column_allow_unqout_esc { | ||||
| 601 | my ($esc, $quot, $sep) = @_; | ||||
| 602 | |||||
| 603 | return qr/( | ||||
| 604 | \Q$quot\E | ||||
| 605 | [^\Q$quot$esc\E]*(?:\Q$esc\E[\Q$quot$esc\E0][^\Q$quot$esc\E]*)* | ||||
| 606 | \Q$quot\E | ||||
| 607 | | # or | ||||
| 608 | \Q$quot\E | ||||
| 609 | (?:\Q$esc\E[\Q$quot$esc$sep\E0]|[^\Q$quot$esc$sep\E])* | ||||
| 610 | \Q$quot\E | ||||
| 611 | | # or | ||||
| 612 | (?:\Q$esc\E[\Q$quot$esc$sep\E0]|[^\Q$quot$esc$sep\E])* | ||||
| 613 | | # or | ||||
| 614 | [^\Q$sep\E]* | ||||
| 615 | ) | ||||
| 616 | \Q$sep\E | ||||
| 617 | /xs; | ||||
| 618 | } | ||||
| 619 | |||||
| 620 | |||||
| 621 | sub _make_regexp_split_column_allow_sp { | ||||
| 622 | my ($esc, $quot, $sep) = @_; | ||||
| 623 | |||||
| 624 | # if separator is space or tab, don't count that separator | ||||
| 625 | # as whitespace --- patched by Mike O'Sullivan | ||||
| 626 | my $ws = $sep eq ' ' ? '[\x09]' | ||||
| 627 | : $sep eq "\t" ? '[\x20]' | ||||
| 628 | : '[\x20\x09]' | ||||
| 629 | ; | ||||
| 630 | |||||
| 631 | if ( $quot eq '' ) { | ||||
| 632 | return qr/$ws*([^\Q$sep\E]?)$ws*\Q$sep\E$ws*/s; | ||||
| 633 | } | ||||
| 634 | |||||
| 635 | qr/$ws* | ||||
| 636 | ( | ||||
| 637 | \Q$quot\E | ||||
| 638 | [^\Q$quot$esc\E]*(?:\Q$esc\E[\Q$quot$esc$sep\E0][^\Q$quot$esc\E]*)* | ||||
| 639 | \Q$quot\E | ||||
| 640 | | # or | ||||
| 641 | [^\Q$sep\E]*? | ||||
| 642 | ) | ||||
| 643 | $ws*\Q$sep\E$ws* | ||||
| 644 | /xs; | ||||
| 645 | } | ||||
| 646 | ################################################################################ | ||||
| 647 | |||||
| 648 | ################################################################################ | ||||
| 649 | # spent 7.02s (264ms+6.75) within Text::CSV_PP::print which was called 18216 times, avg 385µs/call:
# 18215 times (264ms+6.75s) by Bio::Roary::GroupStatistics::create_spreadsheet at line 197 of lib/Bio/Roary/GroupStatistics.pm, avg 385µs/call
# once (65µs+391µs) by Bio::Roary::GroupStatistics::create_spreadsheet at line 186 of lib/Bio/Roary/GroupStatistics.pm | ||||
| 650 | 109296 | 226ms | my ($self, $io, $cols) = @_; | ||
| 651 | |||||
| 652 | require IO::Handle; | ||||
| 653 | |||||
| 654 | if(ref($cols) ne 'ARRAY'){ | ||||
| 655 | Carp::croak("Expected fields to be an array ref"); | ||||
| 656 | } | ||||
| 657 | |||||
| 658 | 18216 | 4.56s | $self->_combine(@$cols) or return ''; # spent 4.56s making 18216 calls to Text::CSV_PP::_combine, avg 250µs/call | ||
| 659 | |||||
| 660 | local $\ = ''; | ||||
| 661 | |||||
| 662 | 36432 | 2.19s | $io->print( $self->_string ) or $self->_set_error_diag(2200); # spent 2.14s making 18216 calls to IO::Handle::print, avg 118µs/call
# spent 51.2ms making 18216 calls to Text::CSV_PP::_string, avg 3µs/call | ||
| 663 | } | ||||
| 664 | |||||
| 665 | sub print_hr { | ||||
| 666 | my ($self, $io, $hr) = @_; | ||||
| 667 | $self->{_COLUMN_NAMES} or $self->_set_error_diag(3009); | ||||
| 668 | ref $hr eq "HASH" or $self->_set_error_diag(3010); | ||||
| 669 | $self->print ($io, [ map { $hr->{$_} } $self->column_names ]); | ||||
| 670 | } | ||||
| 671 | ################################################################################ | ||||
| 672 | # getline | ||||
| 673 | ################################################################################ | ||||
| 674 | sub getline { | ||||
| 675 | my ($self, $io) = @_; | ||||
| 676 | |||||
| 677 | require IO::Handle; | ||||
| 678 | |||||
| 679 | $self->{_EOF} = eof($io) ? 1 : ''; | ||||
| 680 | |||||
| 681 | my $quot = $self->{quote_char}; | ||||
| 682 | my $sep = $self->{sep_char}; | ||||
| 683 | my $re = defined $quot ? qr/(?:\Q$quot\E)/ : undef; | ||||
| 684 | |||||
| 685 | my $eol = $self->{eol}; | ||||
| 686 | |||||
| 687 | local $/ = $eol if ( defined $eol and $eol ne '' ); | ||||
| 688 | |||||
| 689 | my $line = $io->getline(); | ||||
| 690 | |||||
| 691 | # AUTO DETECTION EOL CR | ||||
| 692 | if ( defined $line and defined $eol and $eol eq '' and $line =~ /[^\r]\r[^\r\n]/ and eof ) { | ||||
| 693 | $self->{_AUTO_DETECT_CR} = 1; | ||||
| 694 | $self->{eol} = "\r"; | ||||
| 695 | seek( $io, 0, 0 ); # restart | ||||
| 696 | return $self->getline( $io ); | ||||
| 697 | } | ||||
| 698 | |||||
| 699 | if ( $re and defined $line ) { | ||||
| 700 | LOOP: { | ||||
| 701 | my $is_continued = scalar(my @list = $line =~ /$re/g) % 2; # if line is valid, quot is even | ||||
| 702 | |||||
| 703 | if ( $self->{allow_loose_quotes } ) { | ||||
| 704 | $is_continued = 0; | ||||
| 705 | } | ||||
| 706 | elsif ( $line =~ /${re}0/ ) { # null suspicion case | ||||
| 707 | $is_continued = $line =~ qr/ | ||||
| 708 | ^ | ||||
| 709 | ( | ||||
| 710 | (?: | ||||
| 711 | $re # $quote | ||||
| 712 | (?: | ||||
| 713 | $re$re # escaped $quote | ||||
| 714 | | ${re}0 # or escaped zero | ||||
| 715 | | [^$quot] # or exceptions of $quote | ||||
| 716 | )* | ||||
| 717 | $re # $quote | ||||
| 718 | [^0$quot] # non zero or $quote | ||||
| 719 | ) | ||||
| 720 | | | ||||
| 721 | (?:[^$quot]*) # exceptions of $quote | ||||
| 722 | )+ | ||||
| 723 | $ | ||||
| 724 | /x ? 0 : 1; | ||||
| 725 | } | ||||
| 726 | |||||
| 727 | if ( $is_continued and !eof($io) ) { | ||||
| 728 | $line .= $io->getline(); | ||||
| 729 | goto LOOP; | ||||
| 730 | } | ||||
| 731 | } | ||||
| 732 | } | ||||
| 733 | |||||
| 734 | $line =~ s/\Q$eol\E$// if ( defined $line and defined $eol and $eol ne '' ); | ||||
| 735 | |||||
| 736 | $self->_parse($line); | ||||
| 737 | |||||
| 738 | return $self->_return_getline_result(); | ||||
| 739 | } | ||||
| 740 | |||||
| 741 | |||||
| 742 | sub _return_getline_result { | ||||
| 743 | |||||
| 744 | if ( eof ) { | ||||
| 745 | $_[0]->{_AUTO_DETECT_CR} = 0; | ||||
| 746 | } | ||||
| 747 | |||||
| 748 | return unless $_[0]->{_STATUS}; | ||||
| 749 | |||||
| 750 | return [ $_[0]->_fields() ] unless $_[0]->{_BOUND_COLUMNS}; | ||||
| 751 | |||||
| 752 | my @vals = $_[0]->_fields(); | ||||
| 753 | my ( $max, $count ) = ( scalar @vals, 0 ); | ||||
| 754 | |||||
| 755 | if ( @{ $_[0]->{_BOUND_COLUMNS} } < $max ) { | ||||
| 756 | $_[0]->_set_error_diag(3006); | ||||
| 757 | return; | ||||
| 758 | } | ||||
| 759 | |||||
| 760 | for ( my $i = 0; $i < $max; $i++ ) { | ||||
| 761 | my $bind = $_[0]->{_BOUND_COLUMNS}->[ $i ]; | ||||
| 762 | if ( Scalar::Util::readonly( $$bind ) ) { | ||||
| 763 | $_[0]->_set_error_diag(3008); | ||||
| 764 | return; | ||||
| 765 | } | ||||
| 766 | $$bind = $vals[ $i ]; | ||||
| 767 | } | ||||
| 768 | |||||
| 769 | return []; | ||||
| 770 | } | ||||
| 771 | ################################################################################ | ||||
| 772 | # getline_all | ||||
| 773 | ################################################################################ | ||||
| 774 | sub getline_all { | ||||
| 775 | my ( $self, $io, $offset, $len ) = @_; | ||||
| 776 | my @list; | ||||
| 777 | my $tail; | ||||
| 778 | my $n = 0; | ||||
| 779 | |||||
| 780 | $offset ||= 0; | ||||
| 781 | |||||
| 782 | if ( $offset < 0 ) { | ||||
| 783 | $tail = -$offset; | ||||
| 784 | $offset = 0; | ||||
| 785 | } | ||||
| 786 | |||||
| 787 | while ( my $row = $self->getline($io) ) { | ||||
| 788 | next if $offset && $offset-- > 0; # skip | ||||
| 789 | last if defined $len && !$tail && $n >= $len; # exceedes limit size | ||||
| 790 | push @list, $row; | ||||
| 791 | ++$n; | ||||
| 792 | if ( $tail && $n > $tail ) { | ||||
| 793 | shift @list; | ||||
| 794 | } | ||||
| 795 | } | ||||
| 796 | |||||
| 797 | if ( $tail && defined $len && $n > $len ) { | ||||
| 798 | @list = splice( @list, 0, $len); | ||||
| 799 | } | ||||
| 800 | |||||
| 801 | return \@list; | ||||
| 802 | } | ||||
| 803 | ################################################################################ | ||||
| 804 | # getline_hr | ||||
| 805 | ################################################################################ | ||||
| 806 | sub getline_hr { | ||||
| 807 | my ( $self, $io) = @_; | ||||
| 808 | my %hr; | ||||
| 809 | |||||
| 810 | unless ( $self->{_COLUMN_NAMES} ) { | ||||
| 811 | $self->SetDiag( 3002 ); | ||||
| 812 | } | ||||
| 813 | |||||
| 814 | my $fr = $self->getline( $io ) or return undef; | ||||
| 815 | |||||
| 816 | if ( ref $self->{_FFLAGS} ) { | ||||
| 817 | $self->{_FFLAGS}[$_] = IS_MISSING for ($#{$fr} + 1) .. $#{$self->{_COLUMN_NAMES}}; | ||||
| 818 | } | ||||
| 819 | |||||
| 820 | @hr{ @{ $self->{_COLUMN_NAMES} } } = @$fr; | ||||
| 821 | |||||
| 822 | \%hr; | ||||
| 823 | } | ||||
| 824 | ################################################################################ | ||||
| 825 | # getline_hr_all | ||||
| 826 | ################################################################################ | ||||
| 827 | sub getline_hr_all { | ||||
| 828 | my ( $self, $io, @args ) = @_; | ||||
| 829 | my %hr; | ||||
| 830 | |||||
| 831 | unless ( $self->{_COLUMN_NAMES} ) { | ||||
| 832 | $self->SetDiag( 3002 ); | ||||
| 833 | } | ||||
| 834 | |||||
| 835 | my @cn = @{$self->{_COLUMN_NAMES}}; | ||||
| 836 | |||||
| 837 | return [ map { my %h; @h{ @cn } = @$_; \%h } @{ $self->getline_all( $io, @args ) } ]; | ||||
| 838 | } | ||||
| 839 | ################################################################################ | ||||
| 840 | # column_names | ||||
| 841 | ################################################################################ | ||||
| 842 | sub column_names { | ||||
| 843 | my ( $self, @columns ) = @_; | ||||
| 844 | |||||
| 845 | @columns or return defined $self->{_COLUMN_NAMES} ? @{$self->{_COLUMN_NAMES}} : undef; | ||||
| 846 | @columns == 1 && ! defined $columns[0] and return $self->{_COLUMN_NAMES} = undef; | ||||
| 847 | |||||
| 848 | if ( @columns == 1 && ref $columns[0] eq "ARRAY" ) { | ||||
| 849 | @columns = @{ $columns[0] }; | ||||
| 850 | } | ||||
| 851 | elsif ( join "", map { defined $_ ? ref $_ : "" } @columns ) { | ||||
| 852 | $self->SetDiag( 3001 ); | ||||
| 853 | } | ||||
| 854 | |||||
| 855 | if ( $self->{_BOUND_COLUMNS} && @columns != @{$self->{_BOUND_COLUMNS}} ) { | ||||
| 856 | $self->SetDiag( 3003 ); | ||||
| 857 | } | ||||
| 858 | |||||
| 859 | $self->{_COLUMN_NAMES} = [ map { defined $_ ? $_ : "\cAUNDEF\cA" } @columns ]; | ||||
| 860 | @{ $self->{_COLUMN_NAMES} }; | ||||
| 861 | } | ||||
| 862 | ################################################################################ | ||||
| 863 | # bind_columns | ||||
| 864 | ################################################################################ | ||||
| 865 | sub bind_columns { | ||||
| 866 | my ( $self, @refs ) = @_; | ||||
| 867 | |||||
| 868 | @refs or return defined $self->{_BOUND_COLUMNS} ? @{$self->{_BOUND_COLUMNS}} : undef; | ||||
| 869 | @refs == 1 && ! defined $refs[0] and return $self->{_BOUND_COLUMNS} = undef; | ||||
| 870 | |||||
| 871 | if ( $self->{_COLUMN_NAMES} && @refs != @{$self->{_COLUMN_NAMES}} ) { | ||||
| 872 | $self->SetDiag( 3003 ); | ||||
| 873 | } | ||||
| 874 | |||||
| 875 | if ( grep { ref $_ ne "SCALAR" } @refs ) { # why don't use grep? | ||||
| 876 | $self->SetDiag( 3004 ); | ||||
| 877 | } | ||||
| 878 | |||||
| 879 | $self->{_is_bound} = scalar @refs; #pack("C", scalar @refs); | ||||
| 880 | $self->{_BOUND_COLUMNS} = [ @refs ]; | ||||
| 881 | @refs; | ||||
| 882 | } | ||||
| 883 | ################################################################################ | ||||
| 884 | # eof | ||||
| 885 | ################################################################################ | ||||
| 886 | sub eof { | ||||
| 887 | $_[0]->{_EOF}; | ||||
| 888 | } | ||||
| 889 | ################################################################################ | ||||
| 890 | # type | ||||
| 891 | ################################################################################ | ||||
| 892 | # spent 13µs within Text::CSV_PP::types which was called:
# once (13µs+0s) by Text::CSV_PP::new at line 210 | ||||
| 893 | 6 | 12µs | my $self = shift; | ||
| 894 | |||||
| 895 | if (@_) { | ||||
| 896 | if (my $types = shift) { | ||||
| 897 | $self->{'_types'} = join("", map{ chr($_) } @$types); | ||||
| 898 | $self->{'types'} = $types; | ||||
| 899 | } | ||||
| 900 | else { | ||||
| 901 | delete $self->{'types'}; | ||||
| 902 | delete $self->{'_types'}; | ||||
| 903 | undef; | ||||
| 904 | } | ||||
| 905 | } | ||||
| 906 | else { | ||||
| 907 | $self->{'types'}; | ||||
| 908 | } | ||||
| 909 | } | ||||
| 910 | ################################################################################ | ||||
| 911 | sub meta_info { | ||||
| 912 | $_[0]->{_FFLAGS} ? @{ $_[0]->{_FFLAGS} } : undef; | ||||
| 913 | } | ||||
| 914 | |||||
| 915 | sub is_quoted { | ||||
| 916 | return unless (defined $_[0]->{_FFLAGS}); | ||||
| 917 | return if( $_[1] =~ /\D/ or $_[1] < 0 or $_[1] > $#{ $_[0]->{_FFLAGS} } ); | ||||
| 918 | |||||
| 919 | $_[0]->{_FFLAGS}->[$_[1]] & IS_QUOTED ? 1 : 0; | ||||
| 920 | } | ||||
| 921 | |||||
| 922 | sub is_binary { | ||||
| 923 | return unless (defined $_[0]->{_FFLAGS}); | ||||
| 924 | return if( $_[1] =~ /\D/ or $_[1] < 0 or $_[1] > $#{ $_[0]->{_FFLAGS} } ); | ||||
| 925 | $_[0]->{_FFLAGS}->[$_[1]] & IS_BINARY ? 1 : 0; | ||||
| 926 | } | ||||
| 927 | |||||
| 928 | sub is_missing { | ||||
| 929 | my ($self, $idx, $val) = @_; | ||||
| 930 | ref $self->{_FFLAGS} && | ||||
| 931 | $idx >= 0 && $idx < @{$self->{_FFLAGS}} or return; | ||||
| 932 | $self->{_FFLAGS}[$idx] & IS_MISSING ? 1 : 0; | ||||
| 933 | } | ||||
| 934 | ################################################################################ | ||||
| 935 | # _check_type | ||||
| 936 | # take an arg as scalar referrence. | ||||
| 937 | # if not numeric, make the value 0. otherwise INTEGERized. | ||||
| 938 | ################################################################################ | ||||
| 939 | sub _check_type { | ||||
| 940 | my ($col_ref, $type) = @_; | ||||
| 941 | unless ($$col_ref =~ /^[+-]?(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/) { | ||||
| 942 | Carp::carp sprintf("Argument \"%s\" isn't numeric in subroutine entry",$$col_ref); | ||||
| 943 | $$col_ref = 0; | ||||
| 944 | } | ||||
| 945 | elsif ($type == NV) { | ||||
| 946 | $$col_ref = sprintf("%G",$$col_ref); | ||||
| 947 | } | ||||
| 948 | else { | ||||
| 949 | $$col_ref = sprintf("%d",$$col_ref); | ||||
| 950 | } | ||||
| 951 | } | ||||
| 952 | ################################################################################ | ||||
| 953 | # _set_error_diag | ||||
| 954 | ################################################################################ | ||||
| 955 | sub _set_error_diag { | ||||
| 956 | my ( $self, $error, $pos ) = @_; | ||||
| 957 | |||||
| 958 | $self->{_ERROR_DIAG} = $error; | ||||
| 959 | |||||
| 960 | if (defined $pos) { | ||||
| 961 | $_[0]->{_ERROR_POS} = $pos; | ||||
| 962 | } | ||||
| 963 | |||||
| 964 | $self->error_diag() if ( $error and $self->{auto_diag} ); | ||||
| 965 | |||||
| 966 | return; | ||||
| 967 | } | ||||
| 968 | ################################################################################ | ||||
| 969 | |||||
| 970 | # spent 568µs within Text::CSV_PP::BEGIN@970 which was called:
# once (568µs+0s) by Text::CSV::_load_pp at line 981 | ||||
| 971 | 13 | 565µs | for my $method ( qw/always_quote binary keep_meta_info allow_loose_quotes allow_loose_escapes | ||
| 972 | verbatim blank_is_undef empty_is_undef quote_space quote_null | ||||
| 973 | quote_binary allow_unquoted_escape/ ) { | ||||
| 974 | eval qq| | ||||
| 975 | sub $method { | ||||
| 976 | \$_[0]->{$method} = defined \$_[1] ? \$_[1] : 0 if (\@_ > 1); | ||||
| 977 | \$_[0]->{$method}; | ||||
| 978 | } | ||||
| 979 | |; | ||||
| 980 | } | ||||
| 981 | 1 | 640µs | 1 | 568µs | } # spent 568µs making 1 call to Text::CSV_PP::BEGIN@970 |
| 982 | |||||
| - - | |||||
| 985 | sub sep_char { | ||||
| 986 | my $self = shift; | ||||
| 987 | if ( @_ ) { | ||||
| 988 | $self->{sep_char} = $_[0]; | ||||
| 989 | my $ec = _check_sanity( $self ); | ||||
| 990 | $ec and Carp::croak( $self->SetDiag( $ec ) ); | ||||
| 991 | } | ||||
| 992 | $self->{sep_char}; | ||||
| 993 | } | ||||
| 994 | |||||
| 995 | |||||
| 996 | sub quote_char { | ||||
| 997 | my $self = shift; | ||||
| 998 | if ( @_ ) { | ||||
| 999 | $self->{quote_char} = $_[0]; | ||||
| 1000 | my $ec = _check_sanity( $self ); | ||||
| 1001 | $ec and Carp::croak( $self->SetDiag( $ec ) ); | ||||
| 1002 | } | ||||
| 1003 | $self->{quote_char}; | ||||
| 1004 | } | ||||
| 1005 | |||||
| 1006 | |||||
| 1007 | sub escape_char { | ||||
| 1008 | my $self = shift; | ||||
| 1009 | if ( @_ ) { | ||||
| 1010 | $self->{escape_char} = $_[0]; | ||||
| 1011 | my $ec = _check_sanity( $self ); | ||||
| 1012 | $ec and Carp::croak( $self->SetDiag( $ec ) ); | ||||
| 1013 | } | ||||
| 1014 | $self->{escape_char}; | ||||
| 1015 | } | ||||
| 1016 | |||||
| 1017 | |||||
| 1018 | sub allow_whitespace { | ||||
| 1019 | my $self = shift; | ||||
| 1020 | if ( @_ ) { | ||||
| 1021 | my $aw = shift; | ||||
| 1022 | $aw and | ||||
| 1023 | (defined $self->{quote_char} && $self->{quote_char} =~ m/^[ \t]$/) || | ||||
| 1024 | (defined $self->{escape_char} && $self->{escape_char} =~ m/^[ \t]$/) | ||||
| 1025 | and Carp::croak ($self->SetDiag (1002)); | ||||
| 1026 | $self->{allow_whitespace} = $aw; | ||||
| 1027 | } | ||||
| 1028 | $self->{allow_whitespace}; | ||||
| 1029 | } | ||||
| 1030 | |||||
| 1031 | |||||
| 1032 | sub eol { | ||||
| 1033 | $_[0]->{eol} = defined $_[1] ? $_[1] : '' if ( @_ > 1 ); | ||||
| 1034 | $_[0]->{eol}; | ||||
| 1035 | } | ||||
| 1036 | |||||
| 1037 | |||||
| 1038 | sub SetDiag { | ||||
| 1039 | if ( defined $_[1] and $_[1] == 0 ) { | ||||
| 1040 | $_[0]->{_ERROR_DIAG} = undef; | ||||
| 1041 | $last_new_error = ''; | ||||
| 1042 | return; | ||||
| 1043 | } | ||||
| 1044 | |||||
| 1045 | $_[0]->_set_error_diag( $_[1] ); | ||||
| 1046 | Carp::croak( $_[0]->error_diag . '' ); | ||||
| 1047 | } | ||||
| 1048 | |||||
| 1049 | sub auto_diag { | ||||
| 1050 | my $self = shift; | ||||
| 1051 | if (@_) { | ||||
| 1052 | my $v = shift; | ||||
| 1053 | !defined $v || $v eq "" and $v = 0; | ||||
| 1054 | $v =~ m/^[0-9]/ or $v = $v ? 1 : 0; # default for true/false | ||||
| 1055 | $self->{auto_diag} = $v; | ||||
| 1056 | } | ||||
| 1057 | $self->{auto_diag}; | ||||
| 1058 | } | ||||
| 1059 | |||||
| 1060 | sub diag_verbose { | ||||
| 1061 | my $self = shift; | ||||
| 1062 | if (@_) { | ||||
| 1063 | my $v = shift; | ||||
| 1064 | !defined $v || $v eq "" and $v = 0; | ||||
| 1065 | $v =~ m/^[0-9]/ or $v = $v ? 1 : 0; # default for true/false | ||||
| 1066 | $self->{diag_verbose} = $v; | ||||
| 1067 | } | ||||
| 1068 | $self->{diag_verbose}; | ||||
| 1069 | } | ||||
| 1070 | |||||
| 1071 | sub _is_valid_utf8 { | ||||
| 1072 | return ( $_[0] =~ /^(?: | ||||
| 1073 | [\x00-\x7F] | ||||
| 1074 | |[\xC2-\xDF][\x80-\xBF] | ||||
| 1075 | |[\xE0][\xA0-\xBF][\x80-\xBF] | ||||
| 1076 | |[\xE1-\xEC][\x80-\xBF][\x80-\xBF] | ||||
| 1077 | |[\xED][\x80-\x9F][\x80-\xBF] | ||||
| 1078 | |[\xEE-\xEF][\x80-\xBF][\x80-\xBF] | ||||
| 1079 | |[\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] | ||||
| 1080 | |[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] | ||||
| 1081 | |[\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] | ||||
| 1082 | )+$/x ) ? 1 : 0; | ||||
| 1083 | } | ||||
| 1084 | ################################################################################ | ||||
| 1085 | package Text::CSV::ErrorDiag; | ||||
| 1086 | |||||
| 1087 | 2 | 72µs | 2 | 64µs | # spent 39µs (14+25) within Text::CSV::ErrorDiag::BEGIN@1087 which was called:
# once (14µs+25µs) by Text::CSV::_load_pp at line 1087 # spent 39µs making 1 call to Text::CSV::ErrorDiag::BEGIN@1087
# spent 25µs making 1 call to strict::import |
| 1088 | # spent 82µs (23+59) within Text::CSV::ErrorDiag::BEGIN@1088 which was called:
# once (23µs+59µs) by Text::CSV::_load_pp at line 1094 | ||||
| 1089 | 2 | 18µs | '""' => \&stringify, | ||
| 1090 | '+' => \&numeric, | ||||
| 1091 | '-' => \&numeric, | ||||
| 1092 | '*' => \&numeric, | ||||
| 1093 | '/' => \&numeric, | ||||
| 1094 | 1 | 184µs | 2 | 141µs | ); # spent 82µs making 1 call to Text::CSV::ErrorDiag::BEGIN@1088
# spent 59µs making 1 call to overload::import |
| 1095 | |||||
| 1096 | |||||
| 1097 | sub numeric { | ||||
| 1098 | my ($left, $right) = @_; | ||||
| 1099 | return ref $left ? $left->[0] : $right->[0]; | ||||
| 1100 | } | ||||
| 1101 | |||||
| 1102 | |||||
| 1103 | sub stringify { | ||||
| 1104 | $_[0]->[1]; | ||||
| 1105 | } | ||||
| 1106 | ################################################################################ | ||||
| 1107 | 1 | 44µs | 1; | ||
| 1108 | __END__ | ||||
# spent 218ms within Text::CSV_PP::CORE:match which was called 673206 times, avg 324ns/call:
# 336600 times (141ms+0s) by Text::CSV_PP::_combine at line 323, avg 419ns/call
# 336600 times (76.9ms+0s) by Text::CSV_PP::_combine at line 329, avg 229ns/call
# 3 times (10µs+0s) by Text::CSV_PP::_check_sanity at line 157, avg 3µs/call
# 3 times (10µs+0s) by Text::CSV_PP::new at line 187, avg 3µs/call | |||||
sub Text::CSV_PP::CORE:qr; # opcode | |||||
# spent 516ms within Text::CSV_PP::CORE:regcomp which was called 673202 times, avg 767ns/call:
# 336600 times (292ms+0s) by Text::CSV_PP::_combine at line 320, avg 869ns/call
# 336600 times (224ms+0s) by Text::CSV_PP::_combine at line 323, avg 664ns/call
# once (25µs+0s) by Text::CSV_PP::_combine at line 297
# once (11µs+0s) by Text::CSV_PP::_combine at line 298 | |||||
sub Text::CSV_PP::CORE:subst; # opcode |