#!/usr/bin/env perl
#
# OMOP CSV Validator
# Last Modified: 2025-05-10
# License: Artistic License 2.0
# Copyright (C) 2025 Manuel Rueda - CNAG

use strict;
use warnings;
use utf8;
use FindBin qw($Bin);
use lib "$Bin/../lib";
use feature qw(say);
use Getopt::Long;
use Path::Tiny;
use OMOP::CSV::Validator;
use JSON::XS;
use Pod::Usage;
#use Data::Dumper;
use Term::ANSIColor;    # For colored output
use File::Basename;
binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

# Module version: the script reports the version of the library it drives
my $VERSION = $OMOP::CSV::Validator::VERSION;

# Command-line arguments
my $ddl_file;
my $csv_file;
my $sep = ',';
my $table_name;         # Optional parameter to override table name
my $schemas_file;       # Optional: file to save schemas
my $nocolor = 0;
my $help;
my $show_version;

# Only record what was requested here. Anything that prints (help, version)
# is deferred until the whole command line has been read, so that --no-color
# is honoured no matter where it appears among the arguments.
GetOptions(
    'ddl=s'          => \$ddl_file,        # Path to the DDL file (PostgreSQL)
    'input=s'        => \$csv_file,        # Path to the input CSV file
    'sep=s'          => \$sep,             # Field separator (default: comma)
    'table|t=s'      => \$table_name,      # Optional: override table name
    'save-schemas=s' => \$schemas_file,    # Optional: file to save schemas
    'no-color|nc'    => \$nocolor,         # Optional: turn off colored output
    'help|h'         => \$help,            # Show help message
    'version|V'      => \$show_version,    # Show version and exit
) or pod2usage(2);

# Turning color off if argument <--no-color>. This must run before the first
# call to color(), which checks ANSI_COLORS_DISABLED each time it is called.
$ENV{'ANSI_COLORS_DISABLED'} = 1 if $nocolor;

# --version takes precedence over --help
if ($show_version) {
    say color("cyan"), "$0 Version $VERSION", color("reset");
    exit 0;
}

# Explicitly requested help is not an error: exit with status 0
pod2usage( -verbose => 1, -exitval => 0 ) if $help;

# Both paths are mandatory. Test for defined/non-empty rather than truthiness
# so that a file literally named "0" is still accepted.
unless ( defined $ddl_file
    && length $ddl_file
    && defined $csv_file
    && length $csv_file )
{
    warn color("red"), "[ERROR] --ddl and --input are required parameters.\n",
      color("reset");
    pod2usage(1);
}

# Emoji prefixes for console messages, keyed by the role of the message
my %msg_emoji = (
    'success' => '✅',
    'save'    => '💾',
    'error'   => '❌',
    'warning' => '⚠️ ',    # the trailing space is part of the value
    'dot'     => '✖',
);

# Read the DDL file (decoded from UTF-8 into Perl characters)
my $ddl_text = path($ddl_file)->slurp_utf8;

# Create the validator object
my $validator = OMOP::CSV::Validator->new();

# Load schemas from the DDL
my $schemas = $validator->load_schemas_from_ddl($ddl_text);

# Optionally save schemas to a file if --save-schemas was provided
if ($schemas_file) {

    # With ->utf8 enabled the encoder already returns UTF-8 *bytes*, so the
    # file must be written raw. Writing those bytes through spew_utf8 would
    # encode them a second time and corrupt any non-ASCII character.
    # ->canonical sorts hash keys so the saved file is stable between runs.
    my $json = JSON::XS->new->utf8->pretty->canonical->encode($schemas);
    path($schemas_file)->spew_raw($json);
    say color("green"), "$msg_emoji{save} Schemas saved to '$schemas_file'",
      color("reset");
}

# Pick the schema to validate against: an explicit --table wins, otherwise
# the validator is asked to derive it from the CSV filename.
my ( $schema, $schema_name );
if ($table_name) {

    # Schemas are looked up by the lower-cased table name
    $schema = $schemas->{ lc $table_name };
    unless ($schema) {
        die color("red"),
          "$msg_emoji{error} No schema found for table '$table_name'\n",
          color("reset");
    }
    $schema_name = $table_name;
}
else {
    $schema = $validator->get_schema_from_csv_filename( $csv_file, $schemas );
    unless ($schema) {
        die color("red"),
          "$msg_emoji{error} No schema found for table derived from '$csv_file'\n",
          color("reset");
    }

    # Name shown in the final report: the file's base name without ".csv"
    ( $schema_name = basename($csv_file) ) =~ s/\.csv$//i;
}

# Validate the CSV file; the result is used below as a reference to an array
# of per-row records, each carrying a 'row' value and an 'errors' array.
my $errors = $validator->validate_csv_file( $csv_file, $schema, $sep );

# Nothing to report: announce success and leave with exit status 0
unless (@$errors) {
    say color("bold white on_green"),
      "$msg_emoji{success} CSV file '$csv_file' is valid against the '$schema_name' schema.",
      color("reset");
    exit 0;
}

# Otherwise print a banner, then one heading per failing row followed by its
# individual messages, and leave with exit status 1.
say color("bold white on_red"),
  "$msg_emoji{error} Validation errors found:",
  color("reset");

for my $row_failure (@$errors) {
    say color("red"),
      "$msg_emoji{warning} Row $row_failure->{row} validation failed:",
      color("reset");
    say "   $msg_emoji{dot} $_" for @{ $row_failure->{errors} };
}

exit 1;

__END__

=head1 NAME

omop_csv_validator - Validate OMOP CDM CSV files against DDL-derived schemas

=head1 SYNOPSIS

  omop_csv_validator --ddl DDL.sql --input DATA.csv [--sep $'\t'] [--table person] [--save-schemas schemas.json]

=head1 OPTIONS

=over 4

=item B<--ddl>

(required) Path to the PostgreSQL DDL file defining OMOP CDM table structures.

=item B<--input>

(required) Path to the input CSV file to validate.

=item B<--sep>

CSV field separator (default: comma). For tab, use: --sep $'\t'

=item B<--table>, B<-t>

(optional) Table name to validate against. If not provided, the script will attempt
to derive the table name from the CSV filename.

=item B<--save-schemas>

(optional) Path to a file where the DDL-derived schemas should be saved (in JSON format).

=item B<--no-color>, B<-nc>

(optional) Turn off colored output.

=item B<--help>, B<-h>

Display this help message.

=item B<--version>, B<-V>

Show the script's version (which corresponds to C<OMOP::CSV::Validator::VERSION>).

=back

=head1 EXAMPLE

  bin/omop_csv_validator --ddl ddl/postgres.sql --input data/person.csv --sep $'\t'
  bin/omop_csv_validator --ddl ddl/postgres.sql --input data/ANY_CSV.csv --table person
  bin/omop_csv_validator --ddl ddl/postgres.sql --input data/ANY_CSV.csv --save-schemas schemas.json

=cut
