package Math::Expression::Evaluator;
use strict;
use Math::Expression::Evaluator::Lexer qw(lex);
#use Data::Dumper;
use Carp;

use Regexp::Common;
use Math::Trig qw(atan asin acos tan);

our $VERSION = '0.0.1';

=head1 NAME

Math::Expression::Evaluator - parses and evaluates mathematic expressions

=head1 SYNOPSIS

    use Math::Expression::Evaluator;
    my $m = new Math::Expression::Evaluator;

    print $m->parse("a = 12; a*3")->val(), "\n";
    # prints 36
    print $m->parse("2^(a/3)")->val(), "\n";
    # prints 8 (ie 2**3)
    print $m->parse("a / b")->val({ b => 6 }), "\n";
    # prints 36
    print $m->parse("log2(16)")->val(), "\n";
    # prints 4


=head1 DESCRIPTION

Math::Expression::Evaluator is a simple, recursive descending parser for 
mathematical expressions. It can handle normal arithmetics 
(includings powers ^), builtin functions like sin() and variables.

Multiple exressions can be seperated by whitespaces or by semicolons ';'. 
In case of multiple expressions the value of the last expression is 
returned.

Variables can be assigned with a single '=' sign, their name has to start 
with a alphabetic character or underscore C<[a-zA-Z_]>, and may contain 
alphabetic characters, digits and underscores.

Values for variables can also be provided as a hash ref as a parameter
to val(). In case of collision the explicitly provided value is used:

   $m->parse("a = 2; a")->val({a => 1}); 

will return 1, not 2.

The following builtin functions are supported atm:

=over 4

=item *

trignometric functions: sin, cos, tan

=item *

inverse trigonomic functions: asin, acos, atan

=item *

Square root: sqrt

=item * 

exponentials: exp, sinh, cosh

=item *

logarithms: log, log2, log10

=item * 

constants: pi() (you need the parenthesis to distinguish it from the 
variable pi)

=item *

other: theta (theta(x) = 1 for x > 0, theta(x) = 0 for x < 0)

=back

=head1 METHODS

=over 2

=item new

generates a new MathExpr object. accepts an optional argument, a hash ref
that contains configurations. If this hash sets force_semicolon to true, 
expressions have to be separated by a semicolon ';'.

=item parse

Takes a string as argument, and generates an AST that is stored internally.

Returns a reference to the object, so that method calls can be chained:

    print MathExpr->new->parse("1+2")->val;

Parse failures cause this method to die with a stack trace. 


=item val 

Executes the AST generated by parse(), and returns the number that the 
expression is evaluated to. It accepts an optional hash reference that
contain values for variables:

    my $m = new MathExpr;
    $m->parse("(x - 1) / (x + 1)");
    foreach (0 .. 10) {
        print $_, "\t", $m->val({x => $_}), "\n";
    }

=back

=cut

sub new {
    my $class = shift;
    my $self = bless {}, $class;
    $self->{tokens} = [];
    $self->{variables} = {};
    
    my $first = shift;

    if (defined $first){
        if (ref $first){
            $self->{config} = $first;
            $self->parse(shift) if @_;
        } else {
            $self->parse($first);
        }
    }

    return $self;
}

my @input_tokens = (
        ['ExpOp'            => '\^'],
        ['MulOp'            => '(?:\*|/)'],
        ['AddOp'            => '(?:\+|-)'],
# Float needs to be behind AddOp because its regexp has an 
# optional +/- in it
        ['Float'            => "(?:$RE{num}{real})"],
# Int is superflous because it is covered in the Float regexp
        ['OpenParen'        => '\('],
        ['ClosingParen'     => '\)'],
        ['Colon'            => ';'],
        ['Comma'            => ','],
        ['AssignmentOp'     => '='],
        ['Name'             => '[a-zA-Z_][a-zA-Z_0-9]*'],
        ['Whitespace'       => '\s+', sub {return undef}],
        ['Comment'          => qr/\#.*?$/, sub {return undef}],
);

# returns the next not-yet-parsed token
sub _next_token {
    my $self = shift;
    return $self->{tokens}[$self->{token_pointer}];
}

# checks if the next token is what you expected, for example
# _is_next_token("AddOp") checks, if the next token is a '+' or '-'
sub _is_next_token {
    my $self = shift;
    my $cmp = shift;
    if (defined $self->_next_token() && $self->_next_token()->[0] eq $cmp){
        return $self->_next_token->[1];
    }
}

# basically the same _is_next_token, but does an arbitrary number of lookahead 
# steps. An asterisk '*' stands for an arbitrary token.
sub _lookahead {
    my $self = shift;
    my $i = 0;
    while (my $v = shift){
        return undef unless($self->{tokens}[$self->{token_pointer}+$i]);
        if ($v eq "*") {
            $i++; 
            next;
        }
        my $ref = $self->{tokens}[$self->{token_pointer} + $i]->[0];
        return undef unless($ref eq  $v);
        $i++;
    }
    return 1;
}

# move the token pointer one step further.
sub _proceed {
    my $self = shift;
    $self->{token_pointer}++;
}

# parse a text into an AST, stores the AST in $self->{ast}
sub parse {
    my ($self, $text) = @_;

    my @tokens =  lex($text, \@input_tokens);
    $self->{tokens} = \@tokens;
    $self->{token_pointer} = 0;
    $self->{ast} = $self->_simplify_ast($self->_program());
    return $self;
}

# program -> statement*
# parse a program, e.g. a collection of statements. 
# The corrsponding AST looks like this: ['{', $s1, $s2, $s3, ... ]
sub _program {
    my $self = shift;
    my @res = ('{');
    while (defined $self->_next_token()){
        push @res, $self->_statement();
    }
    return \@res;
}

# generates an error message that something was expected but not found, 
# for example 'a + +' would warn that a value was expected, but an AddOp 
# was found.
sub _expected {
    my $self = shift;
    if (scalar @_ > 1){
        confess("Expected $_[0]; got: $_[1]\n");
    } else {
        confess ("Expected: ", $_[0], $/);
    }
}


# matches a specific token, and returns its text if successfull. Dies if
# unsuccessfull.
sub _match {
    my $self = shift;
    my $m = shift;
    my $val;
    confess("Expected $m, got EOF") unless ref $self->_next_token();
    if ($self->_next_token()->[0] eq $m){
        my $next = shift;
        if ($next && $next ne $self->_next_token()->[1]){
            $self->_expected($next, $self->_next_token()->[1]);
        }
        $val = $self->_next_token()->[1];
        $self->_proceed();
        return $val;
    } else {
        $self->_expected($m, $self->_next_token()->[0]);
    }
}

# <value> -> <float> | <name> | <function_call>
# parses a single value: a float, a function call or a variable name
# returns the corresponding AST.
sub _value {
    my $self = shift;
    if ($self->_lookahead("Name", "OpenParen")){
        return $self->_function_call();
    } elsif ($self->_is_next_token("Name")){
        return $self->_get_variable();
    } else {
        return $self->_match("Float");
    }
}

# <function_call> -> <name> '(' [<expression> [',' <expression>]* ]? ')'
# parses a function call, the AST looks like this: ['&', $name, @args]
sub _function_call {
    my $self = shift;
    my @res = ('&', $self->_match("Name"));
    $self->_match("OpenParen");
    if ($self->_is_next_token("ClosingParen")){
        $self->_proceed();
        return \@res;
    } 
    push @res, $self->_expression();
    while ($self->_is_next_token("Comma")){
        $self->_proceed();
        push @res, $self->_expression();
    }
    $self->_match("ClosingParen");
    return \@res;
}

# <name> -> m/[a-zA-Z_]\w*/
# parses a variable name, and returns it
sub _get_variable {
    my $self = shift;
    my $var_name = $self->_match("Name");
    return $var_name;
}


# checks if the given AST represents a lvalue of an _assignment
sub _is_lvalue {
    my $self = shift;
    my $ast = $self->_simplify_ast(shift);
    if (! ref($ast) && $ast =~ m/^[a-zA-Z_]/){
# simple variable name
        return 1;
    } else {
        return 0;
    }
}

# <statement> -> <_assignment> | <expression>
# parses a statement, eg an _assignment or an expression.
sub _statement {
    my $self = shift;
    my $e = $self->_expression();
    if ($self->_is_next_token("AssignmentOp")){
        $e = $self->_assignment($e);
    }

    if ($self->{config}->{force_semicolon}){
# forced semicolon at the and of a statement, but last statement
# isn't forced to have one.
        if ($self->_next_token()){
            $self->_match("Colon");
        }
    } else {
# optional semicolon at end of statement
        if ($self->_is_next_token("Colon")){
            $self->_proceed();
        }
    }
    return $e;
}

# <_assignment> ::= <lvalue> '=' <expression>
# expects the lvalue as first argument
sub _assignment {
    my $self = shift;
    my $e = shift;
    $self->_match("AssignmentOp");
    my $val = $self->_expression();
    if ($self->_is_lvalue($e)){
        return ['=', $e, $val];
    } else {
        confess("Not an lvalue in _assignment");
    }
}


# <term> ::= <exponential> [('*'|'/') <exponential>]*
# the AST is a bit weird, a simple product is expressed as
# ['*', $v1, $v2, ... ]
# a division is a bit more complex:
# a / b / c becomes ['*', a, ['/', b], ['/', c]]
sub _term {
    my $self = shift;
    my $val = $self->_exponential();
    my @res = ('*', $val);
    while (my $op = $self->_is_next_token("MulOp")){
        if ($op eq '*'){
            $self->_proceed();
            push @res, $self->_exponential();
        } elsif ($op eq '/'){
            $self->_proceed();
            push @res, ['/',  $self->_exponential()];
        } else {
            die "Don't know how to handle MulOp $op\n";
        }
    }
    return \@res;
}

# <expression> ::= ['+'|'-']? <term> [('+'|'-') term]*
sub _expression {
    my $self = shift;
#	print STDERR "expression...\n";
    my @res = ('+');
    if (my $op = $self->_is_next_token("AddOp")){
# unary +/-
        $self->_proceed();
        if ($op eq '+'){
            push @res, $self->_term();
        } else {
            push @res, ['-', $self->_term()];
        }
    } else {
        push @res, $self->_term();
    }
    while (my $op = $self->_is_next_token("AddOp")){
        if ($op eq '+'){
            $self->_proceed();
            push @res, $self->_term();
        } elsif ($op eq '-'){
            $self->_proceed();
            push @res, ['-', $self->_term()];
        } else {
            confess("weird things...\n");
        }
    }
    return \@res;
}

# <factor> ::= <value> | '(' <expression> ')'
sub _factor {
    my $self = shift;
    my $val;
    if ($self->_is_next_token("OpenParen")){
        $self->_match("OpenParen", '(');
        $val = $self->_expression();
        $self->_match("ClosingParen", ')');
    } else {
        $val = $self->_value();
    }
    return $val;
}

# <exponential> ::= <factor> [ '^' <factor>]?
# note that 2**3**4 is not defined
sub _exponential {
    my $self = shift;
    my $val = $self->_factor();
    if ($self->_is_next_token("ExpOp")){
        $self->_match("ExpOp");
        return ['^', $val, $self->_factor()];
    } else {
        return $val;

    }
}

# simplify an AST.
# atm just reduces constructs like ['*', $a] to $a
sub _simplify_ast {
    my $self = shift;
    my $ast = shift;
    return $ast unless ref $ast;
    my @a = @$ast;
    my @simplifiable = ('+', '*', '{');
    if (scalar @a == 2 && (grep {$_ eq $a[0]} @simplifiable) ){
#       print STDERR "Simplifying op $a[0]...\n";
        # turns ['+', $foo] into $foo
        return $self->_simplify_ast($a[1]);
    }
    my @res;
    for (@a){
        push @res, $self->_simplify_ast($_);
    }
    return \@res;
}

# evaluates an arbitrary AST, and returns its value
sub _execute {
    my $self = shift;
    my %dispatch = (
            '/' => sub {my $self = shift; 1 / $self->_execute(shift)},
            '-' => sub {my $self = shift; 0 - $self->_execute(shift)},
            '+' => \&_exec_sum,
            '*' => \&_exec_mul,
            '^' => sub {my $self = shift; $self->_execute(shift) **  $self->_execute(shift)},
            '=' => \&_exec_assignment,
            '&' => \&_exec_function_call,
            '{' => \&_exec_block,
    );
    my $ast = shift;
#   $ast = $self->{ast} unless defined($ast);
#   print STDERR "Executing " . Dumper($self->{ast});
    if (ref $ast ){
        my @a = @$ast;
        my $op = shift @a;
        if (my $fun = $dispatch{$op}){
            return &$fun($self, @a);
        } else {
            confess ("Operator '$op' not yet implemented\n");
        }
    } else {
#       warn "Literal '$ast' found";
        if ($ast =~ m/^\d/){
#           warn("Found a number");
            return $ast;
        } else {
            # name is always a variable name
            return $self->_variable_lookup($ast);
        }
    }
}

# executes a sum
sub _exec_sum {
    my $self = shift;
    my $sum = 0;
    foreach (@_){
        $sum += $self->_execute($_);
    }
    return $sum;
}

# executes a value
sub val {
    my $self = shift;
    $self->{temp_vars} = shift || {};
    my $res =  $self->_execute($self->{ast});
    $self->{temp_vars} = {};
    return $res;
}

# executes a block, eg a list of statements
sub _exec_block {
    my $self = shift;
#   warn "Executing block: ". Dumper(\@_);
    my $res;
    foreach (@_){
        $res = $self->_execute($_);
    }
    return $res;
}

# executes a multiplication 
sub _exec_mul {
    my $self = shift;
    my $prod = 1;
    foreach (@_){
        $prod *= $self->_execute($_);
    }
    return $prod;
}

# executes an _assignment
sub _exec_assignment {
    my $self = shift;
    my $lvalue = shift;
    return $self->{variables}{$lvalue} = $self->_execute(shift);
}

# executes a function call
# currently only builtins are supported
sub _exec_function_call {
    my $self = shift;
    my $name = shift;
    my %builtin_dispatch = (
            'sqrt'  => sub { sqrt $_[0] },
            'sin'   => sub { sin  $_[0] },
            'asin'  => sub { asin $_[0] },
            'cos'   => sub { cos  $_[0] },
            'acos'  => sub { acos $_[0] },
            'tan'   => sub { tan  $_[0] },
            'atan'  => sub { atan $_[0] },
            'exp'   => sub { exp  $_[0] },
            'log'   => sub { log  $_[0] },
            'sinh'  => sub { (exp($_[0]) - exp(-$_[0]))/2},
            'cosh'  => sub { (exp($_[0]) + exp(-$_[0]))/2},
            'log10' => sub { log($_[0]) / log(10) },
            'log2'  => sub { log($_[0]) / log(2) },
            'theta' => sub { $_[0] > 0 ? 1 : 0 },
            'pi'    => sub { 3.141592653589793 },

            );
    if (my $fun = $builtin_dispatch{$name}){
        return &$fun(map {$self->_execute($_)} @_);
    } else {
        confess("Unknown function: $name");
    }
}

# checks if a variable is defined, and returns its value
sub _variable_lookup {
    my ($self, $var) = @_;
    if (exists $self->{temp_vars} && exists $self->{temp_vars}->{$var}){
        return $self->{temp_vars}->{$var};
    } elsif (exists $self->{variables}->{$var}){
        return $self->{variables}->{$var};
    } else {
        confess("Variable '$var' not defined");
    }
}

1;

# vim: sw=4 ts=4 expandtab
