/* PSPP - computes sample statistics.
   Copyright (C) 1997, 1998 Free Software Foundation, Inc.
   Written by Ben Pfaff <blp@gnu.org>.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
   02111-1307, USA. */

#include <config.h>
#include <assert.h>
#include <ctype.h>
#include <math.h>
#include <stdlib.h>
#include "common.h"
#include "error.h"
#include "getline.h"
#include "misc.h"
#include "settings.h"
#include "str.h"
#include "lexer.h"
#include "lexerP.h"
#include "var.h"
#include "cases.h"

#undef DEBUGGING
/*#define DEBUGGING 1*/
#include "debug-print.h"

/* One DO REPEAT stand-in ("macro") together with the texts that
   replace it.  Entries are chained through NEXT into a singly linked
   list. */
typedef struct repeat_entry
  {
    int type;			/* 1 if the replacements are variable
				   names, 0 for anything else. */
    char id[9];			/* Stand-in identifier, null-terminated. */
    char **replacement;		/* Array of replacement strings. */
    struct repeat_entry *next;	/* Following entry, or NULL at the end. */
  }
repeat_entry;

/* Head of the singly linked list of stand-in identifiers for the DO
   REPEAT being parsed.  internal_cmd_do_repeat() pushes each new
   entry at the head and finally hands the list to the virtual input
   script it creates; clean_up() frees it and resets it to NULL. */
static repeat_entry *repeat_tab;

/* Number of substitutions for each macro.  Set from the first
   stand-in parsed; every later stand-in must supply the same
   number. */
static int count;

/* Lines collected between DO REPEAT and END REPEAT, before the list
   is attached to a script.  line_buf_head is the first record,
   line_buf_tail the most recently appended one. */
static getl_line_list *line_buf_head;
static getl_line_list *line_buf_tail;

/* Forward declarations of the file-local helpers.  Each parse_*
   function fills in the given entry's replacement list and returns
   the number of replacements parsed, or 0 on failure. */
static int parse_ids (repeat_entry *);
static int parse_numbers (repeat_entry *);
static int parse_strings (repeat_entry *);
static void clean_up (void);
static int internal_cmd_do_repeat (void);

/* Dump helpers, compiled only when DEBUGGING is enabled. */
#if DEBUGGING
static void debug_print (void);
static void debug_print_lines (void);
#endif

/* Entry point for the DO REPEAT command.  Delegates the work to
   internal_cmd_do_repeat(); when that reports failure the macro
   table built so far is released.  Returns 1 on success, 0 on
   failure. */
int
cmd_do_repeat (void)
{
  int succeeded = internal_cmd_do_repeat () != 0;

  if (!succeeded)
    clean_up ();

  return succeeded;
}

/* Releases the whole macro table: for every entry, the first COUNT
   replacement strings, the replacement array itself, and the entry.
   The table is detached from repeat_tab before anything is freed,
   so repeat_tab is NULL afterwards. */
static void
clean_up (void)
{
  repeat_entry *entry = repeat_tab;

  repeat_tab = NULL;

  while (entry != NULL)
    {
      /* Remember the successor before the entry goes away. */
      repeat_entry *following = entry->next;

      if (entry->replacement != NULL)
        {
          int idx = 0;

          while (idx < count)
            free (entry->replacement[idx++]);
          free (entry->replacement);
        }

      free (entry);
      entry = following;
    }
}

/* Allocates a fresh line record and makes it the new tail of the
   line buffer chain (and also its head when the chain is empty).
   The record's fields, including its `next' link, are left for the
   caller to fill in. */
static inline void
append_record (void)
{
  getl_line_list *rec = xmalloc (sizeof *rec);

  if (line_buf_head != NULL)
    line_buf_tail->next = rec;
  else
    line_buf_head = rec;

  line_buf_tail = rec;
}

/* Does the real work of parsing the DO REPEAT command and its nested
   commands. */
static int
internal_cmd_do_repeat (void)
{
  /* Name of first DO REPEAT macro. */
  char first_name[9];

  /* Current filename. */
  char *current_filename = NULL; 

  /* The first step is parsing the DO REPEAT command itself. */
  match_id (DO);
  match_id (REPEAT);

  count = 0;
  line_buf_head = NULL;
  do
    {
      repeat_entry *e;
      repeat_entry *iter;
      int result;

      /* Get a stand-in variable name and make sure it's unique. */
      force_id ();
      for (iter = repeat_tab; iter; iter = iter->next)
	if (streq (iter->id, tokstr))
	  return msg (SE, _("Identifier %s is given twice."), tokstr);

      /* Make a new stand-in variable entry and link it into the
         list. */
      e = xmalloc (sizeof (repeat_entry));
      e->type = 0;
      e->next = repeat_tab;
      strcpy (e->id, tokstr);
      repeat_tab = e;

      /* Skip equals sign. */
      get_token ();
      force_match ('=');

      /* Get the details of the variable's possible values. */
      
      if (token == ID)
	result = parse_ids (e);
      else if (token == NUM)
	result = parse_numbers (e);
      else if (token == STRING)
	result = parse_strings (e);
      else
	return syntax_error (NULL);
      if (!result)
	return 0;

      /* If this is the first variable then it defines how many
	 replacements there must be; otherwise enforce this number of
	 replacements. */
      if (!count)
	{
	  count = result;
	  strcpy (first_name, e->id);
	}
      else if (count != result)
	return msg (SE, _("There must be the same number of substitutions "
			  "for each dummy variable specified.  Since there "
			  "were %d substitutions for %s, there must be %d "
			  "for %s as well, but %d were specified."),
		    count, first_name, count, e->id, result);

      /* Next! */
      match_tok ('/');
    }
  while (token != '.');

#if DEBUGGING
  debug_print ();
#endif

  /* Read all the lines inside the DO REPEAT ... END REPEAT. */
  {
    int nest = 1;

    for (;;)
      {
	if (!getl_read_line ())
	  msg (FE, _("Unexpected end of file."));

	/* If the current file has changed then record the fact. */
	if (current_filename != curfn)
	  {
	    assert (curln > 0 && curfn != NULL);
	    
	    append_record ();
	    line_buf_tail->len = -curln;
	    line_buf_tail->line = xstrdup (curfn);
	    current_filename = curfn;
	  }
	
	/* FIXME?  This code is not strictly correct, however if you
	   have begun a line with DO REPEAT or END REPEAT and it's
	   *not* a command name, then you are obviously *trying* to
	   break this mechanism.  And you will.  Also, the entire
	   command names must appear on a single line--they can't be
	   spread out. */
	{
	  char *cp = getl_buf;

	  /* Skip leading indentors and any whitespace. */
	  if (*cp == '+' || *cp == '-' || *cp == '.')
	    cp++;
	  while (isspace ((unsigned char) *cp))
	    cp++;

	  /* Find END REPEAT. */
	  if (!strncasecmp (cp, "end", 3))
	    {
	      while (isalpha ((unsigned char) *cp))
		cp++;
	      while (isspace ((unsigned char) *cp))
		cp++;
	      if (!strncasecmp (cp, "rep", 3))
		{
		  nest--;
		  if (!nest)
		    break;
		}
	    }
	  else /* Find DO REPEAT. */
	    if (!strncasecmp (cp, "do", 2))
	      {
		cp += 2;
		while (isspace ((unsigned char) *cp))
		  cp++;
		if (!strncasecmp (cp, "rep", 3))
		  nest++;
	      }
	}

	append_record ();
	line_buf_tail->len = getl_buf_len;
	line_buf_tail->line = xmalloc (getl_buf_len + 1);
	memcpy (line_buf_tail->line, getl_buf, getl_buf_len + 1);
      }
  }

  /* FIXME: For the moment we simply discard the contents of the END
     REPEAT line.  We should actually check for the PRINT specifier.
     This can be done easier when we buffer entire commands instead of
     doing it token by token; see TODO. */
  get_entire_line ();	
  
  /* Tie up the loose end of the chain. */
  if (line_buf_head == NULL)
    {
      msg (SW, _("No commands in scope."));
      return 1;
    }
  line_buf_tail->next = NULL;

  /* Show the line list. */
#if DEBUGGING
  debug_print_lines ();
#endif
  
  /* Make new variables. */
  {
    repeat_entry *iter;
    for (iter = repeat_tab; iter; iter = iter->next)
      if (iter->type == 1)
	{
	  int i;
	  for (i = 0; i < count; i++)
	    {
	      /* Note that if the variable already exists there is no
		 harm done. */
	      variable *v = create_variable (&default_dict,
					     iter->replacement[i],
					     NUMERIC, 0);

	      /* If we created the variable then we need to initialize
		 its observations to SYSMIS. */
	      if (v)
		envector (v);
	    }
	}
  }

  /* Create the DO REPEAT virtual input file. */
  {
    getl_script *script = xmalloc (sizeof (getl_script));

    script->first_line = line_buf_head;
    script->cur_line = NULL;
    script->remaining_loops = count;
    script->loop_index = -1;
    script->macros = repeat_tab;

    getl_add_DO_REPEAT_file (script);
  }

  return 1;
}

/* Parses a set of ids for DO REPEAT.

   Marks E as holding variable names (type 1) and stores the parsed
   names in E->replacement.  Several variable lists may follow one
   another; parsing stops at `/' or `.'.

   Returns the number of names stored.  On failure returns 0, with
   everything collected so far released and E->replacement set to
   NULL. */
static int
parse_ids (repeat_entry * e)
{
  int n = 0;

  e->type = 1;
  e->replacement = NULL;

  do
    {
      char **names;
      int nnames;
      int i;

      if (!parse_mixed_vars (&names, &nnames, PV_NONE))
        {
          /* The caller's cleanup does not know how many names were
             collected before the failure, so release them here
             rather than leaving a partial array behind. */
          for (i = 0; i < n; i++)
            free (e->replacement[i]);
          free (e->replacement);
          e->replacement = NULL;
          return 0;
        }

      e->replacement = xrealloc (e->replacement,
                                 (nnames + n) * sizeof (char *));

      /* Take ownership of the returned strings instead of copying
         each one and freeing the original; only the array that
         carried them needs to be freed. */
      for (i = 0; i < nnames; i++)
        e->replacement[n + i] = names[i];
      free (names);
      n += nnames;
    }
  while (token != '/' && token != '.');

  return n;
}

/* Stores the decimal text of VALUE into a newly allocated string and
   points *REPL at it.

   The buffer is sized from the width of `long' itself: three
   characters per byte is more than the decimal digits a byte can
   contribute (assuming 8-bit bytes), and the two extra characters
   hold a minus sign and the null terminator.  INT_DIGITS, defined
   outside this file and named for `int', is not relied on because
   the value printed here is a `long'. */
static inline void
store_numeric (char **repl, long value)
{
  *repl = xmalloc (sizeof (long) * 3 + 2);
  sprintf (*repl, "%ld", value);
}

/* Parses a list of numbers for DO REPEAT.

   Each item is either a single integer or a range `A TO B', which
   expands to every integer from A to B inclusive, counting downward
   when A is greater than B.  Items may be separated by commas;
   parsing stops at `/' or `.'.

   Marks E as a non-variable stand-in (type 0), stores the decimal
   text of every value in E->replacement, and returns the number of
   values stored. */
static int
parse_numbers (repeat_entry *e)
{
  /* First and last numbers for TO. */
  long a, b;

  /* Alias to e->replacement. */
  char **array;

  /* Number of entries in array; maximum number for this allocation
     size. */
  int n, m;

  n = m = 0;
  e->type = 0;
  e->replacement = array = NULL;

  do
    {
      /* Number of values this item expands to. */
      long span;

      /* Parse A TO B into a, b. */
      force_int ();
      a = tokint;

      get_token ();
      if (token == TO)
        {
          get_token ();
          force_int ();
          b = tokint;

          get_token ();
        }
      else b = a;

      /* labs(), not abs(): the difference is a long, and squeezing
         it through abs()'s int parameter could under-count the range
         and leave the array too small for the loops below.
         NOTE(review): N and M are still ints, so a range with more
         than INT_MAX values is not handled. */
      span = labs (b - a) + 1;
      if (n + span > m)
        {
          m = n + span + 16;
          e->replacement = array = xrealloc (array, m * sizeof (char *));
        }

      if (a == b)
        store_numeric (&array[n++], a);
      else
        {
          long iter;

          if (a < b)
            for (iter = a; iter <= b; iter++)
              store_numeric (&array[n++], iter);
          else
            for (iter = a; iter >= b; iter--)
              store_numeric (&array[n++], iter);
        }

      match_tok (',');
    }
  while (token != '/' && token != '.');

  /* Trim the array to the number of entries actually used. */
  e->replacement = xrealloc (array, n * sizeof (char *));

  return n;
}

/* Parses a list of strings for DO REPEAT.

   Marks E as a non-variable stand-in (type 0) and stores the token
   representation of each string in E->replacement.  Strings may be
   separated by commas; parsing stops at `/' or `.'.

   Returns the number of strings stored.  If something other than a
   string is found, reports an error, releases everything collected
   so far, leaves E->replacement NULL, and returns 0.

   Defined `static' to match the prototype near the top of the
   file. */
static int
parse_strings (repeat_entry * e)
{
  char **string;
  int n, m;

  e->type = 0;
  string = e->replacement = NULL;
  n = m = 0;

  do
    {
      if (token != STRING)
        {
          int i;
          msg (SE, _("String expected."));
          for (i = 0; i < n; i++)
            free (string[i]);
          free (string);

          /* E->replacement aliases STRING.  Clear it so the caller's
             cleanup does not free the same memory a second time. */
          e->replacement = NULL;
          return 0;
        }

      /* Grow the array in steps of 16 entries. */
      if (n + 1 > m)
        {
          m += 16;
          e->replacement = string = xrealloc (string, m * sizeof (char *));
        }
      string[n++] = get_token_representation ();
      get_token ();

      match_tok (',');
    }
  while (token != '/' && token != '.');

  /* Trim the array to the number of entries actually used. */
  e->replacement = xrealloc (string, n * sizeof (char *));

  return n;
}

/* Handles an END REPEAT that reaches the command dispatcher.  An END
   REPEAT that closes a DO REPEAT is consumed by the line-reading
   loop in internal_cmd_do_repeat(), so one arriving here has no
   matching DO REPEAT.  Reports the error and returns 0. */
int
cmd_end_repeat (void)
{
  const int failure = 0;

  msg (SE, _("No matching DO REPEAT."));

  return failure;
}

/* Looks up MACRO_NAME, ignoring case, among the DO REPEAT macros of
   every script on the chain that starts at getl_head and follows
   `included_from'.  Scripts without buffered lines (first_line is
   NULL) are skipped.  Returns the matching macro's substitution for
   that script's current loop index, or NULL if no macro matches. */
char *
find_DO_REPEAT_substitution (char *macro_name)
{
  getl_script *script = getl_head;

  while (script != NULL)
    {
      if (script->first_line != NULL)
        {
          repeat_entry *macro;

          for (macro = script->macros; macro != NULL; macro = macro->next)
            if (strcasecmp (macro->id, macro_name) == 0)
              return macro->replacement[script->loop_index];
        }

      script = script->included_from;
    }

  return NULL;
}

/* Makes appropriate DO REPEAT macro substitutions within getl_buf.

   Trailing whitespace is stripped and a terminal end-of-command
   character (set_endcmd) is set aside.  The line is then copied into
   a fresh buffer, with every identifier outside quoted or
   apostrophized strings that names an active DO REPEAT macro
   replaced by its current substitution.  Finally the end-of-command
   character is put back, and getl_buf, getl_buf_size and
   getl_buf_len are switched over to the new buffer; the old buffer
   is freed.

   Only the first 8 characters of an identifier take part in the
   macro lookup. */
void
perform_DO_REPEAT_substitutions (void)
{
  /* Are we in an apostrophized string or a quoted string? */
  int in_apos = 0, in_quote = 0;

  /* Source pointer. */
  char *cp = getl_buf;

  /* Output buffer, size, pointer. */
  char *output_buf;
  size_t output_buf_size;
  char *op;

  /* Terminal dot. */
  int dot = 0;

  output_buf_size = getl_buf_size;
  op = output_buf = xmalloc (getl_buf_size);

  /* Strip trailing whitespace, check for & remove terminal dot. */
  while (getl_buf_len > 0
         && isspace ((unsigned char) getl_buf[getl_buf_len - 1]))
    getl_buf_len--;
  if (getl_buf_len > 0 && getl_buf[getl_buf_len - 1] == set_endcmd)
    dot = 1, getl_buf_len--;
  getl_buf[getl_buf_len] = 0;

  for (; cp < &getl_buf[getl_buf_len]; )
    {
      if (*cp == '\'' && !in_quote)
        in_apos ^= 1;
      else if (*cp == '"' && !in_apos)
        in_quote ^= 1;

      /* Text inside strings, and anything that cannot start an
         identifier, is copied through unchanged. */
      if (in_quote || in_apos || !is_id1 (*cp))
        {
          *op++ = *cp++;
          continue;
        }

      /* Collect an identifier. */
      {
        char name[9];
        char *start = cp;
        char *np = name;
        char *substitution;
        size_t subst_len;

        /* Keep at most 8 characters for the lookup, but consume the
           whole identifier from the input. */
        while (is_idn (*cp) && np < &name[8])
          *np++ = *cp++;
        while (is_idn (*cp))
          cp++;
        *np = 0;

        substitution = find_DO_REPEAT_substitution (name);
        if (!substitution)
          {
            memcpy (op, start, cp - start);
            op += cp - start;
            continue;
          }

        /* Force output buffer size, copy substitution. */
        subst_len = strlen (substitution);
        {
          /* Bytes already written.  Kept as an offset because the
             buffer may move when it is enlarged. */
          size_t used = op - output_buf;

          /* Room needed for: the output so far, the substitution,
             the rest of the input, the terminal dot that is put back
             at the end (if any), and the null terminator. */
          size_t min_outbuf_size = (used + subst_len
                                    + (getl_buf + getl_buf_len - cp)
                                    + dot + 1);

          if (output_buf_size < min_outbuf_size)
            {
              output_buf_size = max (output_buf_size * 2,
                                     min_outbuf_size + 16);
              output_buf = xrealloc (output_buf, output_buf_size);

              /* The old block may have been moved; re-derive the
                 write pointer from the new base address. */
              op = output_buf + used;
            }
        }
        memcpy (op, substitution, subst_len);
        op += subst_len;
      }
    }
  if (dot)
    *op++ = set_endcmd;
  *op = 0;

  free (getl_buf);
  getl_buf = output_buf;
  getl_buf_size = output_buf_size;
  getl_buf_len = op - output_buf;
}


/* Debugging code. */

#if DEBUGGING
/* Prints the macro table to stdout, one line per stand-in: its name
   (tagged "(ids)" when the replacements are variable names), then
   its COUNT replacements, then `/' if another stand-in follows or
   `.' after the last one. */
static void
debug_print (void)
{
  repeat_entry *entry;

  printf ("DO REPEAT\n");
  for (entry = repeat_tab; entry != NULL; entry = entry->next)
    {
      const char *tag = entry->type ? "(ids)" : "";
      int k = 0;

      printf ("   %s%s=", entry->id, tag);
      while (k < count)
        printf ("%s ", entry->replacement[k++]);
      putc (entry->next ? '/' : '.', stdout);
      printf ("\n");
    }
}

/* Prints the buffered DO REPEAT lines to stdout, each prefixed with
   the file name and line number it came from.  A record with a
   negative length is a file-change marker: it supplies a new file
   name and starting line number and is not itself printed. */
static void
debug_print_lines (void)
{
  getl_line_list *rec = line_buf_head;
  char *file_name = "(none)";
  int line_no = 65536;

  printf ("---begin DO REPEAT lines---\n");
  while (rec != NULL)
    {
      if (rec->len >= 0)
        printf ("%s:%d: %s", file_name, line_no++, rec->line);
      else
        {
          line_no = -rec->len;
          file_name = rec->line;
        }

      rec = rec->next;
    }
  printf ("---end DO REPEAT lines---\n");
}
#endif /* DEBUGGING */
