/* PSPP - computes sample statistics.
   Copyright (C) 1997, 1998 Free Software Foundation, Inc.
   Written by Ben Pfaff <blp@gnu.org>.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
   02111-1307, USA. */

#include <config.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <ctype.h>
#include <limits.h>
#include <assert.h>
#include <stdarg.h>
#include "error.h"
#include "getline.h"
#include "misc.h"
#include "output.h"
#include "lexer.h"
#include "lexerP.h"
#include "settings.h"
#include "str.h"
#include "common.h"

/* Current token.  The lexer in this file reads it (for example in
   get_token_representation() and convert_negative_to_dash()) and
   putback() overwrites it. */
int token;

/* For NUM token, the token's value. */
double tokval;

/* For ID, STRING, and TAGGED_QUOTE tokens, the token's value.  For a
   NUM token yylex() also stores the number's source text here. */
unsigned char *tokstr;

/* Number of bytes allocated for tokstr. */
int tokstr_size;

/* Length of tokstr.  Set by yylex() for numbers and by the string and
   tagged-quote parsers; yylex() does not update it for identifiers. */
int tokstr_len;

/* For ID token, the full ID without truncation to 8 characters,
   converted to upper case by yylex(). */
unsigned char *toklongstr;

/* For NUM token, token's value if it's an integer, NOT_LONG otherwise. */
long tokint;

static char *prog;		/* Pointer to next token in getl_buf. */
static int dot;			/* 1=this line ends with a terminal dot */
static int eof;			/* 1=the last token returned was EOF */
static int put;			/* If nonzero, next token returned by yylex().
				   Used only in exceptional circumstances.
				   Set by putback() and putfwd(). */

/* Character classes for the beginning of tokens.  These are the
   values stored in tbl[] below; yylex() switches on them to decide
   how to parse the token that starts at the current character. */
#define COTHER 1		/* invalid character */
#define CSTR 2			/* string: ' or " */
#define CID 3			/* identifier (probably): letter, #, $, or @ */
#define CDELIM 4		/* single-character delimiter or operator */
#define CSTAR 5			/* `*' or `**' */
#define CNUM 6			/* digit, decimal point, or minus sign */
#define CLT 7			/* < or <= or <> */
#define CGT 8			/* > or >= */
#define CNE 9			/* ~ or ~= */
#define CAND 10			/* & */
#define COR 11			/* | */
#define CTAGGEDQUOTE 12		/* { */

/* PORTME: Table of ASCII character classes corresponding to C*
   constants.  Indexed by character code (cast to unsigned char);
   each row below covers 16 consecutive codes, and each group of four
   rows covers 64.  Every code from 128 up is classed as COTHER. */
static char tbl[256] =
{
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 2, 3, 3, 1, 10, 2, 4, 4, 5, 4, 4, 6, 6, 4,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 1, 1, 7, 4, 8, 1,

  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,
  1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 12, 11, 1, 9, 1,

  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};

/* FYI: Identifiers begin with a letter or one of the characters @ #
   $; the remaining characters may also be digits or one of the
   additional characters . _. */

/* PORTME: Boolean table of ASCII characters that may appear following
   the first character in an identifier.  Indexed by character code
   (cast to unsigned char), 16 codes per row; nonzero entries are the
   letters, the digits, and the characters # $ . @ _.  yylex() uses it
   to find the end of an identifier. */
static char id[256] =
{
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,

  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,

  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

/* Message used wherever the lexer runs out of input unexpectedly.  It
   is marked with N_() here and passed through gettext() at each point
   of use. */
static char une[] = N_("Unexpected end of file.");

/* Defined near the end of this file; declared here because the
   functions above its definition call it. */
int get_line (void);

/* Warns the user that the remainder of the command is being dropped,
   then empties the current input line so that nothing more is
   tokenized from it.  Any pending terminal dot and any pushed-ahead
   token are forgotten as well. */
void
discard_line (void)
{
  msg (SW, _("The rest of this command has been discarded."));

  /* Truncate the line buffer and restart scanning at its (now empty)
     beginning. */
  getl_buf[0] = 0;
  prog = getl_buf;

  put = 0;
  dot = 0;
}

/* Hands the whole current input line to the caller.  The tokenizer's
   position is moved to the end of the line and the terminal-dot flag
   is cleared, so no further tokens are produced from this line. */
char *
get_entire_line (void)
{
  char *line = getl_buf;

  dot = 0;
  prog = line + getl_buf_len;
  return line;
}

/* Like get_entire_line(), except that the returned pointer starts at
   the first character that has not been tokenized yet. */
char *
get_rest_of_line (void)
{
  char *rest = prog;

  dot = 0;
  prog = getl_buf + getl_buf_len;
  return rest;
}

/* Like get_rest_of_line(), and additionally reports through *HAD_DOT
   whether the line ended with a terminal dot (nonzero) or not
   (zero). */
char *
get_dotted_rest_of_line (int *had_dot)
{
  char *rest;

  /* Capture the flag first: get_rest_of_line() clears it. */
  *had_dot = dot;
  rest = get_rest_of_line ();
  return rest;
}

/* Maps X, which must lie in the range 0...15, to the corresponding
   upper-case hexadecimal digit character ('0'...'9', 'A'...'F'). */
static inline int
make_hexit (int x)
{
  /* PORTME: Relies on the digits and the letters A-F each being
     contiguous in the execution character set, as they are in
     ASCII. */
  if (x >= 10)
    return x - 10 + 'A';
  return x + '0';
}

/* Reports a syntax error at the current token.

   MESSAGE, if non-NULL, is a printf()-style format string; it and the
   arguments that follow are formatted and inserted into the error
   text.  The formatted message is limited to the size of an internal
   buffer and is truncated, not overrun, if it would be longer.

   Always returns 0, so that callers can write
   `return syntax_error (...);'. */
int
syntax_error (const char *message, ...)
{
  char *token_rep = get_token_representation ();
  char errbuf[1024];
  
  if (message)
    {
      va_list args;
      
      va_start (args, message);
      /* Bounded formatting: an unbounded vsprintf() here could write
         past the end of errbuf for long tokens or arguments. */
      vsnprintf (errbuf, sizeof errbuf, message, args);
      va_end (args);
      message = errbuf;
    }

  /* An empty representation is what get_token_representation()
     produces for the end-of-input token. */
  if (token_rep[0] == 0)
    msg (SE, _("Syntax error at end of file."));
  else if (message)
    msg (SE, _("Syntax error %s at `%s'."), message, token_rep);
  else
    msg (SE, _("Syntax error at `%s'."), token_rep);
  
  free (token_rep);
  return 0;
}
  
/* Builds a printable rendering of the current token and returns it
   in freshly allocated memory that the caller must free():

   - ID and NUM tokens: a copy of tokstr.
   - STRING tokens: the contents in single quotes with embedded single
     quotes doubled, or, if any byte lies outside the range 32...126,
     as a hex string of the form X'...'.
   - STOP: the empty string.
   - Keyword and `**' tokens: the keyword's spelling.
   - Anything else: a one-character string holding the token's
     code. */
char *
get_token_representation (void)
{
  static const char *kw_tab[EXP - AND + 1] = 
    {
      "AND", "OR", "NOT", "EQ", "GE", "GT", "LE", "LT", "NE",
      "ALL", "BY", "TO", "WITH", "**",
    };
  char *rep;

  if (token == ID || token == NUM)
    return xstrdup (tokstr);

  if (token == STOP)
    {
      rep = xmalloc (1);
      rep[0] = 0;
      return rep;
    }

  if (token == STRING)
    {
      unsigned char *const end = &tokstr[tokstr_len];
      unsigned char *src;
      unsigned char *dst;
      int as_hex = 0;

      /* PORTME: Decide whether every byte is printable ASCII. */
      for (src = (unsigned char *) tokstr; src < end; src++)
        if (*src < 32 || *src > 126)
          {
            as_hex = 1;
            break;
          }

      /* Worst case is two output characters per input byte, plus the
         optional X, the two quotes, and the terminator. */
      rep = xmalloc (2 + tokstr_len * 2 + 1 + 1);
      dst = (unsigned char *) rep;

      if (as_hex)
        *dst++ = 'X';
      *dst++ = '\'';

      if (as_hex)
        {
          for (src = (unsigned char *) tokstr; src < end; src++)
            {
              const unsigned char byte = *src;

              *dst++ = make_hexit (byte >> 4);
              *dst++ = make_hexit (byte & 0xf);
            }
        }
      else
        {
          for (src = (unsigned char *) tokstr; *src; src++)
            {
              /* A single quote inside the string is written twice. */
              if (*src == '\'')
                *dst++ = '\'';
              *dst++ = *src;
            }
        }

      *dst++ = '\'';
      *dst = 0;
      return rep;
    }

  if (token >= AND && token <= EXP)
    return xstrdup (kw_tab[token - AND]);

  rep = xmalloc (2);
  rep[0] = token;
  rep[1] = 0;
  return rep;
}

/* Pushes the current token back so that it is the next one yylex()
   returns, and installs T as the current token in its place. */
void
putback (int t)
{
  const int current = token;

  token = t;
  put = current;
}

/* Arranges for T to be the token returned by the next call to
   yylex().  Only one token can be pending at a time; a later call
   replaces an earlier one. */
void
putfwd (int t)
{
  put = t;
}

/* The lexer normally folds a leading `-' into the number that follows
   it.  Where the grammar allows a dash as a token in its own right,
   call this function: if the current token is a negative number it is
   split into a `-' token (which becomes current) followed by the
   number's absolute value (which becomes the next token read).
   Otherwise nothing happens. */
void
convert_negative_to_dash (void)
{
  if (token != NUM || !(tokval < 0.0))
    return;

  token = '-';

  /* Make the stored value positive. */
  tokval = -tokval;
  if (tokint != NOT_LONG)
    tokint = -tokint;

  /* Drop the leading character of the number's text.  Copying
     tokstr_len bytes from position 1 moves the terminator too. */
  memmove (tokstr, tokstr + 1, tokstr_len);
  tokstr_len--;

  putfwd (NUM);
}
   
/* Moves the tokenizer's read position within the current line to P.
   P must point into getl_buf; tokenization resumes from there. */
void
set_prog (char *p)
{
  prog = p;
}

/* Initializes the lexer by reading the first line of input.  If no
   line can be read, reports "Unexpected end of file." with severity
   FE. */
void
init_lex (void)
{
  if (get_line ())
    return;

  msg (FE, gettext (une));
}

/* Clears the end-of-file flag, so that the lexer no longer treats the
   input as exhausted. */
void
reset_eof (void)
{
  eof = 0;
}

/* Returns the first character of the next token without consuming
   it.  If the next token is not an identifier, the character returned
   will not be one that can begin an identifier.  Specifically, the
   string lead-ins X', B', and O' (hex, binary, and octal strings, in
   either case and with either kind of quote) cause lookahead() to
   return '.  Note that an alphanumeric return value doesn't guarantee
   an ID token; it could also be a reserved-word token.

   If a token has been pushed ahead, that token is returned instead of
   a character.  At the end of a command, returns '.'.  Reads further
   input lines as needed, and reports "Unexpected end of file." with
   severity FE if the input runs out. */
int
lookahead (void)
{
  int lead;

  if (put)
    return put;

  if (eof)
    msg (FE, gettext (une));

  /* Skip whitespace, moving on to following lines if necessary. */
  for (;;)
    {
      while (isspace ((unsigned char) *prog))
        prog++;
      if (*prog)
        break;
      if (dot)
        return '.';
      else if (!get_line ())
        msg (FE, gettext (une));

      /* Reading a line may push a token ahead (see
         preprocess_line()). */
      if (put)
        return put;
    }

  /* yylex() treats a b, o, or x that is immediately followed by a
     quote as the start of a binary, octal, or hex string, so none of
     those letters may be reported as the start of an identifier. */
  lead = toupper ((unsigned char) *prog);
  if ((lead == 'X' || lead == 'B' || lead == 'O')
      && (prog[1] == '\'' || prog[1] == '"'))
    return '\'';
  return *prog;
}

/* Classifies the identifier in tokstr[], whose length is LEN
   characters: returns the matching reserved-word token if the text is
   exactly one of BY EQ GT GE LT LE NE OR TO ALL AND NOT WITH, and ID
   otherwise.  The comparison is case-sensitive against upper-case
   spellings; yylex() upper-cases the text before calling. */
static inline int
check_id (int len)
{
  if (len == 2)
    {
      const int c0 = tokstr[0];
      const int c1 = tokstr[1];

      if (c0 == 'B' && c1 == 'Y')
        return BY;
      if (c0 == 'E' && c1 == 'Q')
        return EQ;
      if (c0 == 'G' && c1 == 'T')
        return GT;
      if (c0 == 'G' && c1 == 'E')
        return GE;
      if (c0 == 'L' && c1 == 'T')
        return LT;
      if (c0 == 'L' && c1 == 'E')
        return LE;
      if (c0 == 'N' && c1 == 'E')
        return NE;
      if (c0 == 'O' && c1 == 'R')
        return OR;
      if (c0 == 'T' && c1 == 'O')
        return TO;
    }
  else if (len == 3)
    {
      const int c0 = tokstr[0];
      const int c1 = tokstr[1];
      const int c2 = tokstr[2];

      if (c0 == 'A' && c1 == 'L' && c2 == 'L')
        return ALL;
      if (c0 == 'A' && c1 == 'N' && c2 == 'D')
        return AND;
      if (c0 == 'N' && c1 == 'O' && c2 == 'T')
        return NOT;
    }
  else if (len == 4)
    {
      if (tokstr[0] == 'W' && tokstr[1] == 'I'
          && tokstr[2] == 'T' && tokstr[3] == 'H')
        return WITH;
    }

  return ID;
}

static int parse_tagged_quote (void);
static int parse_string (int base);

/* Parses a single token from the input and returns its type: either
   the code of a single-character token or one of the token constants
   (NUM, ID, STRING, a keyword, EXP, STOP, ...).  Also sets the global
   variables that carry the token's attributes:

   - NUM: tokstr and tokstr_len hold the number's text, tokval its
     value, and tokint its value as a long (NOT_LONG if it is not
     representable as one).
   - ID: toklongstr holds the full upper-cased identifier and tokstr
     its first 8 characters.
   - STRING and tagged quotes: see parse_string() and
     parse_tagged_quote().

   A token pushed ahead with putback() or putfwd() is returned first.
   Returns '.' at the end of a command and STOP at end of input.
   Invalid characters are reported and skipped. */
int
yylex (void)
{
  /* If a token was pushed ahead, return it. */
  if (put)
    {
      int save = put;
      put = 0;
      return save;
    }

  /* Find a token. */
  for (;;)
    {
      char *cp;

      /* Skip whitespace. */
      if (eof)
        msg (FE, gettext (une));
      while (1)
        {
          while (isspace ((unsigned char) *prog))
            prog++;
          if (*prog)
            break;

          /* End of line: deliver the terminal dot if there is one,
             otherwise go on to the next line. */
          if (dot)
            {
              dot = 0;
              return '.';
            }
          else if (!get_line ())
            {
              eof = 1;
              return STOP;
            }

          /* Reading a line may push a token ahead (see
             preprocess_line()). */
          if (put)
            {
              int save = put;
              put = 0;
              return save;
            }
        }

      /* Actually parse that token. */
      cp = prog;
      switch (tbl[(unsigned char) *prog])
        {
        case CNUM:
          {
            int negative = 0;
            char *tail;

            /* A `-' is part of a number only if a digit or decimal
               point follows, possibly after whitespace. */
            if (*cp == '-')
              {
                prog++;
                while (isspace ((unsigned char) *prog))
                  prog++;

                if (!isdigit ((unsigned char) *prog) && *prog != '.')
                  return '-';
                
                negative = 1;
              }
            
            /* Integer part, fraction, exponent. */
            while (isdigit ((unsigned char) *prog))
              prog++;
            if (*prog == '.')
              {
                prog++;
                while (isdigit ((unsigned char) *prog))
                  prog++;
              }
            if (*prog == 'e' || *prog == 'E')
              {
                prog++;
                if (*prog == '+' || *prog == '-')
                  prog++;
                while (isdigit ((unsigned char) *prog))
                  prog++;
              }

            /* Make a copy. */
            if (prog - cp + 1 > tokstr_size)
              {
                tokstr_size = max (prog - cp + 1, tokstr_size * 2);
                tokstr = xrealloc (tokstr, tokstr_size);
              }
            
            memcpy (tokstr, cp, prog - cp);
            tokstr[prog - cp] = 0;
            tokstr_len = prog - cp;
            
            /* From here on cp points into the copy, just past the
               minus sign if there is one. */
            cp = (char *) tokstr + negative;
              
            /* Parse as floating point. */
            tokval = strtod (cp, &tail);
            if (*tail)
              {
                /* Report the copied text.  prog points into the input
                   line and cp into tokstr, so their difference is not
                   a meaningful length. */
                msg (SE, _("%.*s does not form a valid number."),
                     tokstr_len, (char *) tokstr);

                /* Substitute a plain zero, keeping the text, its
                   length, and the sign flag consistent. */
                tokval = 0.0;
                strcpy ((char *) tokstr, "0");
                tokstr_len = 1;
                negative = 0;
                cp = (char *) tokstr;
              }
            else if (negative)
              tokval = -tokval;

            /* Parse as integer. */
            errno = 0;
            tokint = strtol (cp, &tail, 10);
            if (*tail || tokint == LONG_MIN || tokint == LONG_MAX || errno)
              tokint = NOT_LONG;
            else if (negative)
              tokint = -tokint;

            return NUM;
          }

        case CSTR:
          return parse_string (0);

        case CDELIM:
          return *prog++;

        case CSTAR:
          if (*++prog == '*')
            {
              prog++;
              return EXP;
            }
          else
            return '*';

        case CLT:
          if (*++prog == '=')
            {
              prog++;
              return LE;
            }
          else if (*prog == '>')
            {
              prog++;
              return NE;
            }
          else
            return LT;

        case CGT:
          if (*++prog == '=')
            {
              prog++;
              return GE;
            }
          else
            return GT;

        case CNE:
          if (*++prog == '=')
            {
              prog++;
              return NE;
            }
          else
            return NOT;

        case CAND:
          prog++;
          return AND;

        case COR:
          prog++;
          return OR;

        case CID:
          {
            int len;
            
            /* A b, o, or x directly before a quote introduces a
               binary, octal, or hex string. */
            if (prog[1] == '\'' || prog[1] == '"')
              {
                switch (tolower ((unsigned char) (*prog++)))
                  {
                  case 'b':
                    return parse_string (8);
                  case 'o':
                    return parse_string (3);
                  case 'x':
                    return parse_string (2);
                  default:
                    prog--;
                  }
              }
            
            prog++;
            while (id[(unsigned char) *prog])
              prog++;
            len = prog - cp;

            /* NOTE(review): toklongstr's capacity is not visible in
               this file; this copy assumes it can hold LEN
               characters plus a terminator, and the copies below
               assume tokstr can hold at least 9 bytes -- confirm
               where they are allocated. */
            strncpy ((char *) toklongstr, cp, len);
            toklongstr[len] = 0;
            for (cp = (char *) toklongstr; *cp; cp++)
              *cp = toupper ((unsigned char) (*cp));
            if (len >= 8)
              {
                /* No reserved word is this long. */
                strncpy ((char *) tokstr, (char *) toklongstr, 8);
                tokstr[8] = 0;
                return ID;
              }
            else
              {
                strncpy ((char *) tokstr, (char *) toklongstr, len);
                tokstr[len] = 0;
                return check_id (len);
              }
          }
        /* not reachable */

        case CTAGGEDQUOTE:
          {
            int tq_token = parse_tagged_quote ();

            /* -1 means the tagged quote was malformed and has already
               been reported; look for another token. */
            if (tq_token == -1)
              break;
            return tq_token;
          }

        case COTHER:
          if (isgraph ((unsigned char) *prog))
            msg (SE, _("Bad character in input: `%c'."), *prog++);
          else
            /* Convert through unsigned char so that a byte with the
               high bit set is not printed as a negative int. */
            msg (SE, _("Bad character in input: `\\%o'."),
                 (unsigned char) *prog++);
          break;
        }
    }
}

/* Converts the eight binary digits at STRING, most significant bit
   first, to the byte they denote.  If a character other than `0' or
   `1' is found, reports an error and returns a space instead. */
static unsigned char
bin_value_func (char *string)
{
  const char *bit;
  int value = 0;

  for (bit = string; bit < string + 8; bit++)
    {
      if (*bit != '0' && *bit != '1')
        {
          msg (SE, _("Binary string contains bad bit `%c'."), *bit);
          return ' ';
        }
      value = value * 2 + (*bit == '1');
    }
  return value;
}

/* Converts the three octal digits at STRING, most significant digit
   first, to an unsigned char.  If a character outside `0'...`7' is
   found, reports an error and returns a space instead. */
static unsigned char
oct_value_func (char *string)
{
  const char *digit;
  int value = 0;

  for (digit = string; digit < string + 3; digit++)
    {
      /* PORTME. */
      if (*digit < '0' || *digit > '7')
        {
          msg (SE, _("Octal string contains bad octal digit `%c'."), *digit);
          return ' ';
        }
      value = value * 8 + (*digit - '0');
    }
  return value;
}

/* Converts the two hexadecimal digits at STRING, most significant
   digit first and in either case, to the byte they denote.  If a
   character that is not a hex digit is found, reports an error and
   returns a space instead. */
static unsigned char
hex_value_func (char *string)
{
  const char *digit;
  int value = 0;

  for (digit = string; digit < string + 2; digit++)
    {
      const unsigned char c = *digit;

      if (!isxdigit (c))
        {
          msg (SE, _("Hex string contains bad hex digit `%c'."), *digit);
          return ' ';
        }

      value *= 16;

      /* PORTME. */
      if (isdigit (c))
        value += *digit - '0';
      else if (islower (c))
        value += *digit - 'a' + 10;
      else
        value += *digit - 'A' + 10;
    }
  return value;
}

/* Parses a string from the input buffer into tokstr.  The input
   buffer pointer prog must point to the initial single or double
   quote.  BASE is 0 if it is an ordinary string, or 8, 3, or 2 for a
   binary, octal, or hexstring, respectively (the number of digits
   that make up one byte).

   Several quoted sections joined by `+' signs, possibly on different
   lines, are concatenated into one string.  Within a section the
   quote character is written by doubling it.

   On return tokstr holds the string, null-terminated, and tokstr_len
   its length.  Strings longer than 255 characters are reported and
   truncated, and null bytes are reported and replaced by spaces.
   Always returns STRING. */
int 
parse_string (int base)
{
  /* Number of characters stored in tokstr so far.  This is kept as an
     index rather than a pointer so that it remains valid when
     xrealloc() moves tokstr. */
  int len = 0;
  
  /* Accumulate the entire string, joining sections indicated by +
     signs. */
  for (;;)
    {
      /* Single or double quote. */
      int c = *prog++;
      
      /* Accumulate section. */
      for (;;)
        {
          /* Check end of line. */
          if (*prog == 0)
            {
              msg (SE, _("Unterminated string constant."));
              goto finish;
            }
          
          /* Can embed quotes in a string by doubling them. */
          if (*prog == c)
            {
              if (prog[1] == c)
                prog++;
              else
                break;
            }

          /* Keep room for this character and the terminator. */
          if (len + 2 > tokstr_size)
            {
              tokstr_size = tokstr_size > 0 ? tokstr_size * 2 : 16;
              tokstr = xrealloc (tokstr, tokstr_size);
            }
          tokstr[len++] = *prog++;
        }
      prog++;

      /* Skip whitespace after final quote mark. */
      if (eof)
        break;
      for (;;)
        {
          while (isspace ((unsigned char) *prog))
            prog++;
          if (*prog)
            break;
          if (dot)
            goto finish;
          if (!get_line ())
            msg (FE, gettext (une));
        }

      /* Skip plus sign. */
      if (*prog != '+')
        break;
      prog++;

      /* Skip whitespace after plus sign. */
      if (eof)
        break;
      for (;;)
        {
          while (isspace ((unsigned char) *prog))
            prog++;
          if (*prog)
            break;
          if (dot)
            goto finish;
          if (!get_line ())
            msg (FE, gettext (une));
        }

      /* Ensure that a valid string follows. */
      if (*prog != '\'' && *prog != '"')
        {
          msg (SE, _("String expected following `+'."));
          goto finish;
        }
    }

  /* We come here when we've finished concatenating all the string sections
     into one large string. */
finish:
  if (base != 0)
    {
      unsigned char (*value_func) (char *);
      const char *type;
      int n_values;
      int i;

      switch (base)
        {
        case 8:
          type = "Binary";
          value_func = bin_value_func;
          break;
        case 3:
          type = "Octal";
          value_func = oct_value_func;
          break;
        case 2:
          type = "Hex";
          value_func = hex_value_func;
          break;
        default:
          assert (0);
        }
           
      /* Ignore trailing digits that do not make up a whole byte. */
      if (len % base)
        {
          msg (SE, _("%s string has %d characters, which is not a "
                     "multiple of %d."), type, len, base);
          len = ROUND_DOWN (len, base);
        }

      /* Convert in place.  Each output byte is written at or before
         the digits it was read from, so nothing is overwritten before
         it has been used. */
      n_values = len / base;
      for (i = 0; i < n_values; i++)
        tokstr[i] = value_func ((char *) &tokstr[i * base]);
      len = n_values;
    }

  /* Enforce the length limit on the final length. */
  if (len > 255)
    {
      msg (SE, _("String exceeds 255 characters in length (%d characters)."),
           len);
      len = 255;
    }

  /* An empty string never went through the growth check above, so
     make sure there is room for the terminator. */
  if (len + 1 > tokstr_size)
    {
      tokstr_size = tokstr_size > 0 ? tokstr_size * 2 : 16;
      tokstr = xrealloc (tokstr, tokstr_size);
    }
      
  tokstr_len = len;
  tokstr[len] = 0;
  
  {
    /* FIXME. */
    int i;
    int warned = 0;

    for (i = 0; i < tokstr_len; i++)
      if (tokstr[i] == 0)
        {
          if (!warned)
            {
              msg (SE, _("Sorry, literal strings may not contain null "
                         "characters.  Replacing with spaces."));
              warned = 1;
            }
          tokstr[i] = ' ';
        }
  }

  return STRING;
}
      
/* Prepares the line in getl_buf for tokenization:

   - blanks out slash-star comments, which end at star-slash or at the
     end of the line (comment markers inside quoted strings are left
     alone);
   - strips trailing whitespace and a terminal dot, recording in `dot'
     whether the line ends a command;
   - in batch mode, blanks a leading `+', `-', or `.' indentor, or
     pushes a `.' token ahead of a line that starts in the first
     column.

   Leaves prog at the start of the line.  Always returns 1. */
int
preprocess_line (void)
{
  char *p = getl_buf;		/* scan position in getl_buf */
  int in_comment = 0;		/* nonzero inside a comment */
  int quote = 0;		/* ' or " while inside such a string, else 0 */
  int len;			/* working copy of getl_buf_len */

  while (*p)
    {
      /* Quote characters outside comments open a string or close the
         string opened by the same kind of quote. */
      if (!in_comment && (*p == '\'' || *p == '"'))
        {
          if (quote == 0)
            quote = *p;
          else if (quote == *p)
            quote = 0;
        }

      /* Outside strings, watch for comment delimiters and blank them
         out. */
      if (!quote)
        {
          const int opens = p[0] == '/' && p[1] == '*';
          const int closes = in_comment && p[0] == '*' && p[1] == '/';

          if (opens || closes)
            {
              in_comment = opens;
              p[0] = ' ';
              p[1] = ' ';
              p += 2;
              continue;
            }
        }

      /* Comment text is replaced by spaces; everything else is
         kept. */
      if (in_comment)
        *p = ' ';
      p++;
    }

  /* Strip trailing whitespace, check for & remove terminal dot. */
  len = getl_buf_len;
  while (len > 0 && isspace ((unsigned char) getl_buf[len - 1]))
    len--;
  if (len > 0 && getl_buf[len - 1] == set_endcmd)
    {
      dot = 1;
      len--;
    }
  else
    dot = (len == 0 && set_nullline) ? 1 : 0;
  getl_buf[len] = 0;
  getl_buf_len = len;

  /* In batch mode, strip leading indentors and insert a terminal dot
     as necessary. */
  if (getl_interactive != 2 && getl_mode == GETL_MODE_BATCH)
    {
      const char first = getl_buf[0];

      if (first == '+' || first == '-' || first == '.')
        getl_buf[0] = ' ';
      else if (first && !isspace ((unsigned char) first))
        putfwd ('.');
    }

  prog = getl_buf;

  return 1;
}

/* Reads the next input line and preprocesses it for the tokenizer.
   Returns zero if no line could be read, otherwise the result of
   preprocess_line(). */
int
get_line (void)
{
  return getl_read_line () ? preprocess_line () : 0;
}

/* Tests whether token text TOK matches keyword KW.  They match if
   they are identical, or if TOK is at least 3 characters long and is
   a prefix of KW.  Returns 1 on a match, 0 otherwise. */
int
(id_match) (const char *kw, const char *tok)
{
  int tlen;

  if (streq (kw, tok))
    return 1;

  tlen = strlen (tok);
  if (tlen < 3)
    return 0;

  return strncmp (kw, tok, tlen) == 0;
}

/* Returns the spelling of keyword token TOKEN.  The `=' token is
   given the same name as EQ.  For any other token the translated
   string "<ERROR>" is returned. */
const char *
tokname (int token)
{
  static const char *kwtab[WITH - AND + 1] =
  {
    "AND", "OR", "NOT", "EQ", "GE", "GT", "LE", "LT",
    "NE", "ALL", "BY", "TO", "WITH",
  };
  int index;

  if (token >= AND && token <= WITH)
    index = token - AND;
  else if (token == '=')
    index = EQ - AND;
  else
    return _("<ERROR>");

  return kwtab[index];
}

/* Tagged quotes. */

/* Appends the single byte C to tokstr, doubling the buffer first if
   it is full, and advances tokstr_len by one. */
static inline void
add_tokstr_char (int c)
{
  const int at = tokstr_len;

  if (at >= tokstr_size)
    {
      tokstr_size *= 2;
      tokstr = xrealloc (tokstr, tokstr_size);
    }

  tokstr[at] = c;
  tokstr_len = at + 1;
}

/* Appends X to tokstr as a two-byte quantity written by STORE_2,
   doubling the buffer first if fewer than two bytes are free, and
   advances tokstr_len by two. */
static inline void
add_tokstr_unsigned (unsigned x)
{
  const int at = tokstr_len;

  if (at + 1 >= tokstr_size)
    {
      tokstr_size *= 2;
      tokstr = xrealloc (tokstr, tokstr_size);
    }

  STORE_2 (&tokstr[at], x);
  tokstr_len = at + 2;
}

/* Appends the N bytes starting at P to tokstr, doubling the buffer as
   many times as needed to make them fit, and advances tokstr_len by
   N. */
static inline void
add_tokstr_string (char *p, int n)
{
  const int needed = tokstr_len + n;

  for (; needed > tokstr_size;)
    {
      tokstr_size *= 2;
      tokstr = xrealloc (tokstr, tokstr_size);
    }

  memcpy (tokstr + tokstr_len, p, n);
  tokstr_len = needed;
}

/* Reads a tagged quote from the input stream.  On entry prog points
   at the opening `{'; the quote extends to the matching `}'.  Tagged
   quotes, which may span multiple lines, contain sequences of
   ordinary quotes and "tags", optionally separated by commas.  Each
   single tag or quote must be on a single line.  The whole jumble is
   concatenated into a single token.  Possible tags are:

   font=FONT-NAME               (sets the current font)
   family=FAMILY-NAME           (sets the current font family)
   pos=POSITION                 (sets the position within the font family)
   r i b bi                     (sets roman, italic, bold, bold-italic font)
   color=NAME                   (sets color according to NAME, which may be
   symbolic or of form #rrggbb or #rrrrggggbbbb)

   Inside a quote, `\' introduces an escape: up to three digits give a
   character code in octal, and \\, \', and \" stand for themselves.

   Each tag is stored in tokstr as a record that begins with a null
   byte, followed by a two-byte record length, a tag code, and the
   tag's data.  A null byte produced by an escape is written three
   times.

   Returns STRING if the quote contained no tags and is at most 255
   bytes long, TAGGED_QUOTE otherwise, or -1 if the quote was
   malformed, in which case an error has been reported.  On success
   tokstr holds the result, followed by a null terminator that is not
   counted in tokstr_len. */
static int
parse_tagged_quote (void)
{
  int token;

  prog++;			/* Skip initial '{'. */
  token = STRING;
  tokstr_len = 0;

  while (1)
    {
      /* Skip whitespace and commas, reading more lines as needed. */
      while (1)
        {
          while (isspace ((unsigned char) *prog) || *prog == ',')
            prog++;
          if (*prog)
            break;
          if (eof)
            msg (FE, gettext (une));
          if (dot)
            {
              msg (SE, _("Terminal dot may not be included inside tagged quote."));
              dot = 0;
              goto lose;
            }
          else if (!get_line ())
            {
              eof = 1;
              msg (SE, _("Unexpected end of file parsing tagged quote."));
              goto lose;
            }
        }

      if (*prog == '}')
        {
          /* End of the tagged quote. */
          prog++;
          break;
        }
      else if (*prog == '\'' || *prog == '"')
        {
          int quote = *prog++;
          while (*prog && *prog != quote)
            {
              /* FIXME: add support for \NNN sequences. */
              if (*prog == '\\')
                {
                  prog++;
                  if (!*prog)
                    {
                      /* FIXME: could revamp this to stitch together
                         lines terminated with \; would need revisions
                         to get_line(). */
                      msg (SE, _("Unexpected end of line following \\ in string "
                           "literal inside tagged quote."));
                      goto lose;
                    }
                  if (isdigit ((unsigned char) *prog))
                    {
                      int c = *prog++ - '0';
                      if (isdigit ((unsigned char) *prog))
                        c = c * 8 + *prog++ - '0';
                      if (isdigit ((unsigned char) *prog))
                        c = c * 8 + *prog++ - '0';
                      if (c)
                        add_tokstr_char (c);
                      else
                        {
                          /* NULLs must be tripled. */
                          add_tokstr_char (0);
                          add_tokstr_char (0);
                          add_tokstr_char (0);
                        }
                    }
                  else if (*prog == '\\' || *prog == '\'' || *prog == '"')
                    add_tokstr_char (*prog++);
                  else
                    msg (SE, _("Unknown escape sequence `\\%c' in string literal "
                         "inside tagged quote."), *prog++);
                }
              else
                add_tokstr_char (*prog++);
            }
          if (!*prog)
            {
              msg (SE, _("Unterminated string constant within tagged quote."));
              goto lose;
            }
          prog++;
        }
      else if (isalpha ((unsigned char) *prog))
        {
          char key[9], *cp = key;
          char *value;

          token = TAGGED_QUOTE;

          /* Keep at most 8 letters of the key, leaving room in key[]
             for the terminator, and skip any further letters. */
          while (*prog && isalpha ((unsigned char) *prog) && cp < &key[8])
            *cp++ = *prog++;
          while (*prog && isalpha ((unsigned char) *prog))
            prog++;
          *cp = 0;

          while (isspace ((unsigned char) *prog))
            prog++;
          if (*prog != '=')
            {
              msg (SE, _("`=' expected after key `%s' in tagged quote."), key);
              goto lose;
            }

          prog++;
          while (isspace ((unsigned char) *prog))
            prog++;
          value = prog;
          while (*prog && *prog != ',' && *prog != '\'' && *prog != '"')
            prog++;

          /* Now value points to the beginning of the value, prog
             points to the first character after the value.

             FIXME: Look how non-optimal this test-and-skip structure
             is! */
          add_tokstr_char (0);
          if (streq (key, "r"))
            {
              add_tokstr_unsigned (5);
              add_tokstr_char (TAG_FONT_BY_POSITION);
              add_tokstr_char (OUTP_F_R);
            }
          else if (streq (key, "i"))
            {
              add_tokstr_unsigned (5);
              add_tokstr_char (TAG_FONT_BY_POSITION);
              add_tokstr_char (OUTP_F_I);
            }
          else if (streq (key, "b"))
            {
              add_tokstr_unsigned (5);
              add_tokstr_char (TAG_FONT_BY_POSITION);
              add_tokstr_char (OUTP_F_B);
            }
          else if (streq (key, "bi"))
            {
              add_tokstr_unsigned (5);
              add_tokstr_char (TAG_FONT_BY_POSITION);
              add_tokstr_char (OUTP_F_BI);
            }
          else if (streq (key, "font"))
            {
              /* NOTE(review): the copy below takes one character
                 beyond the value (the delimiter that stopped the
                 scan) and a further null byte is then appended; the
                 record length of 6 plus the value's length counts
                 both.  Confirm that consumers of the record expect
                 this layout. */
              add_tokstr_unsigned (6 + (prog - value));
              add_tokstr_char (TAG_FONT_BY_NAME);
              add_tokstr_string (value, prog - value + 1);
              add_tokstr_char (0);
            }
          else if (streq (key, "family"))
            {
              /* Same layout as the `font' record above. */
              add_tokstr_unsigned (6 + (prog - value));
              add_tokstr_char (TAG_FONT_BY_FAMILY);
              add_tokstr_string (value, prog - value + 1);
              add_tokstr_char (0);
            }
          else if (streq (key, "pos"))
            {
              char *tail;
              int pos = strtol (value, &tail, 0);
              if (tail < prog || pos < 0 || pos > 3)
                {
                  msg (SE, _("Bad font position in tagged quote."));
                  goto lose;
                }
              add_tokstr_unsigned (5);
              add_tokstr_char (TAG_FONT_BY_POSITION);
              add_tokstr_char (pos);
            }
          else if (streq (key, "color"))
            {
              /* FIXME: color */
              msg (MW, _("Color not yet supported in tagged quotes."));
              add_tokstr_unsigned (4);
              add_tokstr_char (TAG_NO_OP);
            }
          else
            {
              msg (SE, _("Unknown key `%s' in tagged quote."), key);
              goto lose;
            }
        }
      else
        {
          /* Anything else can start neither a quote nor a tag.
             Leave prog where it is so that the caller resumes
             tokenizing at this character. */
          msg (SE, _("Unexpected character `%c' in tagged quote."), *prog);
          goto lose;
        }
    }

  /* Null-terminate the result without counting the terminator. */
  add_tokstr_char (0);
  tokstr_len--;

  if (tokstr_len > 255)
    token = TAGGED_QUOTE;
  return token;

lose:
  return -1;
}

/* Skip a COMMENT command.  Reads and discards input lines up to and
   including the line that ends the command, that is, the first line
   that ends with a terminal dot or that causes a `.' token to be
   pushed ahead (see preprocess_line()).  Also stops if the input runs
   out; the end of file is then reported by the next attempt to read a
   token. */
void
skip_comment (void)
{
  for (;;)
    {
      if (!get_line ())
        {
          /* End of input in the middle of the comment.  Make sure
             that no leftover text is tokenized, the same way
             discard_line() does. */
          *getl_buf = 0;
          prog = getl_buf;
          dot = 0;
          break;
        }
      if (put == '.')
        break;

      /* Ignore the contents of the line just read. */
      prog = &getl_buf[getl_buf_len];
      if (dot)
        break;
    }
}


/* Helper routines for inferior compilers. */

#if !__GNUC__
#undef match_id
/* If the current token is an identifier that matches keyword S
   according to id_match(), advances to the next token and returns 1.
   Otherwise leaves the token alone and returns 0. */
int
match_id (char *s)
{
  if (token != ID || !id_match (s, tokstr))
    return 0;

  get_token ();
  return 1;
}

/* If the current token is T, advances to the next token and returns
   1.  Otherwise leaves the token alone and returns 0. */
int
(match_tok) (int t)
{
  if (token != t)
    return 0;

  get_token ();
  return 1;
}

/* If the current token is a number whose integer value is X, advances
   to the next token and returns 1.  Otherwise leaves the token alone
   and returns 0. */
int
(match_int) (int x)
{
  if (token != NUM || x != tokint)
    return 0;

  get_token ();
  return 1;
}
#endif /* !__GNUC__ */
