@
% $Id: newprod.w,v 1.2 1991/08/06 16:09:15 fj Exp $
The following sections describe the means available for encoding of
productions.

For each rule, |lhs| is the scrap categories to be matched, and |rhs|
is the new scrap to be created (we only allow one new scrap to be
created by each rule).  In |rhs|, |category| is the category of the
new scrap, and |translation| is a \\{printf}-like string describing
the token sequence for the new scrap.

The following feature is used to implement primitive context-sensitive
productions:

If |scraps_to_keep| has a positive value~$k$, we keep the first $k$ of
the matched scraps; otherwise, we keep the last $-k$ of the matched
scraps.  That is, if a production matches the scraps |pp|, \dots,
|pp+l-1|, and $k>0$, we keep the scraps |pp|, \dots, |pp+k-1|; if
$k<0$, we keep the scraps |pp+l+k|, \dots, |pp+l-1|; if $k=0$, all the
matched scraps are removed.  Restriction: if the length of |lhs| is
$l$, then at most $l-1$ scraps may be kept.

|displacement| is the amount that is added to |pp| when the rule is
applied.

@d max_lhs_length 4
@d any (max_category + 1)

@<Decl...@>=
typedef struct {
    int category;
    char *translation;
} rhs_type;

typedef struct {
    int id; /* for debugging */
    int lhs[max_lhs_length];
    rhs_type rhs;
    int scraps_to_keep, lhs_length, displacement;
} production;


@ We shall organize the rules in a trie structure for fast matching.
We store a pointer to a rule in position zero of a trie node if the
sequence of transitions leading to the node corresponds to the
left-hand side of the rule; |rule_of(q)| denotes the rule stored in
the trie node pointed to by~|q|, and |transition(q,c)| denotes the
trie node reached from the node pointed to by~|q| on transition
(category)~|c|.

@d rule_of(q) ((*q)[0].rule)
@d transition(q,c) ((*q)[c].next)

@<Decl...@>=
typedef union trie_node_element trie_node[any + 1];

union trie_node_element {
    production *rule;
    trie_node *next;
};


@
@d trie_root trie_node_mem
@d max_no_of_nodes 200
@<Global...@>=
trie_node trie_node_mem[max_no_of_nodes] = { NULL, /* etc. */ };
int node_no = 1;  /* number of trie nodes allocated */


@ This procedure is called whenever a rule is matched.

|any| is the largest category; if a category |c>=any| appears in
|rhs|, we take the category |(pp+(c-any))->cat|.

@c
void reduce (rule)
production *rule;
{
    int l = rule->lhs_length, k = rule->scraps_to_keep;
    int d = rule->displacement;
    int newcat = rule->rhs.category; /* category for the new scrap */
    scrap_pointer j = k > 0 ? pp + k : pp ; /* position of the new scrap */
    text_pointer simple_trans = NULL;
    int simple_mathness = 0;

    if (newcat >= any)
	newcat = (pp + (newcat - any))->cat;

    @<Generate token list@>@;

    j->cat = newcat;
    if ((j->trans = simple_trans) != NULL)
	j->mathness = simple_mathness;
    else {
	j->trans = text_ptr; freeze_text;
	j->mathness = 4 * cur_mathness + init_mathness;
    }

    @<Fill vacant scrap positions@>@;
    @<Print a snapshot of the scrap list if debugging@>@;
    @<Change |pp| to $\max(|scrap_base|,|pp+d|)$@>@;
}



@ In some cases, the translation consists of a copy of an existing
scrap; we first test to see whether this is the case.

@d digit_value(c) ((c) - '0')
@<Generate token list@>=
 {  char *s = rule->rhs.translation, c;

    if (*s == '%' && isdigit(c = *(s + 1)) && *(s + 2) == '\0') {
	simple_trans = (pp + digit_value (c))->trans;
	simple_mathness = (pp + digit_value (c))->mathness;
    } else while ((c = *s++) != '\0') {
	if (c != '%')
	    @<Append character token@>@;
	else
	    switch (c = *s++) {
	    case '+': app (indent); break;
	    case '-': app (outdent); break;
	    case 'p': app (opt); app (*s++); break;
	    case 'f': exit_math_mode (); app (force); break;
	    case 'F': exit_math_mode (); app (big_force); break;
	    case 'm': enter_math_mode (); break;
	    case 'h': exit_math_mode (); break;
	    case 'b': exit_math_mode (); app (backup); break;
	    case 'B': exit_math_mode (); app (break_space); break;
	    case 'c': exit_math_mode (); app (cancel); break;
	    case 'r': app (relax); break;
	    case ',': enter_math_mode (); app ('\\'); app (','); break;
	    case 't': exit_math_mode (); app ('\\'); app ('t'); break;
	    case '!': case '*': case '$':
		{ boolean underline = false,
		      reserve = false, unreserve = false;

		  do
		      switch (c) {
		      case '!': underline = true; break;
		      case '*': reserve = true; break;
		      case '$': unreserve = true; break;
		      default: confusion ("illegal format modifier"); break;
		      }
		  while (!isdigit (c = *s++));

		  if (underline)
		      make_underlined (pp + digit_value (c));

		  if (reserve)
		      make_reserved (pp + digit_value (c));
		  else if (unreserve)
		      make_nonreserved (pp + digit_value (c));
	        } /* fall through */
	    case '0': case '1': case '2': case '3': case '4':
	    case '5': case '6': case '7': case '8': case '9':
		big_app1 (pp + digit_value (c));
		break;
	    default: putchar (c);
		confusion ("illegal format string");
		break;
	    }
    }
 }


@
@<Append character token@>=
switch (c) {
 case ' ': exit_math_mode (); app (' '); break;
 case '~': app ('~'); break;
 default: enter_math_mode (); app (c); break;
 }



@ When we have applied a production to the sequence of scraps, we
usually remove scraps (we never create more scraps than we remove),
thereby creating a ``hole'' in the sequence.  We fill that hole by
moving scraps downward.

@<Fill vacant...@>=
 {  scrap_pointer q = pp + (k < 0 ? l + k : l), p = j + 1;

    if (p < q) {
	for (; q <= lo_ptr; p++, q++) {
	    p->cat = q->cat; p->trans = q->trans;
	    p->mathness = q->mathness;
	}
	lo_ptr += p - q;
    }
 }


@ This procedure installs a production and performs some checks to see
whether it is a valid rule.

For now, we just report errors; we should actually take some more
drastic actions.

@d error printf

@c
trie_node *get_new_trie_node ()
{
    if (node_no >= max_no_of_nodes)
	overflow ("productions");

    return trie_node_mem + node_no++;
}

void install_production (rule)
production *rule;
{
    @<Check left-hand side of |rule|@>@;
    @<Check right-hand side of |rule|@>@;
    @<Install |rule| in the trie structure@>@;
    @<Compute |displacement|@>@;
}

@
@<Decl...@>=
void install_production ();

@
@<Check left...@>=
 {  int *p = rule->lhs, *q = rule->lhs + max_lhs_length - 1;

    while (q > p && *q == 0)
	q--;

    if (*q == 0)
	error ("Rule %d: empty lhs\n", rule->id);

    rule->lhs_length = q - p + 1;

    for (; p <= q; p++)
	if (*p != any && !valid_cat (*p))
	    error ("Rule %d: unknown category in lhs: %d\n", rule->id, *p);
 }



@
@<Check right...@>=
 {  int c = rule->rhs.category;

    if ((c < any || c > any + rule->lhs_length) && !valid_cat (c))
	error ("Rule %d: invalid category in rhs: %d\n", rule->id, c);

    if (abs (rule->scraps_to_keep) >= rule->lhs_length)
	error ("Rule %d: right-hand side longer than left-hand side\n",
	       rule->id);
    
    /* we should also check the format string */
 }


@
@<Install |rule|...@>=
 {  int *p = rule->lhs, *lhs_end = p + rule->lhs_length;
    trie_node *q = trie_root;

    while (p < lhs_end) {
	if (transition (q, *p) == NULL)
	    transition (q, *p) = get_new_trie_node ();
	q = transition (q, *p++);
    }

    if (rule_of (q) != NULL)
	error ("Rules %d and %d: identical left-hand sides",
	       rule_of (q)->id, rule->id);

    rule_of (q) = rule;
 }



@ For the time being, we compute |displacement| based on local
considerations.  (The original \.{WEAVE} considered the total set of
rules.)

A production can have an |lhs| of length |max_lhs_length|.  This means
that we have to move |pp| |max_lhs_length-1| positions to the left of
the first scrap position that gets a new category.

@<Compute |displacement|@>=
 {
    int k = rule->scraps_to_keep;
    int j = k > 0 ? k : 0; /* position of new scrap relative to |pp| */
    int d = 1 - max_lhs_length + j;
    int old = rule->lhs[j], new = rule->rhs.category;

    if (new >= any) {
	/* the new category is determined by the matched scraps */
	if (new - any == j)
	    d++;
    } else if (old == new)
	d++;

    rule->displacement = d;
 }



@ This function tests whether the categories at |p|, |p+1|, \dots\
match the categories in the trie structure.

@^recursion@>

@c
production *cat_match (p, q)
scrap_pointer p;
trie_node *q;
{   production *preferred = NULL, *alternative = NULL;
    int c = p->cat;
    trie_node *next;

    if (c == 0) /* end of scrap sequence */
	return rule_of (q);

    if ((next = transition (q, c)) != NULL)
	preferred = cat_match (p + 1, next);

    if ((next = transition (q, any)) != NULL)
	alternative = cat_match (p + 1, next);

    if (preferred != NULL)
	if (alternative != NULL
	    && alternative->lhs_length > preferred->lhs_length)
	    return alternative;
	else return preferred;

    if (alternative != NULL)
	return alternative;

    return rule_of (q);
}


@
@<Match a production at |pp|, or increase |pp| if there is no match@>=
 {   production *rule = cat_match (pp, trie_root);

     if (rule != NULL)
	 reduce (rule);
     else pp++; /* if no match was found, we move to the right */
 }
