Copyright (C) 1994, Digital Equipment Corp.
                                                             
 File: M3String.m3                                           
MODULE M3String;
IMPORT M3Buf, Text, TextF, Word, CG, Target;
CONST
  NO_UID = -1;
TYPE
  Buf       = ARRAY OF CHAR;
  HashTable = REF ARRAY OF T;
REVEAL
  T = BRANDED REF RECORD
    prefix : T         := NIL;
    suffix : T         := NIL;
    body   : TEXT      := NIL;
    length : INTEGER   := 0;
    hash   : INTEGER   := 0;
    uid    : INTEGER   := 0;
    next   : T         := NIL;
  END;
  (* There are two variants of a String.T:
       (body # NIL)  => the characters are in body
       ELSE          => prefix & suffix
  *)
CONST
  Digits = ARRAY [0..9] OF CHAR {'0','1','2','3','4','5','6','7','8','9'};
VAR
  hashMask  : INTEGER   := 511; (* == 2^9-1 == 9 bits on *)
  hashTable : HashTable := NEW (HashTable, 512);
  next_t    : T         := NIL;
  nStrings  : INTEGER   := 0;
-------------------------------------------------------------- exported ---
PROCEDURE Add (x: TEXT): T =
  BEGIN
    IF (next_t = NIL) THEN next_t := NEW (T) END;
    next_t.prefix := NIL;
    next_t.suffix := NIL;
    next_t.body   := x;
    next_t.length := Text.Length (x);
    next_t.uid    := NO_UID;
    RETURN Intern (SUBARRAY (x^, 0, next_t.length));
  END Add;
PROCEDURE FromStr (READONLY buf: Buf;  length: INTEGER): T =
  VAR t: T;
  BEGIN
    IF (next_t = NIL) THEN next_t := NEW (T) END;
    next_t.prefix := NIL;
    next_t.suffix := NIL;
    next_t.body   := NIL; (* for now *)
    next_t.length := MIN (length, NUMBER (buf));
    next_t.uid    := NO_UID;
    t := Intern (SUBARRAY (buf, 0, next_t.length));
    RETURN t;
  END FromStr;
PROCEDURE Concat (a, b: T): T =
  VAR buf: ARRAY [0..3] OF CHAR;
  BEGIN
    IF (a = NIL) OR (a.length = 0) THEN RETURN b END;
    IF (b = NIL) OR (b.length = 0) THEN RETURN a END;
    IF (next_t = NIL) THEN next_t := NEW (T) END;
    next_t.prefix := a;
    next_t.suffix := b;
    next_t.body   := NIL;
    next_t.length := a.length + b.length;
    next_t.uid    := NO_UID;
    RETURN Intern (buf);
  END Concat;
PROCEDURE ToText (t: T): TEXT =
  VAR x: TEXT;
  BEGIN
    IF (t = NIL) THEN RETURN NIL END;
    IF (t.body = NIL) THEN
      x := TextF.New (t.length);
      Flatten (t, x^, 0);
      t.body := x;
    END;
    RETURN t.body;
  END ToText;
PROCEDURE Put (wr: M3Buf.T;  t: T) =
  BEGIN
    IF (t = NIL) THEN
      (* done *)
    ELSIF (t.body # NIL) THEN
      FOR i := 0 TO t.length-1 DO EmitChar (wr, t.body[i]) END;
    ELSE
      Put (wr, t.prefix);
      Put (wr, t.suffix);
    END;
  END Put;
PROCEDURE Init_chars (offset: INTEGER;  t: T) =
  BEGIN
    IF (t = NIL) THEN
      (* done *)
    ELSIF (t.body # NIL) THEN
      CG.Init_chars (offset, t.body);
    ELSE
      Init_chars (offset, t.prefix);
      Init_chars (offset + t.prefix.length * Target.Char.size, t.suffix);
    END;
  END Init_chars;
PROCEDURE Length (t: T): INTEGER =
  BEGIN
    IF (t = NIL)
      THEN RETURN 0;
      ELSE RETURN t.length;
    END;
  END Length;
PROCEDURE GetUID (t: T): INTEGER =
  BEGIN
    RETURN t.uid;
  END GetUID;
PROCEDURE SetUID (t: T;  uid: INTEGER) =
  BEGIN
    t.uid := uid;
  END SetUID;
PROCEDURE Hash (t: T): INTEGER =
  BEGIN
    IF (t = NIL)
      THEN RETURN 953;
      ELSE RETURN t.hash;
    END;
  END Hash;
-------------------------------------------------------------- internal ---
PROCEDURE Intern (READONLY buf: Buf): T =
  VAR hash, bucket: INTEGER;  t: T;
  BEGIN
    (* search the hash table *)
    next_t.hash := 0;
    hash := InternHash (next_t, 0, buf);
    bucket := Word.And (hash, hashMask);
    t := hashTable[bucket];
    WHILE (t # NIL) DO
      IF (t.hash = hash) AND Equal (t, next_t, buf) THEN RETURN t; END;
      t := t.next;
    END;
    (* we didn't find the string => add it to the hash table *)
    t := next_t;
    t.hash := hash;
    t.next := hashTable [bucket];
    hashTable [bucket] := t;
    next_t := NIL; (* since we've used it! *)
    IF (t.prefix = NIL) AND (t.body = NIL) THEN
      t.body := Text.FromChars (buf);
    END;
    INC (nStrings);
    IF (nStrings > 2 * NUMBER (hashTable^)) THEN ExpandHashTable () END;
    RETURN t;
  END Intern;
PROCEDURE ExpandHashTable () =
  VAR
    n_old   := NUMBER (hashTable^);
    n_new   := n_old + n_old;
    new     := NEW (HashTable, n_new);
    newMask := hashMask + hashMask + 1;
    t, u    : T;
    x       : INTEGER;
  BEGIN
    FOR i := 0 TO n_new - 1 DO new[i] := NIL END;
    FOR i := 0 TO n_old - 1 DO
      t := hashTable [i];
      WHILE (t # NIL) DO
        u := t.next;
        x := Word.And (t.hash, newMask);
        t.next := new [x];
        new [x] := t;
        t := u;
      END;
    END;
    hashMask := newMask;
    hashTable := new;
  END ExpandHashTable;
PROCEDURE InternHash (t: T;  hash: INTEGER;  READONLY buf: Buf): INTEGER =
  BEGIN
    IF (t = NIL) THEN RETURN 0 END;
    IF (hash = 0) AND (t.hash # 0) THEN RETURN t.hash END;
    IF (t.body # NIL) THEN
      FOR i := 0 TO t.length - 1 DO
        hash := Word.Plus (Word.Times (2, hash), ORD (t.body[i]));
      END;
    ELSIF (t.prefix # NIL) THEN
      (* a concatentation *)
      hash := InternHash (t.prefix, hash, buf);
      hash := InternHash (t.suffix, hash, buf);
    ELSE (* use the buffer *)
      FOR i := 0 TO t.length - 1 DO
        hash := Word.Plus (Word.Times (2, hash), ORD (buf[i]));
      END;
    END;
    RETURN hash;
  END InternHash;
PROCEDURE Equal (a, b: T;  READONLY buf: Buf): BOOLEAN =
  BEGIN
    IF (a.length # b.length) THEN RETURN FALSE END;
    FOR i := 0 TO a.length - 1 DO
      IF GetCh (a, buf, i) # GetCh (b, buf, i) THEN RETURN FALSE; END;
    END;
    RETURN TRUE;
  END Equal;
PROCEDURE GetCh (t: T;  READONLY buf: Buf;  i: INTEGER): CHAR =
  VAR u: T;
  BEGIN
    (* walk the tree to find the right segment *)
    WHILE (t.prefix # NIL) DO
      u := t.prefix;
      IF (u.length > i)
        THEN t := t.prefix;
        ELSE t := t.suffix;  DEC (i, u.length);
      END;
    END;
    IF (t.body # NIL)
      THEN RETURN t.body[i];
      ELSE RETURN buf[i];
    END;
  END GetCh;
PROCEDURE Flatten (t: T;  VAR buf: Buf;  start: INTEGER) =
  BEGIN
    IF (t = NIL) THEN
      (* done *)
    ELSIF (t.body # NIL) THEN
      SUBARRAY (buf, start, t.length) := SUBARRAY (t.body^, 0, t.length);
    ELSE
      WHILE (t # NIL) AND (t.body = NIL) DO
        Flatten (t.suffix, buf, start + Length (t.prefix));
        t := t.prefix;
      END;
      Flatten (t, buf, start);
    END;
  END Flatten;
PROCEDURE EmitChar (wr: M3Buf.T;  c: CHAR) =
  VAR i: INTEGER;
  BEGIN
    IF (c < ' ') OR (c = '\"') OR (c = '\'') OR ('~' < c) OR (c = '\\') THEN
      i := Word.And (ORD (c), 255);
      M3Buf.PutChar (wr, '\\');
      M3Buf.PutChar (wr, Digits[i DIV 64]);  i := Word.And (i, 63);
      M3Buf.PutChar (wr, Digits[i DIV 8]);   i := Word.And (i, 7);
      M3Buf.PutChar (wr, Digits[i]);
    ELSE (* simple graphic character *)
      M3Buf.PutChar (wr, c);
    END;
  END EmitChar;
-------------------------------------------------------- initialization ---
PROCEDURE Initialize () =
  BEGIN
    FOR i := 0 TO LAST (hashTable^) DO hashTable[i] := NIL; END;
  END Initialize;
PROCEDURE Reset () =
  VAR t: T;
  BEGIN
    FOR i := FIRST (hashTable^) TO LAST (hashTable^) DO
      t := hashTable[i];
      WHILE (t # NIL) DO t.uid := NO_UID;  t := t.next END;
    END;
  END Reset;
BEGIN
END M3String.