#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <savant.h>
#include <savutil.h>

/* Write a dense document vector to `stream` using fwrite_big: first the
 * entry count, then the word codes (WORD_ENCODE_WIDTH items per entry),
 * then one weight per entry.  Writing stops at the first short write.
 * Returns 0 when everything was written, -1 otherwise. */
DB_INT save_ddv(FILE *stream, DenseDocVec *dv)
{
  DB_INT count = dv->num_entries;

  /* Header: number of entries. */
  if (fwrite_big(&count, sizeof(DB_INT), 1, stream) != 1)
    return -1;

  /* Word codes, WORD_ENCODE_WIDTH items per entry. */
  if (fwrite_big(dv->wordcodes, sizeof(DB_INT),
                 WORD_ENCODE_WIDTH*count, stream) != WORD_ENCODE_WIDTH*count)
    return -1;

  /* One weight per entry. */
  if (fwrite_big(dv->weights, sizeof(DB_FLOAT), count, stream) != count)
    return -1;

  return 0;
}

/* Read a dense document vector from `stream` in the layout produced by
 * save_ddv: the entry count, the word codes (WORD_ENCODE_WIDTH items per
 * entry), then one weight per entry.
 * Returns a newly allocated DenseDocVec whose struct and both arrays come
 * from malloc and are owned by the caller, or NULL on a short read, a
 * negative entry count, or allocation failure.  Nothing is leaked on the
 * failure paths. */
DenseDocVec *load_ddv(FILE *stream)
{
  DB_INT num;
  DenseDocVec *ret;

  if (fread_big(&num, sizeof(DB_INT), 1, stream) != 1)
    return NULL;
  /* The count comes straight from the file; a negative value would turn
   * into a bogus size in the arithmetic below. */
  if (num < 0)
    return NULL;

  ret = malloc(sizeof(DenseDocVec));
  if (ret == NULL)
    return NULL;

  ret->num_entries = num;
  ret->wordcodes = malloc(sizeof(DB_UINT) * WORD_ENCODE_WIDTH * num);
  ret->weights = malloc(sizeof(DB_UINT) * num);
  if (ret->wordcodes == NULL || ret->weights == NULL)
    goto fail;

  if ((fread_big(ret->wordcodes, sizeof(DB_INT),
                 WORD_ENCODE_WIDTH*num, stream) != WORD_ENCODE_WIDTH*num) ||
      (fread_big(ret->weights, sizeof(DB_UINT), num, stream) != num))
    goto fail;

  return ret;

 fail:
  /* free(NULL) is a no-op, so this covers every partial state. */
  free(ret->wordcodes);
  free(ret->weights);
  free(ret);
  return NULL;
}

/* Build a space-separated string of the `numwords` heaviest words in `ddv`,
 * heaviest first.  Among entries of equal weight the one appearing later
 * in the vector is listed first.  `numwords` is clamped to the number of
 * entries in the vector.
 * Returns a malloc'd, NUL-terminated string owned by the caller; it is the
 * empty string when no words are requested or the vector is empty.
 * Returns NULL if memory runs out.
 * The buffer reserves 16 bytes per word; this assumes decode_word writes
 * at most 15 characters plus the terminator -- TODO confirm. */
char *ddv2string(DenseDocVec *ddv, DB_INT numwords)
{
  DB_INT *indices;
  DB_INT i, j, k, filled = 0;
  char *word, *cpos;

  if (numwords > 0 && numwords > ddv->num_entries)
    numwords = ddv->num_entries;

  /* Nothing to print: hand back an empty string rather than stepping the
   * write cursor back before the start of the buffer. */
  if (numwords <= 0)
    {
      word = malloc(1);
      if (word != NULL)
        *word = '\0';
      return word;
    }

  indices = malloc(numwords * sizeof(DB_INT));
  if (indices == NULL)
    return NULL;
  cpos = word = malloc(16 * numwords + 1);
  if (word == NULL)
    {
      free(indices);
      return NULL;
    }

  /* Keep indices[0..filled-1] sorted by descending weight.  Every entry,
   * including the first `numwords` ones, goes through the same insertion
   * step so that the list is ordered before anything is pushed off its
   * end. */
  for (i = 0; i < ddv->num_entries; i++)
    {
      for (j = 0; j < filled; j++)
        if (ddv->weights[i] >= ddv->weights[indices[j]])
          break;
      if (j == numwords)
        continue;               /* list is full and this entry is lighter */
      if (filled < numwords)
        filled++;
      for (k = filled - 1; k > j; k--)
        indices[k] = indices[k - 1];
      indices[j] = i;
    }

  for (j = 0; j < numwords; j++)
    {
      decode_word(ddv->wordcodes + WORD_ENCODE_WIDTH*indices[j], cpos);
      /* Advance to the terminator decode_word left behind; testing before
       * stepping keeps this correct even for an empty word. */
      while (*cpos)
        cpos++;
      *cpos++ = ' ';
    }
  /* Replace the trailing separator with the terminator. */
  *--cpos = '\0';

  free(indices);
  return word;
}

DB_INT ddv2description(DenseDocVec *ddv, DB_FLOAT weight, char ***words, DB_INT **wts)
/* Creates a NULL-terminated array of words and an array holding the weight
 * of each of those words (same length, no terminator).  Entries are picked
 * heaviest first until their summed weight reaches `weight` times the
 * total weight of the vector, or until no unpicked entry with a weight
 * above zero is left.  The result can be sent to description2ddv to
 * generate a roughly equivalent ddv; it is also easily made into a text
 * format which can be transferred between programs.  The strings and both
 * arrays become the property of the caller.
 * Returns 0 on success.  Returns -1 if memory runs out, in which case
 * *words and *wts are set to NULL and nothing is leaked.
 * NOTE(review): this comment used to say the number of words is returned,
 * but the code returns 0 on success.  The return value is left unchanged
 * here; callers can count entries up to the NULL terminator.  Confirm
 * against callers before changing it. */
{
  DB_INT sz = 0, max = 8, total = 0, current = 0, i, bindex = 0, bwt;
  DB_INT *picked = NULL, *grown;
  char **list = NULL;
  char *done;
  int found;

  *words = NULL;
  *wts = NULL;

  /* One spare byte so that an empty vector does not ask for malloc(0),
   * which is allowed to return NULL without being out of memory. */
  done = malloc(ddv->num_entries + 1);
  if (done == NULL)
    return -1;

  /* `picked` first collects the indices of the selected entries; each
   * index is replaced by the entry's weight once its word is decoded. */
  picked = malloc(max * sizeof(DB_INT));
  if (picked == NULL)
    goto fail;

  for (i = 0; i < ddv->num_entries; i++)
    {
      total += ddv->weights[i];
      done[i] = 0;
    }

  while (current < total * weight)
    {
      /* Find the heaviest entry that has not been picked yet. */
      bwt = 0;
      found = 0;
      for (i = 0; i < ddv->num_entries; i++)
        {
          if (!done[i] && (ddv->weights[i] > bwt))
            {
              bwt = ddv->weights[i];
              bindex = i;
              found = 1;
            }
        }
      /* Nothing left to pick (for example `weight` above 1): stop rather
       * than loop forever on a stale index. */
      if (!found)
        break;

      if (sz == max)
        {
          /* Grow through a temporary so the old block is not lost when
           * realloc fails. */
          grown = realloc(picked, 2 * max * sizeof(DB_INT));
          if (grown == NULL)
            goto fail;
          picked = grown;
          max *= 2;
        }
      current += bwt;
      done[bindex] = 1;
      picked[sz++] = bindex;
    }

  /* Put the right stuff into the return variables */
  list = malloc((sz + 1) * sizeof(char *));
  if (list == NULL)
    goto fail;
  for (i = 0; i < sz; i++)
    {
      list[i] = malloc(16);
      if (list[i] == NULL)
        {
          while (i > 0)
            free(list[--i]);
          goto fail;
        }
      decode_word(ddv->wordcodes + WORD_ENCODE_WIDTH*picked[i], list[i]);
      picked[i] = ddv->weights[picked[i]];
    }
  list[sz] = NULL;

  free(done);
  *words = list;
  *wts = picked;
  return 0;

 fail:
  free(list);
  free(picked);
  free(done);
  return -1;
}

/* Build a dense document vector from a NULL-terminated array of words and
 * a parallel array of weights (one per word).  Each word is encoded with
 * encode_word for BODY_FIELD.
 * Returns a newly allocated DenseDocVec whose struct and both arrays come
 * from malloc and are owned by the caller, or NULL if memory runs out. */
DenseDocVec *description2ddv(char **words, DB_INT *weights)
{
  DenseDocVec *ret;
  DB_INT i, count = 0;

  while (words[count])
    count++;

  ret = malloc(sizeof(DenseDocVec));
  if (ret == NULL)
    return NULL;

  ret->num_entries = count;
  ret->wordcodes = malloc(WORD_ENCODE_WIDTH * sizeof(DB_INT) * count);
  ret->weights = malloc(sizeof(DB_FLOAT) * count);
  /* With zero words a NULL from malloc(0) is not an error, so only treat
   * NULL as failure when something actually has to be stored. */
  if (count > 0 && (ret->wordcodes == NULL || ret->weights == NULL))
    {
      free(ret->wordcodes);
      free(ret->weights);
      free(ret);
      return NULL;
    }

  for (i = 0; i < count; i++)
    {
      encode_word(words[i], ret->wordcodes + WORD_ENCODE_WIDTH * i, BODY_FIELD);
      ret->weights[i] = weights[i];
    }
  return ret;
}
