/* PSPP - a program for statistical analysis.
   Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

#include <config.h>

#include <limits.h>
#include <math.h>
#include <stdlib.h>

#include <data/casegrouper.h>
#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/transformations.h>
#include <data/variable.h>
#include <language/command.h>
#include <language/dictionary/split-file.h>
#include <language/lexer/lexer.h>
#include <language/lexer/variable-parser.h>
#include <libpspp/array.h>
#include <libpspp/compiler.h>
#include <libpspp/message.h>
#include <libpspp/assertion.h>
#include <math/moments.h>
#include <output/manager.h>
#include <output/table.h>

#include "xalloc.h"

#include "gettext.h"
/* Message-marking helpers.  _() translates MSGID at run time through
   gettext().  N_() expands to the bare string, for use in static
   initializers such as the dsc_info table, whose names are passed to
   gettext() later, when they are displayed. */
#define _(msgid) gettext (msgid)
#define N_(msgid) msgid

/* DESCRIPTIVES private data. */

/* Forward declaration; the full definition appears further down in this
   file. */
struct dsc_proc;

/* Handling of missing values.  DSC_VARIABLE is the default chosen by
   cmd_descriptives. */
enum dsc_missing_type
  {
    DSC_VARIABLE,       /* Handle missing values on a per-variable basis:
                           a missing value is only counted against the
                           variable that holds it. */
    DSC_LISTWISE        /* Discard entire case if any variable is missing. */
  };

/* Describes properties of a distribution for the purpose of
   calculating a Z-score.  MEAN and STD_DEV are copied from the
   DSC_MEAN and DSC_STDDEV statistics of the source variable and may be
   SYSMIS, in which case the Z-score transformation outputs SYSMIS. */
struct dsc_z_score
  {
    const struct variable *src_var;   /* Variable on which z-score is based. */
    struct variable *z_var;     /* New z-score variable. */
    double mean;		/* Distribution mean. */
    double std_dev;		/* Distribution standard deviation. */
  };

/* DESCRIPTIVES transformation (for calculating Z-scores).
   Built by setup_z_trns and handed to add_transformation as the
   auxiliary data of descriptives_trns_proc/descriptives_trns_free. */
struct dsc_trns
  {
    struct dsc_z_score *z_scores; /* Array of Z-scores. */
    int z_score_cnt;            /* Number of Z-scores. */
    const struct variable **vars;     /* Variables for listwise missing checks;
                                         NULL unless missing_type is
                                         DSC_LISTWISE. */
    size_t var_cnt;             /* Number of variables (0 when VARS is NULL). */
    enum dsc_missing_type missing_type; /* Treatment of missing values. */
    enum mv_class exclude;      /* Classes of missing values to exclude. */
  };

/* Statistics.  Used as bit indexes, so must be 32 or fewer.
   The nonnegative values index both the dsc_info table and the `stats'
   array of struct dsc_var, and select bits in the `show_stats' and
   `calc_stats' masks.  DSC_N_STATS is the number of real statistics.
   The negative values are sentinels and must never be used as a bit
   index or array index. */
enum dsc_statistic
  {
    DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
    DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
    DSC_MAX, DSC_SUM, DSC_N_STATS,

    /* Only valid as sort criteria. */
    DSC_NAME = -2,              /* Sort by name. */
    DSC_NONE = -1               /* Unsorted.  Also returned by
                                   match_statistic when no statistic was
                                   recognized. */
  };

/* Describes one statistic. */
struct dsc_statistic_info
  {
    const char *identifier;     /* Identifier matched against the syntax. */
    const char *name;		/* Full name, marked with N_() and translated
                                   with gettext() when displayed. */
    enum moment moment;		/* Highest moment needed to calculate. */
  };

/* Table of statistics, indexed by DSC_*.
   The entries must stay in the same order as the nonnegative members of
   enum dsc_statistic.  The standard-error entries for skewness and
   kurtosis list MOMENT_NONE because cmd_descriptives adds the underlying
   skewness/kurtosis statistic to the set to calculate whenever the
   standard error is shown.  Range, minimum and maximum are tracked
   directly during the data pass and need no moments. */
static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
  {
    {"MEAN", N_("Mean"), MOMENT_MEAN},
    {"SEMEAN", N_("S E Mean"), MOMENT_VARIANCE},
    {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
    {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
    {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
    {"SEKURTOSIS", N_("S E Kurt"), MOMENT_NONE},
    {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
    {"SESKEWNESS", N_("S E Skew"), MOMENT_NONE},
    {"RANGE", N_("Range"), MOMENT_NONE},
    {"MINIMUM", N_("Minimum"), MOMENT_NONE},
    {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
    {"SUM", N_("Sum"), MOMENT_MEAN},
  };

/* Statistics calculated by default if none are explicitly
   requested: a bit mask selecting mean, standard deviation, minimum
   and maximum. */
#define DEFAULT_STATS                                                   \
	((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN)     \
         | (1ul << DSC_MAX))

/* A variable specified on DESCRIPTIVES. */
struct dsc_var
  {
    const struct variable *v;         /* Variable to calculate on. */
    char z_name[VAR_NAME_LEN + 1]; /* Name for z-score variable; empty string
                                      if no z-score was requested. */
    double valid, missing;	/* Valid, missing counts (sums of weights). */
    struct moments *moments;    /* Moments; NULL if no moments are needed. */
    double min, max;            /* Minimum and maximum values; reset to
                                   DBL_MAX and -DBL_MAX before each group. */
    double stats[DSC_N_STATS];	/* All the stats' values, indexed by DSC_*. */
  };

/* Output format. */
enum dsc_format
  {
    DSC_LINE,           /* Abbreviated format. */
    DSC_SERIAL          /* Long format. */
  };

/* A DESCRIPTIVES procedure.
   Created and parsed into by cmd_descriptives; the accumulated results
   are reset for every split-file group by calc_descriptives. */
struct dsc_proc
  {
    /* Per-variable info. */
    struct dsc_var *vars;       /* Variables. */
    size_t var_cnt;             /* Number of variables. */

    /* User options. */
    enum dsc_missing_type missing_type; /* Treatment of missing values. */
    enum mv_class exclude;      /* Classes of missing values to exclude. */
    int show_var_labels;        /* Nonzero to show variable labels. */
    int show_index;             /* Nonzero to show variable index. */
    enum dsc_format format;     /* Output format. */

    /* Accumulated results. */
    double missing_listwise;    /* Sum of weights of cases missing listwise. */
    double valid;               /* Sum of weights of valid cases. */
    bool bad_warn;               /* Warn if bad weight found. */
    enum dsc_statistic sort_by_stat; /* Statistic to sort by; DSC_NAME: sort
                                        by name; DSC_NONE: unsorted. */
    int sort_ascending;         /* !0: ascending order; 0: descending. */
    unsigned long show_stats;   /* Statistics to display (bit mask of
                                   1ul << DSC_*). */
    unsigned long calc_stats;   /* Statistics to calculate (bit mask of
                                   1ul << DSC_*). */
    enum moment max_moment;     /* Highest moment needed for stats. */
  };

/* Parsing. */
static enum dsc_statistic match_statistic (struct lexer *);
static void free_dsc_proc (struct dsc_proc *);

/* Z-score functions.  Note that the `name' parameter of
   generate_z_varname below is called `var_name' in its definition: it
   is the name of the source variable. */
static bool try_name (const struct dictionary *dict,
		      struct dsc_proc *dsc, const char *name);
static bool generate_z_varname (const struct dictionary *dict,
				struct dsc_proc *dsc, char *z_name,
				const char *name, int *z_cnt);
static void dump_z_table (struct dsc_proc *);
static void setup_z_trns (struct dsc_proc *, struct dataset *);

/* Procedure execution functions. */
static void calc_descriptives (struct dsc_proc *, struct casereader *,
                               struct dataset *);
static void display (struct dsc_proc *dsc);

/* Parser and outline. */

/* Handles DESCRIPTIVES. */
int
cmd_descriptives (struct lexer *lexer, struct dataset *ds)
{
  struct dictionary *dict = dataset_dict (ds);
  struct dsc_proc *dsc;
  const struct variable **vars = NULL;
  size_t var_cnt = 0;
  int save_z_scores = 0;
  int z_cnt = 0;
  size_t i;
  bool ok;

  struct casegrouper *grouper;
  struct casereader *group;

  /* Create and initialize dsc. */
  dsc = xmalloc (sizeof *dsc);
  dsc->vars = NULL;
  dsc->var_cnt = 0;
  dsc->missing_type = DSC_VARIABLE;
  dsc->exclude = MV_ANY;
  dsc->show_var_labels = 1;
  dsc->show_index = 0;
  dsc->format = DSC_LINE;
  dsc->missing_listwise = 0.;
  dsc->valid = 0.;
  dsc->bad_warn = 1;
  dsc->sort_by_stat = DSC_NONE;
  dsc->sort_ascending = 1;
  dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;

  /* Parse DESCRIPTIVES. */
  while (lex_token (lexer) != '.')
    {
      if (lex_match_id (lexer, "MISSING"))
        {
          lex_match (lexer, '=');
          while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
            {
              if (lex_match_id (lexer, "VARIABLE"))
                dsc->missing_type = DSC_VARIABLE;
              else if (lex_match_id (lexer, "LISTWISE"))
                dsc->missing_type = DSC_LISTWISE;
              else if (lex_match_id (lexer, "INCLUDE"))
                dsc->exclude = MV_SYSTEM;
              else
                {
                  lex_error (lexer, NULL);
                  goto error;
                }
              lex_match (lexer, ',');
            }
        }
      else if (lex_match_id (lexer, "SAVE"))
        save_z_scores = 1;
      else if (lex_match_id (lexer, "FORMAT"))
        {
          lex_match (lexer, '=');
          while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
            {
              if (lex_match_id (lexer, "LABELS"))
                dsc->show_var_labels = 1;
              else if (lex_match_id (lexer, "NOLABELS"))
                dsc->show_var_labels = 0;
              else if (lex_match_id (lexer, "INDEX"))
                dsc->show_index = 1;
              else if (lex_match_id (lexer, "NOINDEX"))
                dsc->show_index = 0;
              else if (lex_match_id (lexer, "LINE"))
                dsc->format = DSC_LINE;
              else if (lex_match_id (lexer, "SERIAL"))
                dsc->format = DSC_SERIAL;
              else
                {
                  lex_error (lexer, NULL);
                  goto error;
                }
              lex_match (lexer, ',');
            }
        }
      else if (lex_match_id (lexer, "STATISTICS"))
        {
          lex_match (lexer, '=');
          dsc->show_stats = 0;
          while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
            {
              if (lex_match (lexer, T_ALL))
                dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
              else if (lex_match_id (lexer, "DEFAULT"))
                dsc->show_stats |= DEFAULT_STATS;
              else
		dsc->show_stats |= 1ul << (match_statistic (lexer));
              lex_match (lexer, ',');
            }
          if (dsc->show_stats == 0)
            dsc->show_stats = DEFAULT_STATS;
        }
      else if (lex_match_id (lexer, "SORT"))
        {
          lex_match (lexer, '=');
          if (lex_match_id (lexer, "NAME"))
            dsc->sort_by_stat = DSC_NAME;
          else
	    {
	      dsc->sort_by_stat = match_statistic (lexer);
	      if (dsc->sort_by_stat == DSC_NONE )
		dsc->sort_by_stat = DSC_MEAN;
	    }
          if (lex_match (lexer, '('))
            {
              if (lex_match_id (lexer, "A"))
                dsc->sort_ascending = 1;
              else if (lex_match_id (lexer, "D"))
                dsc->sort_ascending = 0;
              else
                lex_error (lexer, NULL);
              lex_force_match (lexer, ')');
            }
        }
      else if (var_cnt == 0)
        {
          if (lex_look_ahead (lexer) == '=')
            {
              lex_match_id (lexer, "VARIABLES");
              lex_match (lexer, '=');
            }

          while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
            {
              int i;

              if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
                                    PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
		goto error;

              dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
              for (i = dsc->var_cnt; i < var_cnt; i++)
                {
                  struct dsc_var *dv = &dsc->vars[i];
                  dv->v = vars[i];
                  dv->z_name[0] = '\0';
                  dv->moments = NULL;
                }
              dsc->var_cnt = var_cnt;

              if (lex_match (lexer, '('))
                {
                  if (lex_token (lexer) != T_ID)
                    {
                      lex_error (lexer, NULL);
                      goto error;
                    }
                  if (try_name (dict, dsc, lex_tokid (lexer)))
                    {
                      strcpy (dsc->vars[dsc->var_cnt - 1].z_name, lex_tokid (lexer));
                      z_cnt++;
                    }
                  else
                    msg (SE, _("Z-score variable name %s would be"
                               " a duplicate variable name."), lex_tokid (lexer));
                  lex_get (lexer);
                  if (!lex_force_match (lexer, ')'))
		    goto error;
                }
            }
        }
      else
        {
          lex_error (lexer, NULL);
          goto error;
        }

      lex_match (lexer, '/');
    }
  if (var_cnt == 0)
    {
      msg (SE, _("No variables specified."));
      goto error;
    }

  /* Construct z-score varnames, show translation table. */
  if (z_cnt || save_z_scores)
    {
      if (save_z_scores)
        {
          int gen_cnt = 0;

          for (i = 0; i < dsc->var_cnt; i++)
            if (dsc->vars[i].z_name[0] == 0)
              {
                if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
                                         var_get_name (dsc->vars[i].v),
                                         &gen_cnt))
                  goto error;
                z_cnt++;
              }
        }
      dump_z_table (dsc);
    }

  /* Figure out statistics to display. */
  if (dsc->show_stats & (1ul << DSC_SKEWNESS))
    dsc->show_stats |= 1ul << DSC_SESKEW;
  if (dsc->show_stats & (1ul << DSC_KURTOSIS))
    dsc->show_stats |= 1ul << DSC_SEKURT;

  /* Figure out which statistics to calculate. */
  dsc->calc_stats = dsc->show_stats;
  if (z_cnt > 0)
    dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
  if (dsc->sort_by_stat >= 0)
    dsc->calc_stats |= 1ul << dsc->sort_by_stat;
  if (dsc->show_stats & (1ul << DSC_SESKEW))
    dsc->calc_stats |= 1ul << DSC_SKEWNESS;
  if (dsc->show_stats & (1ul << DSC_SEKURT))
    dsc->calc_stats |= 1ul << DSC_KURTOSIS;

  /* Figure out maximum moment needed and allocate moments for
     the variables. */
  dsc->max_moment = MOMENT_NONE;
  for (i = 0; i < DSC_N_STATS; i++)
    if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
      dsc->max_moment = dsc_info[i].moment;
  if (dsc->max_moment != MOMENT_NONE)
    for (i = 0; i < dsc->var_cnt; i++)
      dsc->vars[i].moments = moments_create (dsc->max_moment);

  /* Data pass. */
  grouper = casegrouper_create_splits (proc_open (ds), dict);
  while (casegrouper_get_next_group (grouper, &group))
    calc_descriptives (dsc, group, ds);
  ok = casegrouper_destroy (grouper);
  ok = proc_commit (ds) && ok;

  /* Z-scoring! */
  if (ok && z_cnt)
    setup_z_trns (dsc, ds);

  /* Done. */
  free (vars);
  free_dsc_proc (dsc);
  return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;

 error:
  free (vars);
  free_dsc_proc (dsc);
  return CMD_FAILURE;
}

/* Tries to interpret the current token as the name of a statistic.

   If the token is an identifier listed in dsc_info, consumes it and
   returns the corresponding DSC_* value.  If it is some other
   identifier, skips it, emits an error, and returns DSC_NONE.  If the
   token is not an identifier at all (e.g., a subcommand with no
   specifiers), returns DSC_NONE without consuming anything. */
static enum dsc_statistic
match_statistic (struct lexer *lexer)
{
  int idx;

  if (lex_token (lexer) != T_ID)
    return DSC_NONE;

  for (idx = 0; idx < DSC_N_STATS; idx++)
    {
      if (lex_match_id (lexer, dsc_info[idx].identifier))
        return idx;
    }

  /* An identifier, but not one we know. */
  lex_get (lexer);
  lex_error (lexer, _("expecting statistic name: reverting to default"));
  return DSC_NONE;
}

/* Releases DSC along with its variable array and the moments attached
   to each variable.  Does nothing if DSC is null. */
static void
free_dsc_proc (struct dsc_proc *dsc)
{
  if (dsc != NULL)
    {
      size_t idx = 0;

      while (idx < dsc->var_cnt)
        moments_destroy (dsc->vars[idx++].moments);

      free (dsc->vars);
      free (dsc);
    }
}

/* Z scores. */

/* Checks whether NAME is available as a new Z-score variable name.
   Returns true only if NAME is neither the name of a variable in DICT
   nor (ignoring case) a Z-score name already recorded in DSC. */
static bool
try_name (const struct dictionary *dict, struct dsc_proc *dsc,
	  const char *name)
{
  bool taken = dict_lookup_var (dict, name) != NULL;
  size_t idx;

  for (idx = 0; !taken && idx < dsc->var_cnt; idx++)
    taken = strcasecmp (dsc->vars[idx].z_name, name) == 0;

  return !taken;
}

/* Generates a name for a Z-score variable based on a variable
   named VAR_NAME.

   First tries "Z" followed by VAR_NAME (truncated to fit).  If that
   name is taken, falls back to the generic names ZSC001-ZSC099,
   STDZ01-STDZ09, ZZZZ01-ZZZZ09 and ZQZQ01-ZQZQ09, in that order.
   *Z_CNT is the number of generic names that have already been tried;
   it is advanced for every generic name tried here, so that successive
   calls do not retry the same candidates.

   If successful, returns true and copies the new name into Z_NAME,
   which must have room for VAR_NAME_LEN + 1 bytes.  On failure (all
   126 generic names exhausted), emits an error and returns false. */
static bool
generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
                    const char *var_name, int *z_cnt)
{
  char name[VAR_NAME_LEN + 1];

  /* Try a name based on the original variable name. */
  name[0] = 'Z';
  str_copy_trunc (name + 1, sizeof name - 1, var_name);
  if (try_name (dict, dsc, name))
    {
      strcpy (z_name, name);
      return true;
    }

  /* Generate a synthetic name. */
  for (;;)
    {
      (*z_cnt)++;

      if (*z_cnt <= 99)
        snprintf (name, sizeof name, "ZSC%03d", *z_cnt);
      else if (*z_cnt <= 108)
        snprintf (name, sizeof name, "STDZ%02d", *z_cnt - 99);
      else if (*z_cnt <= 117)
        snprintf (name, sizeof name, "ZZZZ%02d", *z_cnt - 108);
      else if (*z_cnt <= 126)
        snprintf (name, sizeof name, "ZQZQ%02d", *z_cnt - 117);
      else
        {
          msg (SE, _("Ran out of generic names for Z-score variables.  "
                     "There are only 126 generic names: ZSC001-ZSC099, "
                     "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
          return false;
        }

      if (try_name (dict, dsc, name))
        {
          strcpy (z_name, name);
          return true;
        }
    }
  NOT_REACHED();
}

/* Outputs a table describing the mapping between source
   variables and Z-score variables. */
static void
dump_z_table (struct dsc_proc *dsc)
{
  size_t cnt = 0;
  struct tab_table *t;

  {
    size_t i;

    for (i = 0; i < dsc->var_cnt; i++)
      if (dsc->vars[i].z_name[0] != '\0')
	cnt++;
  }

  t = tab_create (2, cnt + 1, 0);
  tab_title (t, _("Mapping of variables to corresponding Z-scores."));
  tab_columns (t, SOM_COL_DOWN, 1);
  tab_headers (t, 0, 0, 1, 0);
  tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
  tab_hline (t, TAL_2, 0, 1, 1);
  tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
  tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
  tab_dim (t, tab_natural_dimensions, NULL);

  {
    size_t i, y;

    for (i = 0, y = 1; i < dsc->var_cnt; i++)
      if (dsc->vars[i].z_name[0] != '\0')
	{
	  tab_text (t, 0, y, TAB_LEFT, var_get_name (dsc->vars[i].v));
	  tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
	}
  }

  tab_submit (t);
}

/* Transformation function that fills in the Z-score variables of case
   *C.  A Z-score is set to SYSMIS if any of the following is true:
   1) the mean or standard deviation is SYSMIS; 2) the source value is
   missing in one of the classes excluded from the original analysis;
   3) missing values are handled listwise and any of the variables in
   the original analysis has such a missing value in this case.
   Always returns TRNS_CONTINUE. */
static int
descriptives_trns_proc (void *trns_, struct ccase **c,
                        casenumber case_idx UNUSED)
{
  struct dsc_trns *trns = trns_;
  bool drop_case = false;
  int idx;

  /* Listwise deletion: one missing value spoils every Z-score. */
  if (trns->missing_type == DSC_LISTWISE)
    {
      size_t v;

      assert(trns->vars);
      for (v = 0; v < trns->var_cnt && !drop_case; v++)
        {
          const struct variable *var = trns->vars[v];
          double score = case_num (*c, var);

          drop_case = var_is_num_missing (var, score, trns->exclude);
        }
    }

  /* Get a private copy of the case before writing to it. */
  *c = case_unshare (*c);
  for (idx = 0; idx < trns->z_score_cnt; idx++)
    {
      struct dsc_z_score *z = &trns->z_scores[idx];
      double input = case_num (*c, z->src_var);
      double *output = &case_data_rw (*c, z->z_var)->f;
      bool unusable = (z->mean == SYSMIS || z->std_dev == SYSMIS
                       || drop_case
                       || var_is_num_missing (z->src_var, input,
                                              trns->exclude));

      *output = unusable ? SYSMIS : (input - z->mean) / z->std_dev;
    }

  return TRNS_CONTINUE;
}

/* Frees the `struct dsc_trns' TRNS_ that setup_z_trns allocated:
   its Z-score array, its listwise variable array (if any) and the
   structure itself.  Always returns true. */
static bool
descriptives_trns_free (void *trns_)
{
  struct dsc_trns *t = trns_;

  free (t->z_scores);
  /* VARS is allocated if and only if missing values are handled
     listwise. */
  assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
  free (t->vars);
  /* The structure itself was xmalloc'd by setup_z_trns and nothing
     else releases it. */
  free (t);
  return true;
}

/* Sets up a transformation to calculate Z scores. */
static void
setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
{
  struct dsc_trns *t;
  size_t cnt, i;

  for (cnt = i = 0; i < dsc->var_cnt; i++)
    if (dsc->vars[i].z_name[0] != '\0')
      cnt++;

  t = xmalloc (sizeof *t);
  t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
  t->z_score_cnt = cnt;
  t->missing_type = dsc->missing_type;
  t->exclude = dsc->exclude;
  if ( t->missing_type == DSC_LISTWISE )
    {
      t->var_cnt = dsc->var_cnt;
      t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
      for (i = 0; i < t->var_cnt; i++)
	t->vars[i] = dsc->vars[i].v;
    }
  else
    {
      t->var_cnt = 0;
      t->vars = NULL;
    }

  for (cnt = i = 0; i < dsc->var_cnt; i++)
    {
      struct dsc_var *dv = &dsc->vars[i];
      if (dv->z_name[0] != '\0')
	{
          struct dsc_z_score *z;
	  struct variable *dst_var;

	  dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
          var_set_label (dst_var, xasprintf (_("Z-score of %s"),
                                             var_to_string (dv->v)));

          z = &t->z_scores[cnt++];
          z->src_var = dv->v;
          z->z_var = dst_var;
          z->mean = dv->stats[DSC_MEAN];
          z->std_dev = dv->stats[DSC_STDDEV];
	}
    }

  add_transformation (ds,
		      descriptives_trns_proc, descriptives_trns_free, t);
}

/* Statistical calculation. */

/* Forward declaration: returns true if case C has a missing value in
   any of DSC's variables.  Defined after calc_descriptives. */
static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);

/* Calculates and displays descriptive statistics for the cases
   in GROUP, which this function destroys.

   Makes one pass over the data for counts, minimum, maximum and
   first-pass moments, plus a second pass over a clone of the reader
   when moments beyond the mean are required.  If GROUP is empty or
   either pass fails, returns without displaying anything. */
static void
calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
                   struct dataset *ds)
{
  struct casereader *first_pass, *second_pass;
  struct ccase *cc;
  size_t v;

  /* Peek at the first case to print the split-file values; an empty
     group produces no output at all. */
  cc = casereader_peek (group, 0);
  if (cc == NULL)
    {
      casereader_destroy (group);
      return;
    }
  output_split_file_values (ds, cc);
  case_unref (cc);

  group = casereader_create_filter_weight (group, dataset_dict (ds),
                                           NULL, NULL);

  first_pass = group;
  second_pass = (dsc->max_moment > MOMENT_MEAN
                 ? casereader_clone (first_pass)
                 : NULL);

  /* Reset the per-group accumulators. */
  for (v = 0; v < dsc->var_cnt; v++)
    {
      struct dsc_var *dv = &dsc->vars[v];

      dv->valid = dv->missing = 0.0;
      if (dv->moments != NULL)
        moments_clear (dv->moments);
      dv->min = DBL_MAX;
      dv->max = -DBL_MAX;
    }
  dsc->missing_listwise = 0.;
  dsc->valid = 0.;

  /* First pass to handle most of the work. */
  while ((cc = casereader_read (first_pass)) != NULL)
    {
      double w = dict_get_case_weight (dataset_dict (ds), cc, NULL);
      bool skip_case = false;

      /* Cases with any missing value are always tallied, but only
         dropped when missing values are handled listwise. */
      if (listwise_missing (dsc, cc))
        {
          dsc->missing_listwise += w;
          skip_case = dsc->missing_type == DSC_LISTWISE;
        }

      if (!skip_case)
        {
          dsc->valid += w;

          for (v = 0; v < dsc->var_cnt; v++)
            {
              struct dsc_var *dv = &dsc->vars[v];
              double x = case_num (cc, dv->v);

              if (var_is_num_missing (dv->v, x, dsc->exclude))
                dv->missing += w;
              else
                {
                  if (dv->moments != NULL)
                    moments_pass_one (dv->moments, x, w);

                  if (x < dv->min)
                    dv->min = x;
                  if (x > dv->max)
                    dv->max = x;
                }
            }
        }

      case_unref (cc);
    }
  if (!casereader_destroy (first_pass))
    {
      casereader_destroy (second_pass);
      return;
    }

  /* Second pass for higher-order moments. */
  if (dsc->max_moment > MOMENT_MEAN)
    {
      while ((cc = casereader_read (second_pass)) != NULL)
        {
          double w = dict_get_case_weight (dataset_dict (ds), cc, NULL);

          /* Skip exactly the cases that the first pass skipped. */
          if (!(dsc->missing_type == DSC_LISTWISE
                && listwise_missing (dsc, cc)))
            {
              for (v = 0; v < dsc->var_cnt; v++)
                {
                  struct dsc_var *dv = &dsc->vars[v];
                  double x = case_num (cc, dv->v);

                  if (!var_is_num_missing (dv->v, x, dsc->exclude)
                      && dv->moments != NULL)
                    moments_pass_two (dv->moments, x, w);
                }
            }

          case_unref (cc);
        }
      if (!casereader_destroy (second_pass))
        return;
    }

  /* Calculate results. */
  for (v = 0; v < dsc->var_cnt; v++)
    {
      struct dsc_var *dv = &dsc->vars[v];
      bool have_min, have_max;
      double W;
      int s;

      /* Anything not computed below stays system-missing. */
      for (s = 0; s < DSC_N_STATS; s++)
        dv->stats[s] = SYSMIS;

      W = dsc->valid - dv->missing;
      dv->valid = W;

      if (dv->moments != NULL)
        moments_calculate (dv->moments, NULL,
                           &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
                           &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);

      if ((dsc->calc_stats & (1ul << DSC_SEMEAN))
          && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
        dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);

      if ((dsc->calc_stats & (1ul << DSC_STDDEV))
          && dv->stats[DSC_VARIANCE] != SYSMIS)
        dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);

      if ((dsc->calc_stats & (1ul << DSC_SEKURT))
          && dv->stats[DSC_KURTOSIS] != SYSMIS)
        dv->stats[DSC_SEKURT] = calc_sekurt (W);

      if ((dsc->calc_stats & (1ul << DSC_SESKEW))
          && dv->stats[DSC_SKEWNESS] != SYSMIS)
        dv->stats[DSC_SESKEW] = calc_seskew (W);

      /* The sentinels set during the reset mean that no valid value
         was seen. */
      have_min = dv->min != DBL_MAX;
      have_max = dv->max != -DBL_MAX;
      dv->stats[DSC_RANGE] = (have_min && have_max
                              ? dv->max - dv->min : SYSMIS);
      dv->stats[DSC_MIN] = have_min ? dv->min : SYSMIS;
      dv->stats[DSC_MAX] = have_max ? dv->max : SYSMIS;

      if (dsc->calc_stats & (1ul << DSC_SUM))
        dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
    }

  /* Output results. */
  display (dsc);
}

/* Tests case C against every variable in DSC's variable list.
   Returns true as soon as one of them holds a value that is missing in
   the classes DSC excludes, false if none does. */
static bool
listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
{
  size_t idx = 0;

  while (idx < dsc->var_cnt)
    {
      const struct variable *var = dsc->vars[idx].v;
      double value = case_num (c, var);

      if (var_is_num_missing (var, value, dsc->exclude))
        return true;
      idx++;
    }

  return false;
}

/* Statistical display. */

/* Forward declaration of the comparison callback that display passes to
   sort; it is defined at the end of the file. */
static algo_compare_func descriptives_compare_dsc_vars;

/* Builds and submits the table of descriptive statistics for DSC:
   one row per variable, with the variable name, the valid (and, in
   serial format, missing) counts, and one column per statistic
   selected in DSC's show_stats.  If a sort criterion is set, DSC's
   variable array is sorted in place first. */
static void
display (struct dsc_proc *dsc)
{
  struct tab_table *table;
  int n_cols;
  int col;
  size_t stat;
  size_t idx;

  /* Name column, one or two count columns, then the statistics. */
  n_cols = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
  for (stat = 0; stat < DSC_N_STATS; stat++)
    if (dsc->show_stats & (1ul << stat))
      n_cols++;

  if (dsc->sort_by_stat != DSC_NONE)
    sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
          descriptives_compare_dsc_vars, dsc);

  table = tab_create (n_cols, dsc->var_cnt + 1, 0);
  tab_headers (table, 1, 0, 1, 0);
  tab_box (table, TAL_1, TAL_1, -1, -1, 0, 0, n_cols - 1, dsc->var_cnt);
  tab_box (table, -1, -1, -1, TAL_1, 1, 0, n_cols - 1, dsc->var_cnt);
  tab_hline (table, TAL_2, 0, n_cols - 1, 1);
  tab_vline (table, TAL_2, 1, 0, dsc->var_cnt);
  tab_dim (table, tab_natural_dimensions, NULL);

  /* Header row. */
  col = 0;
  tab_text (table, col++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
  if (dsc->format != DSC_SERIAL)
    tab_text (table, col++, 0, TAB_CENTER | TAT_TITLE, "N");
  else
    {
      tab_text (table, col++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
      tab_text (table, col++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
    }

  for (stat = 0; stat < DSC_N_STATS; stat++)
    {
      if (!(dsc->show_stats & (1ul << stat)))
        continue;
      tab_text (table, col++, 0, TAB_CENTER | TAT_TITLE,
                gettext (dsc_info[stat].name));
    }

  /* One body row per variable. */
  for (idx = 0; idx < dsc->var_cnt; idx++)
    {
      struct dsc_var *dv = &dsc->vars[idx];
      size_t row = idx + 1;

      col = 0;
      tab_text (table, col++, row, TAB_LEFT, var_get_name (dv->v));
      tab_text_format (table, col++, row, 0, "%g", dv->valid);
      if (dsc->format == DSC_SERIAL)
        tab_text_format (table, col++, row, 0, "%g", dv->missing);

      for (stat = 0; stat < DSC_N_STATS; stat++)
        if (dsc->show_stats & (1ul << stat))
          tab_double (table, col++, row, TAB_NONE, dv->stats[stat], NULL);
    }

  tab_title (table, _("Valid cases = %g; cases with missing value(s) = %g."),
             dsc->valid, dsc->missing_listwise);

  tab_submit (table);
}

/* Compares `struct dsc_var's A_ and B_ according to the ordering
   specified by the `struct dsc_proc' DSC_: by variable name (ignoring
   case) when sorting by DSC_NAME, otherwise by the value of the
   selected statistic.  The result is negated for descending order. */
static int
descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
{
  const struct dsc_var *a = a_;
  const struct dsc_var *b = b_;
  const struct dsc_proc *dsc = dsc_;
  int order;

  if (dsc->sort_by_stat == DSC_NAME)
    order = strcasecmp (var_get_name (a->v), var_get_name (b->v));
  else
    {
      double lhs = a->stats[dsc->sort_by_stat];
      double rhs = b->stats[dsc->sort_by_stat];

      if (lhs < rhs)
        order = -1;
      else if (lhs > rhs)
        order = 1;
      else
        order = 0;
    }

  return dsc->sort_ascending ? order : -order;
}
