/* File "generation.c":
 * The generation commands for malaga. */

/* This file is part of Malaga, a system for Left Associative Grammars.
 * Copyright (C) 1995-1998 Bjoern Beutel
 *
 * Bjoern Beutel
 * Universitaet Erlangen-Nuernberg
 * Abteilung fuer Computerlinguistik
 * Bismarckstrasse 12
 * D-91054 Erlangen
 * e-mail: malaga@linguistik.uni-erlangen.de 
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#include <stdio.h>
#include <stdlib.h>
#include <setjmp.h>
#include "basic.h"
#include "pools.h"
#include "values.h"
#include "input.h"
#include "commands.h"
#include "instr_type.h"
#include "rule_type.h"
#include "rules.h"
#include "tries.h"
#include "lex_type.h"
#include "lexicon.h"
#include "analysis.h"
#include "debugger.h"

#ifdef HANGUL
#include "hangul.h"
#endif

#undef GLOBAL
#define GLOBAL
#include "generation.h"

/* constants ================================================================*/

/* Capacities of the static tables "segments" and "items" defined below. */
#define MAX_SEGMENTS 40 /* most segments a generated form may consist of */
#define ITEMS_MAX 40 /* most items a generation command may be given */

/* Size in bytes of the text buffer that "print_surface" assembles into. */
#define STRING_BUFFER_SIZE 1000

/* types ====================================================================*/

/* A running generation LAG state: one node of a singly linked list of
 * states. The lists are built by "generation_add_running_state" and
 * "generate" and released by "free_states" or directly with "free". */
typedef struct GEN_STATE_T /* a running generation LAG state */
{
  struct GEN_STATE_T *next; /* next <gen_state> in this list, NULL at end */
  value_t cat;              /* result category of this state; a copy made
                             * by "new_value" that is freed with the node */
  long_t  rule_set;         /* rule_set of this state; used as an offset
                             * into the "rule_sets" of the rule system */
} gen_state_t;

/* One category of an item: a node of the singly linked list that hangs
 * off "items[i].cats". The lists are built and freed in
 * "generate_command". */
typedef struct ITEM_CAT_T /* a category node for an item */
{
  value_t value;           /* category value; a copy made by "new_value" */
  struct ITEM_CAT_T *next; /* next category of the same item */
} item_cat_t;

/* variables ================================================================*/

/* Scalar state shared by the generation functions in this file. */
LOCAL grammar_t gen_grammar; /* grammar used for generation; set by
                              * "execute_rules" before each rule and read
                              * by "print_surface" */
LOCAL long_t max_segments;   /* user limit on segments in a word form;
                              * parsed in "generate_command" and checked
                              * to lie in 1..MAX_SEGMENTS */
LOCAL long_t num_results;    /* index of the current word form; reset to 0
                              * in "generate", incremented per result */
LOCAL long_t num_segments;   /* current number of segments; set by
                              * "execute_rules" before each rule */

/* The segments of the form that is currently being generated. Entry <n>
 * is filled in by "generate_local" when it tries an item as segment
 * number <n> (counted from 0). */
LOCAL struct /* information for each segment of generated word or sentence */
{
  string_t surface;            /* surface of this segment; points to the
                                * "surf" of an entry in <items> */
  gen_state_t *running_states; /* list of running states after combination;
                                * appended to by
                                * "generation_add_running_state" */
} segments[MAX_SEGMENTS];

/* The building blocks given to the last generation command. They are kept
 * between commands so that a command without items can reuse them. */
LOCAL struct /* segments a word or sentence may consist of */
{
  string_t surf;    /* surface of this item; allocated while the command
                     * arguments are parsed, freed when new items are read */
  item_cat_t *cats; /* categories for this item; rebuilt on every call of
                     * "generate_command" */
} items[ITEMS_MAX];

LOCAL long_t num_items; /* current size of <items>, at most ITEMS_MAX */

/* functions used by generation =============================================*/

GLOBAL void print_surface (surface_t surface_type)
/* Print current generated surface, enclosed in double quotes.
 * <surface_type> selects the segments that are printed:
 *   RIGHT_SURFACE: the last segment only,
 *   LEFT_SURFACE: all segments except the last one,
 *   any other value: all segments.
 * When the current grammar is SYNTAX, a blank separates the segments.
 * If the selection is empty (for example the left surface of a single
 * segment, or no segments at all), an empty string is printed and no
 * entry of <segments> is read. */
{
  char string_buffer[STRING_BUFFER_SIZE];
  string_t string;
  long_t first_segment, end_segment; /* half-open range to be printed */
  long_t i;

  first_segment = 0;
  end_segment = num_segments;
  if (surface_type == RIGHT_SURFACE)
    first_segment = num_segments - 1;
  else if (surface_type == LEFT_SURFACE)
    end_segment = num_segments - 1;

  /* Without any segment there is no last segment either; never index
   * <segments> with a negative value. */
  if (first_segment < 0)
    first_segment = 0;

  /* Start with an empty text, so the buffer is a valid string even when
   * the loop below copies nothing. */
  string_buffer[0] = EOS;
  string = string_buffer;

  for (i = first_segment; i < end_segment; i++)
  {
    /* Separate the words of a sentence, but not the allomorphs of a word. */
    if (i > first_segment && gen_grammar == SYNTAX)
      string = copy_string (string, " ", string_buffer + STRING_BUFFER_SIZE);

    /* NOTE(review): "copy_string" is given the end of the buffer and is
     * presumably responsible for not writing beyond it -- confirm. */
    string = copy_string (string, segments[i].surface,
                          string_buffer + STRING_BUFFER_SIZE);
  }

  printf ("\"%s\"", DECODED_STRING (string_buffer));
}

/*---------------------------------------------------------------------------*/

LOCAL void generation_add_end_state (value_t cat)
/* Callback for an end state: print the complete surface generated so far
 * as a numbered result on a line of its own.
 * <cat> is part of the callback signature but is not used here.
 * Nothing is printed, and nothing is counted, as long as the surface
 * consists of no segment at all. */
{
  if (num_segments <= 0)
    return;

  /* Count this result and put its index in front of the surface. */
  printf ("%ld: ", ++num_results);
  print_surface (RESULT_SURFACE);
  printf ("\n");
}

/*---------------------------------------------------------------------------*/

LOCAL void generation_add_running_state (value_t cat, long_t rule_set)
/* Callback for a running state: store a copy of <cat> together with
 * <rule_set> as a new state at the end of the state list of the segment
 * that was combined last, i.e. segment number <num_segments> - 1. */
{
  gen_state_t *state;
  gen_state_t *tail;

  /* Build the new list node. */
  state = (gen_state_t *) new_mem (sizeof (gen_state_t));
  state->cat = new_value (cat);
  state->next = NULL;
  state->rule_set = rule_set;

  /* An empty list simply starts with the new node. */
  tail = segments[num_segments-1].running_states;
  if (tail == NULL)
  {
    segments[num_segments-1].running_states = state;
    return;
  }

  /* Otherwise walk to the last node and hook the new one behind it, so
   * the states keep the order in which they were created. */
  while (tail->next != NULL)
    tail = tail->next;
  tail->next = state;
}

/*---------------------------------------------------------------------------*/

LOCAL void execute_rules (gen_state_t *states,
                          long_t segments_so_far,
                          string_t surf,
                          value_t category,
                          grammar_t grammar)
/* For every state in the list <states>, execute the rules of its rule set
 * in the rule system of <grammar>, with <surf> as right surface and
 * <category> as right category.
 * An empty <surf> selects the end rules of each rule set, a non-empty
 * <surf> selects all other rules.
 * Before each rule is executed, <num_segments> is set to
 * <segments_so_far>, <gen_grammar> is set to <grammar> and the value heap
 * is cleared. */
{
  rule_sys_t *rule_sys = rule_system[grammar];
  gen_state_t *state;

  for (state = states; state != NULL; state = state->next)
  {
    /* A rule set is a sequence of rule numbers that ends with -1. */
    long_t *rule_ptr = rule_sys->rule_sets + state->rule_set;

    while (*rule_ptr != -1)
    {
      long_t rule_number = *rule_ptr++;
      rule_t *rule = rule_sys->rules + rule_number;

      /* End rules go with an empty surface only, and only end rules do. */
      if ((rule->type == END_RULE) != (*surf == EOS))
        continue;

      num_segments = segments_so_far;
      clear_value_heap ();
      gen_grammar = grammar;
      execute_rule (rule_sys, rule_number, state->cat, category,
                    string_to_value (surf, NULL),
                    double_to_value (num_segments));
    }
  }
}

/*---------------------------------------------------------------------------*/

LOCAL void free_states (gen_state_t *states)
/* Release every state of the list that starts at <states>, together with
 * the category stored in it. An empty list (NULL) is accepted. */
{
  while (states != NULL)
  {
    gen_state_t *doomed = states;

    /* Step to the successor before the node is given back. */
    states = doomed->next;
    free (doomed->cat);
    free (doomed);
  }
}

/*---------------------------------------------------------------------------*/

LOCAL void generate_local (gen_state_t *states,
                           long_t segments_so_far,
                           grammar_t grammar)
/* Print all word forms or sentences (according to <grammar>) that can be
 * reached from the list <states>, whose surfaces consist of
 * <segments_so_far> segments. Results are printed as soon as they are
 * found. The search is depth-first: each item in <items> is tried as the
 * next segment, and the states that result from it are expanded
 * recursively before the next item is tried. */
{
  long_t item_index;

  /* No state, no successors. */
  if (states == NULL)
    return;

  check_user_break ();

  /* The end rules are those that are run with an empty right surface;
   * they may report the surface built so far as a result. */
  execute_rules (states, segments_so_far, "", NULL, grammar);

  /* Stop extending the surface once the user limit is reached. */
  if (segments_so_far >= max_segments)
    return;

  item_index = 0;
  while (item_index < num_items)
  {
    item_cat_t *item_cat;

    /* Enter the item as the next segment, with no states yet. */
    segments[segments_so_far].surface = items[item_index].surf;
    segments[segments_so_far].running_states = NULL;

    /* Combine the given states with each category of the item. The
     * resulting states are collected in the segment entry. */
    item_cat = items[item_index].cats;
    while (item_cat != NULL)
    {
      execute_rules (states, segments_so_far + 1, items[item_index].surf,
                     item_cat->value, grammar);
      item_cat = item_cat->next;
    }

    /* Go on with the extended surface, then drop its states. */
    generate_local (segments[segments_so_far].running_states,
                    segments_so_far + 1,
                    grammar);
    free_states (segments[segments_so_far].running_states);

    item_index++;
  }
}

/*---------------------------------------------------------------------------*/

LOCAL void generate (grammar_t grammar)
/* Generate sentences or word forms with the rule system of <grammar>,
 * starting from its initial category and initial rule set, and using the
 * current contents of <items>. The results are printed and numbered
 * starting with 1. */
{
  rule_sys_t *rule_sys = rule_system[grammar];
  gen_state_t *initial_state;

  /* No result has been printed yet. */
  num_results = 0;

  /* The search starts from a list that holds the initial state only. */
  initial_state = (gen_state_t *) new_mem (sizeof (gen_state_t));
  initial_state->cat = new_value (rule_sys->values + rule_sys->initial_cat);
  initial_state->rule_set = rule_sys->initial_rule_set;
  initial_state->next = NULL;

  /* Let "execute_rule" hand its resulting states to this module. */
  add_end_state = generation_add_end_state;
  add_running_state = generation_add_running_state;

  set_debug_mode (RUN_MODE, NULL);
  in_analysis = FALSE;

  generate_local (initial_state, 0, grammar);

  /* The initial state is a single node, so it is released directly. */
  free (initial_state->cat);
  free (initial_state);
}

/*---------------------------------------------------------------------------*/

LOCAL void generate_command (grammar_t grammar, string_t arguments)
/* Generate sentences or words from items, depending on <grammar>. */
{
  long_t i;
      
  if (in_debugger)
    error ("in debug mode");

  if (rule_system[grammar] == NULL)
    error ("rule file not loaded");

  max_segments = parse_integer (&arguments);
  if (max_segments < 1 || max_segments > MAX_SEGMENTS)
    error ("can't generate that many segments");

  if (*arguments != EOS) /* Only read new items if there are any. */
  {
    /* Free items that are allocated from last "generate_command". */
    for (i = 0; i < num_items; i++)
      free (items[i].surf);
    
    num_items = 0;
    while (*arguments != EOS) 
    {
      if (num_items == ITEMS_MAX)
	error ("too many items");
#ifdef HANGUL
      {
	string_t argument;
	
	argument = parse_word (&arguments);
	items[num_items].surf = new_string (ENCODED_STRING (argument));
	free (argument);
      }
#else
      items[num_items].surf = parse_word (&arguments);
#endif
      items[num_items].cats = NULL;
      num_items++;
    }
  }

  /* Create categories for <items>. */
  for (i = 0; i < num_items; i++)
  {
    item_cat_t **cat_ptr;
    
    cat_ptr = &items[i].cats;
    *cat_ptr = NULL;
    if (grammar == MORPHOLOGY)
    {
      string_t surf_end;
      long_t trie_node;
      long_t cat_list_index;
      
      surf_end = items[i].surf;
      trie_node = lexicon.trie_root;
      while (lookup_trie (lexicon.trie,
			  &trie_node, &surf_end, &cat_list_index))
      {
	if (*surf_end == EOS)
	  break;
      }
      
      if (*surf_end == EOS)
      {
	bool_t last_category;
	
	/* Add all categories of that allomorph. */
	do 
	{ 
	  long_t cat_index;
	  
	  cat_index = lexicon.cat_lists[cat_list_index];
	  if (cat_index < 0)
	  {
	    cat_index = - cat_index - 1;
	    last_category = TRUE;
	  }
	  else
	    last_category = FALSE;
	  
	  *cat_ptr = (item_cat_t *) new_mem (sizeof (item_cat_t));
	  (*cat_ptr)->value = new_value (lexicon.values + cat_index);
	  cat_ptr = &(*cat_ptr)->next;
	  
	  cat_list_index++;
	} while (! last_category);
      }
    }
    else
    {
      value_t morph_result;
      
      /* Call morphological analysis to get right-categories. */
      analyse (MORPHOLOGY, items[i].surf, FALSE, TRUE);
      
      /* Add all morphological results to category list. */
      cat_ptr = &items[i].cats;
      reset_analysis_results ();
      for (morph_result = get_next_analysis_result ();
	   morph_result != NULL;
	   morph_result = get_next_analysis_result ())
      {
	*cat_ptr = (item_cat_t *) new_mem (sizeof (item_cat_t));
	(*cat_ptr)->value = new_value (morph_result);
	cat_ptr = &(*cat_ptr)->next;
      }
    }
  }
  
  generate (grammar);
  
  /* Free categories of all items. */
  for (i = 0; i < num_items; i++)
  {
    item_cat_t *cat;
    
    cat = items[i].cats;
    while (cat != NULL)
    {
      item_cat_t *next_cat = cat->next;
      
      free (cat->value);
      free (cat);
      cat = next_cat;
    }
  }
}

/*---------------------------------------------------------------------------*/

LOCAL void do_mg (string_t arguments)
/* Generate morphologically.
 * Handler of the command "mg": pass <arguments> (segment limit, optionally
 * followed by allomorphs) to "generate_command" for the MORPHOLOGY
 * grammar. */
{
  generate_command (MORPHOLOGY, arguments);
}

/* Command entry for "mg": the command name, its handler "do_mg" and the
 * help text.
 * NOTE(review): the field layout of "command_t" is declared elsewhere;
 * the meaning of the three initializers is read off their contents. */
GLOBAL command_t mg_command =
{
  "mg", do_mg,
  "Generate all word forms that consist only of the given allomorphs.\n"
  "Arguments:\n"
  "  <max_num_of_allos> <allomorph> ... -- use <allomorph> ...\n"
  "  <max_num_of_allos> -- use allomorphs of last generation command\n"
  "\"mg\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/

LOCAL void do_sg (string_t arguments)
/* Generate syntactically.
 * Handler of the command "sg": pass <arguments> (segment limit, optionally
 * followed by word forms) to "generate_command" for the SYNTAX grammar. */
{
  generate_command (SYNTAX, arguments);
}

/* Command entry for "sg": the command name, its handler "do_sg" and the
 * help text.
 * NOTE(review): the field layout of "command_t" is declared elsewhere;
 * the meaning of the three initializers is read off their contents. */
GLOBAL command_t sg_command =
{
  "sg", do_sg,
  "Generate all sentences that consist only of the given word forms.\n"
  "Arguments:\n"
  "  <max_num_of_words> <word> ... -- use <word> ...\n"
  "  <max_num_of_words> -- use words of last generation command\n"
  "\"sg\" can't be used in debug mode.\n"
};

/*---------------------------------------------------------------------------*/
