#****************************************************************************
#  ##   ##         #####   #####  ##     **      NoSQL RDBMS - compute      *
#  ###  ##        ####### ####### ##     **      $Revision: 2.1 $			*
#  #### ##        ###     ##   ## ##     ************************************
#  #######  ####  #####   ##   ## ##     **      Carlo Strozzi (c) 1998     *
#  ####### ######   ##### ## # ## ##     ************************************
#  ## #### ##  ##     ### ##  ### ##     **           Written by            *
#  ##  ### ###### ####### ######  ###### **          Carlo Strozzi          *
#  ##   ##  ####   #####   #### # ###### **     e-mail: carlos@linux.it     *
#****************************************************************************
#   NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.                          *
#   This program comes with ABSOLUTELY NO WARRANTY; for details             *
#   refer to the GNU General Public License.                                *
#****************************************************************************
#
#  Applies arbitrary awk expressions contained in a file, using column names.
#
#  If the input table contains duplicated columns, i.e. columns with the        
#  same name but possibly different values, only the first (leftmost) one       
#  is taken into account. The output will still have the duplicates, but
#  this time with equal column values.
#
#  If no file name is specified, then the input table is printed to
#  STDOUT unchanged.
#
#  The following names are reserved to the awk language, and should not
#  be used to indicate column names:
#
#  BEGIN, END, break, continue, else, exit, exp, for, getline, if, in,
#  index, int, length, log, next, print, printf, split, sprintf, sqrt,
#  substr, while, and possibly others, depending on the implementation
#  of your awk (i.e. mawk, gawk, etc.). Refer to the man page and the
#  documentation of your awk interpreter.
#
#  This NoSQL operator reads a table from STDIN and writes a
#  table to STDOUT.
#
########################################################################

########################################################################
# BEGIN block
########################################################################

# Parses the operator's command line, which the caller passes in as the
# blank-separated string '__nosql_args', and sets:
#
#   debug        1 if '-x' or '--debug' was given
#   awk_program  the last argument that is not a recognised option
#
# '-f' / '--file' are accepted and skipped; the word that follows them is
# picked up as the program file name by the rule above.
#
# If no program file was named, or no temporary file name was supplied in
# '__nosql_tmpfile', the script exits before reading any input.

BEGIN \
{
  NULL = ""; FS = OFS = "\t";
  n_args = split( __nosql_args, args, " " )

  # Note: 'for(i in args)' must not be used here: its iteration order is
  # unspecified, and the arguments have to be examined left to right.

  for ( i = 1; i <= n_args; i++ )
  {
	# An option must never be mistaken for the program file name, so
	# each recognised switch skips straight to the next argument.
	if ( args[i] == "-x" || args[i] == "--debug" ) { debug = 1 ; continue }
	if ( args[i] == "-f" || args[i] == "--file" ) continue
	awk_program = args[i]
  }
  if ( awk_program == NULL || __nosql_tmpfile == NULL )  exit
}

########################################################################
# Main loop
########################################################################

# Table header.
#
# Runs on the first input record only (r is still 0). It:
#   1. maps every column name to its field reference "$(n)" in awk_col[],
#   2. rewrites the user's program from 'awk_program' into
#      '__nosql_tmpfile', replacing column names by field references,
#   3. builds the command ('unix_cmd') that the table body is piped to,
#   4. prints the header line and a freshly generated dashline.
r == 0 \
{

  # Start building the converted awk program file. The user's statements
  # are wrapped in a single action, followed by an unconditional print.

  printf("BEGIN{FS=OFS=\"\\t\";}{") > __nosql_tmpfile

  # Do not let duplicated input columns fool us: only the first
  # (leftmost) occurrence of a name is mapped to a field.

  for ( col = 1; col <= NF; col++ )
  {
	if ( col_names[$(col)] == NULL )
	{
	  col_names[$(col)] = $(col)
	  awk_col[$(col)] = "$(" col ")"
	}
  }

  # Translate the user's program one line at a time. The getline is
  # parenthesised because 'getline var < file > 0' is ambiguous: the
  # '> 0' could otherwise be taken as part of the file name expression.

  while ( ( getline in_rec < awk_program ) > 0 )
	printf("%s\n", parser( in_rec )) >> __nosql_tmpfile
  close( awk_program )

  printf("}{print}\n") >> __nosql_tmpfile

  # The file must be complete on disk before anything reads it back.
  close( __nosql_tmpfile )
  if ( debug )
  {
	# Dump the generated program to STDERR, framed by blank lines.
	printf("\n") > "/dev/stderr"
	while ( ( getline out_rec < __nosql_tmpfile ) > 0 )
	  print out_rec > "/dev/stderr"
	close( __nosql_tmpfile )
	printf("\n") > "/dev/stderr"
  }

  # The interpreter can be overridden through the NSQAWK environment
  # variable; the expansion is done by the shell that runs the pipe.
  unix_cmd = "${NSQAWK:-awk} -f " __nosql_tmpfile

  # Print header, then turn every non-tab character into a dash to
  # obtain the dashline.

  print; gsub( /[^\t]/, "-" ); print

  # Make sure we print the header before calling awk(1) again.
  fflush()

  r++ ; next
}

# Dashline: the second input record. The header rule has already written
# a regenerated dashline, so this record is discarded.
r == 1 \
{
  r = 2
  next
}

# Table body: every remaining record is handed to the generated awk
# program through the 'unix_cmd' pipe.
{
  print $0 | unix_cmd
}

# Parser function
#
# Translates one line of the user's awk program: every identifier that is
# a key of the global awk_col[] array (a column name) is replaced by the
# field reference stored there; all other text is copied through.
#
#   in_pgm   one line of awk source text
#   returns  the translated line, with any trailing comment removed
#
# Rules applied while scanning:
#   - an identifier is a letter or underscore followed by letters,
#     underscores and digits;
#   - text inside double-quoted strings is copied verbatim; a backslash
#     inside a string escapes the character that follows it;
#   - outside a string, a double quote preceded by a backslash does not
#     open a string;
#   - outside a string, '#' starts a comment and ends the line.
#
# Regular expression literals are not recognised: a '#' or '"' inside
# /.../ is handled as if it appeared in ordinary code.
#
# All names after 'in_pgm' are local variables.

function parser( in_pgm,			i, var_name, p_length, \
									c, quoted, escaped, out_pgm )
{
  p_length = length( in_pgm )

  # The scan runs one position past the end of the line, where 'c' is
  # the empty string, so that an identifier ending the line is still
  # flushed by the code below.

  for ( i = 1; i <= p_length + 1; i++ )
  {
	c = substr( in_pgm, i, 1 )

	# Inside a string literal: copy verbatim and watch for its end.
	if ( quoted )
	{
	  out_pgm = out_pgm c
	  if ( escaped ) escaped = 0
	  else if ( c == "\\" ) escaped = 1
	  else if ( c == "\"" ) quoted = 0
	  continue
	}

	# Collect identifier characters; digits count only after the first.
	if ( c ~ /[A-Za-z_]/ || ( var_name != NULL && c ~ /[0-9]/ ) )
	{
	  var_name = var_name c
	  continue
	}

	# Any other character ends a pending identifier. It is flushed here,
	# before comments and strings are looked at, so that a name directly
	# followed by '#' or '"' is neither lost nor misplaced.
	if ( var_name != NULL )
	{
	  if ( var_name in awk_col ) out_pgm = out_pgm awk_col[var_name]
	  else out_pgm = out_pgm var_name
	  var_name = NULL
	}

	# Comment lines and inline comments: drop the rest of the line.
	if ( c == "#" ) return out_pgm

	if ( c == "\"" && ( i == 1 || substr( in_pgm, i - 1, 1 ) != "\\" ) )
	  quoted = 1

	out_pgm = out_pgm c
  }
  return out_pgm
}

