#****************************************************************************
#  ##   ##         #####   #####  ##     **        NoSQL RDBMS - row        *
#  ###  ##        ####### ####### ##     **      $Revision: 2.1 $			*
#  #### ##        ###     ##   ## ##     ************************************
#  #######  ####  #####   ##   ## ##     **      Carlo Strozzi (c) 1998     *
#  ####### ######   ##### ## # ## ##     ************************************
#  ## #### ##  ##     ### ##  ### ##     **           Written by            *
#  ##  ### ###### ####### ######  ###### **          Carlo Strozzi          *
#  ##   ##  ####   #####   #### # ###### **     e-mail: carlos@linux.it     *
#****************************************************************************
#   NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.                          *
#   This program comes with ABSOLUTELY NO WARRANTY; for details             *
#   refer to the GNU General Public License.                                *
#****************************************************************************
#
#  Selects table rows based on awk expressions using column names.
#
#  Selects table rows that match a specified awk expression, applied
#  to column names.
#
#  If the input table contains duplicated columns, with the same name
#  but different values, only the first (leftmost) one is taken into
#  account, but the output table will still have the duplicates.
#
#  If no expression is specified, then nothing is printed. The awk
#  expression must be enclosed in single quotes, to prevent the shell from
#  parsing it.
#
#  The following names are reserved to the awk language, and should not
#  be used to indicate column names:
#
#  BEGIN, END, break, continue, else, exit, exp, for, getline, if, in,
#  index, int, length, log, next, print, printf, split, sprintf, sqrt,
#  substr, while, and possibly others, depending on the implementation
#  of your awk (i.e. mawk, gawk, etc.). Refer to the man page and the
#  documentation of your awk interpreter.
#
#  This NoSQL operator reads a table from STDIN and writes a
#  table to STDOUT.
#
########################################################################

########################################################################
# BEGIN block
########################################################################

BEGIN \
{
  # Empty-string constant, used throughout the program for emptiness tests.
  NULL = ""

  # Tables are tab-separated, on input as well as on output.
  OFS = "\t"
  FS = OFS

  # Nothing to do if no arguments were handed over.
  if ( __nosql_args == NULL )  exit

  # Walk the argument words strictly in order: 'for(i in args)' visits
  # the elements in an unspecified order, which would scramble the
  # resulting AWK program.
  n_args = split( __nosql_args, args, " " )

  for ( i = 1; i <= n_args; i++ )
  {
	# Debug switches are consumed here; every other word is appended
	# to the expression text.
	if ( args[i] == "-x" || args[i] == "--debug" ) { debug = 1 ; continue }
	awk_program = awk_program " " args[i]
  }

  # Only switches were given, i.e. there is no expression to apply.
  if ( awk_program == NULL )  exit
}

########################################################################
# Main loop
########################################################################

# Table header
#
# Runs on the first input line only (r is still zero). It records the
# position of each column name, builds the shell command that runs the
# back-end awk program, prints the table header followed by its dashline,
# and then skips to the next input line.
r == 0 \
{
  # Map every column name to its positional field reference.
  # Do not let duplicated input columns fool us: only the first
  # (leftmost) occurrence of a name is recorded.
  for ( col = 1; col <= NF; col++ )
  {
	if ( col_names[$(col)] == NULL)
	{
	  col_names[$(col)] = $(col)
	  awk_col[$(col)] = "$(" col ")"
	}
  }

  # Replace column names in the user expression with field references.
  awk_body = parser( awk_program )

  # The back-end program is handed to the shell between single quotes.
  # A single quote inside the expression would end that quoted word
  # early, so each one is rewritten as '\'' (close the quote, emit an
  # escaped quote, reopen the quote).
  sq_count = split( awk_body, sq_part, "'" )
  awk_body = sq_part[1]
  for ( sq_i = 2; sq_i <= sq_count; sq_i++ )
  {
	awk_body = awk_body "'\\''" sq_part[sq_i]
  }

  # Assemble the back-end awk program and the command that runs it.
  # The interpreter is taken from $NSQAWK, defaulting to 'awk'.
  awkpgm = "'BEGIN{FS=OFS=\"\\t\";}" awk_body " {print}'"

  unix_cmd = "${NSQAWK:-awk} " awkpgm

  if ( debug )
  {
	print "\n" awk_program "\n" unix_cmd "\n" > "/dev/stderr"
  }

  # Print header, then turn every non-tab character into a dash to
  # obtain the dashline.
  print; gsub( /[^\t]/, "-" ); print

  # Make sure we print the header before calling awk(1) again.
  fflush()

  r++ ; next
}

# Dashline: the second input line is dropped, since the header rule has
# already printed a freshly built one.
r == 1 {
  r = 2
  next
}

# Table body: every remaining line is piped to the back-end awk command,
# which prints only the rows that match the expression.
{
  print $0 | unix_cmd
}

# Parser function
#
# Rewrites the user-supplied AWK expression so that every identifier
# that is a key of the global awk_col array (i.e. a column name) is
# replaced by the field reference stored there, such as "$(3)".
# Identifiers that are not column names are copied unchanged.
#
#   in_pgm    the AWK expression, as text.
#
# Returns the rewritten expression. All the other parameters are local
# variables and must not be passed by the caller.
#
# An identifier is a run of letters and underscores, optionally followed
# by further letters, underscores and digits. The content of double-quoted
# string literals is copied verbatim. Regular expression literals (/.../)
# are not recognized, so a column name appearing inside one is rewritten
# as well.

function parser( in_pgm,			i, c, var_name, p_length, \
									quoted, out_pgm )
{
  p_length = length( in_pgm )

  # Characters are fetched with substr() rather than split( s, a, "" ),
  # as splitting on an empty separator is not portable across awks.
  #
  # The loop goes one position past the end of the input, where substr()
  # yields an empty string, to make sure an identifier that ends the
  # program is still looked up and emitted.

  for ( i = 1; i <= p_length + 1; i++ )
  {
	c = substr( in_pgm, i, 1 )

	if ( quoted )
	{
	  out_pgm = out_pgm c
	  if ( c == "\\" )
	  {
		# Copy the escaped character too, so that neither \" nor the
		# second half of \\ can be mistaken for something else.
		out_pgm = out_pgm substr( in_pgm, ++i, 1 )
	  }
	  else if ( c == "\"" ) quoted = 0
	  continue
	}

	if ( c ~ /[A-Za-z_]/ ) { var_name = var_name c ; continue }

	if ( var_name != NULL )
	{
	  # Digits may continue an identifier, but not start one.
	  if ( c ~ /[0-9]/ ) { var_name = var_name c ; continue }

	  # The identifier is complete: emit it (translated if it is a
	  # column name) before whatever follows, string literals included.
	  if ( var_name in awk_col ) out_pgm = out_pgm awk_col[var_name]
	  else out_pgm = out_pgm var_name
	  var_name = NULL
	}

	# An unescaped double quote opens a string literal.
	if ( c == "\"" && ( i == 1 || substr( in_pgm, i - 1, 1 ) != "\\" ) )
	{
	  quoted = 1
	}

	out_pgm = out_pgm c
  }
  return out_pgm
}

