#****************************************************************************
#  ##   ##         #####   #####  ##     **        NoSQL RDBMS - summ       *
#  ###  ##        ####### ####### ##     **        $Revision: 2.1 $			*
#  #### ##        ###     ##   ## ##     ************************************
#  #######  ####  #####   ##   ## ##     **      Carlo Strozzi (c) 1998     *
#  ####### ######   ##### ## # ## ##     ************************************
#  ## #### ##  ##     ### ##  ### ##     **           Adapted by            *
#  ##  ### ###### ####### ######  ###### **          Carlo Strozzi          *
#  ##   ##  ####   #####   #### # ###### **     e-mail: carlos@linux.it     *
#****************************************************************************
#   NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.                          *
#   This program comes with ABSOLUTELY NO WARRANTY; for details             *
#   refer to the GNU General Public License.                                *
#****************************************************************************
# Original code: summ,v 2.6 1994/11/05 12:18:40 hobbs
#****************************************************************************

# Keep only the basename of the program name; it prefixes every diagnostic.
$0 =~ s{.*/}{} ;

# Consume leading command-line options (anything starting with '-').
# Whatever is left in @ARGV afterwards is treated as column names.
#
#   -cu   --count-unique            $CUNIQ, $SAV  print unique-value counts
#   -cun  --count-unique-null       + $CUNUL      list only null/blank values
#   -cuu  --count-unique-each       + $CUQBY      list every distinct value
#   -cu2  --count-unique-multiple   + $CUQBY, $CU2  only values seen > once
#   -m    --compute                 $MAM, $SAV    print min, avg, max, total
#   -v    --revert                  $INV          invert the column selection
#
# $SAV tells the read loop to tally per-column values.
while ( $ARGV[0] =~ /^-/ ) {
    my $opt = shift @ARGV ;

    if ( my ($short) = $opt =~ /^-c(.*)/ ) {
        # Short count option: the sub-option is whatever follows "-c".
        if ( $short =~ /^u/ )  { $CUNIQ++ ; $SAV++ ; }
        $CUNUL++ if $short =~ /^un/ ;
        $CUQBY++ if $short =~ /^uu/ ;
        if ( $short =~ /^u2/ ) { $CUQBY++ ; $CU2++ ; }
    }
    elsif ( my ($long) = $opt =~ /^--count-(.*)/ ) {
        # Long count option: same flags as the short forms above.
        if ( $long =~ /^unique/ )          { $CUNIQ++ ; $SAV++ ; }
        $CUNUL++ if $long =~ /^unique-null/ ;
        $CUQBY++ if $long =~ /^unique-each/ ;
        if ( $long =~ /^unique-multiple/ ) { $CUQBY++ ; $CU2++ ; }
    }
    elsif ( $opt =~ /^-m.*/ || $opt =~ /^--compute$/ ) {
        $MAM++ ;
        $SAV++ ;
    }
    elsif ( $opt =~ /^-v.*/ || $opt =~ /^--revert$/ ) {
        $INV++ ;
    }
    else {
        die "\n$0: unknown option: $opt\n" ;
    }
}
# Read the table from STDIN, one tab-separated record per line.
#
# Lines whose first non-blank character is '#' are comments and are ignored.
# Of the remaining lines the first is the column-name header and the second
# is skipped (presumably the dashed separator line of the table format);
# every later line is a data row.
#
# Globals written here:
#   @H     column names from the header line
#   @a     indexes of the selected columns (filled in by get_col_x)
#   $rows  number of data rows read
#   %q     occurrences of each value, keyed as "<column index>|<value>"
#   @cu    number of distinct values seen per column index
while( <STDIN> ){
    next if /^\s*#/ ;			# comment
    # chomp, not chop: a final line without a trailing newline must not
    # lose the last character of its last field.
    chomp ;
    @F = split( /\t/, $_ );
    if( ++$lln < 3 ){
        if( $lln == 1 ){			# col name line
            @H = @F ;			# save headers
            # With no columns named and no inversion, report on all columns.
            @ARGV = @H if ! @ARGV && ! $INV ;
            get_col_x() ;		# resolve column names to indexes in @a
        }
        next ;
    }
    $rows++ ;				# data row count
    next unless $SAV ;			# tallies only needed for -c / -m
    for my $col (@a){
        my $key = "$col|$F[$col]" ;
        # Post-increment yields the old count: false means first sighting.
        $cu[$col]++ unless $q{$key}++ ;
    }
}
# Build the "count value" report lines for one column.
#
#   $col          column index whose values are wanted
#   $sorted_keys  array ref of the keys of %q ("<column index>|<value>"),
#                 already sorted
#
# Returns one formatted line per reported value.  Empty values are shown as
# "(null)" and whitespace-only values as "(blank)".  When $CUNUL is set only
# those two kinds are reported; when $CU2 is set only values that occurred
# more than once are reported.
sub _unique_value_lines {
    my ( $col, $sorted_keys ) = @_ ;
    my @lines ;
    for my $key (@$sorted_keys) {
        # Limit of 2: only the first '|' separates the column index from the
        # value, so values that themselves contain '|' stay intact.
        my ( $key_col, $value ) = split( /\|/, $key, 2 );
        next unless $key_col eq $col ;

        my $label = $value ;
        my $is_empty = 0 ;
        if ( $value eq "" ) {
            $label = "(null)" ;
            $is_empty = 1 ;
        }
        elsif ( $value =~ /^\s+$/ ) {
            $label = "(blank)" ;
            $is_empty = 1 ;
        }

        # Skip rather than stop: this does not depend on null/blank keys
        # sorting ahead of every other value, and a literal "(null)" data
        # value is not mistaken for an empty one.
        next if $CUNUL && ! $is_empty ;
        next if $CU2 && ! ( $q{$key} > 1 );

        push( @lines, sprintf( "%8d %s\n", $q{$key}, $label ) );
    }
    return @lines ;
}

# Total number of data rows; 0 rather than an empty string for an empty table.
print "Rows: ", ( $rows || 0 ), "\n" ;
if( $CUNIQ ){				# count of unique stuff
    my $list_values = $CUQBY || $CUNUL ;
    # Sort the keys once for all columns instead of once per column.
    my @sorted_keys = $list_values ? sort( keys %q ) : () ;
    for my $col (@a){
        print "Unique values for $H[$col]: ", ( $cu[$col] || 0 ), "\n" ;
        next unless $list_values ;
        my @lines = _unique_value_lines( $col, \@sorted_keys ) ;
        print @lines ;
    }
}
# Compute the numeric summary of one column from the value tallies in %q.
#
#   $col  column index; keys of %q have the form "<column index>|<value>"
#         and map each distinct value to its number of occurrences.
#
# Returns the list (min, avg, max, total).  Values are used in numeric
# context, so an empty value counts as 0.  All four results are 0 when the
# column has no values at all.
sub _min_avg_max {
    my ($col) = @_ ;
    my ( $n, $sum ) = ( 0, 0 );
    my ( $min, $max );
    # Sorted so that the summation order is reproducible from run to run.
    for my $key ( sort( keys %q ) ){
        # Limit of 2: only the first '|' is the index/value separator.
        my ( $key_col, $value ) = split( /\|/, $key, 2 );
        next unless $key_col eq $col ;
        $sum += $value * $q{$key} ;
        $n   += $q{$key} ;
        # Seed min and max from the first value seen rather than from fixed
        # sentinels, so all-negative and very large columns are handled.
        $min = $value if ! defined $min || $value < $min ;
        $max = $value if ! defined $max || $value > $max ;
    }
    return ( 0, 0, 0, 0 ) unless $n ;
    return ( $min, $sum / $n, $max, $sum );
}

if( $MAM ){				# min, avg, max
    for my $col (@a){
        # %d prints every figure, including the average, as an integer.
        printf( "Min, Avg, Max, Total for %s: %d, %d, %d, %d\n",
            $H[$col], _min_avg_max($col) ) ;
    }
}
sub get_col_x {		# get, chk column indexes, inc -v, die if bad column
			# uses @H, $INV, put indexes in @a.
			# modified for nsq-summ.
    #
    # Resolves each column name in @ARGV to its zero-based position in the
    # header list @H and appends that position to @a, in command-line order.
    # Dies with "bad column name" for a name that is not in @H.  When $INV
    # is set, @a is then replaced by the positions of all columns that were
    # NOT selected, in header order.  Takes no arguments.

    # Name -> position map.  Filled back to front so that, if a header name
    # is repeated, the first occurrence wins.
    my %pos_of ;
    for my $pos ( reverse 0 .. $#H ) {
        $pos_of{ $H[$pos] } = $pos ;
    }

    # A column named more than once is selected only once; listing it twice
    # in @a would double its value counts and totals in the read loop.
    my %seen = map { $_ => 1 } @a ;
    for my $arg (@ARGV) {
        die "\n$0: bad column name: $arg\n" unless exists $pos_of{$arg} ;
        my $pos = $pos_of{$arg} ;
        push( @a, $pos ) unless $seen{$pos}++ ;
    }

    if( $INV ){					# inverse option
        my %picked = map { $_ => 1 } @a ;
        @a = grep { ! $picked{$_} } 0 .. $#H ;
    }
    return ;
}
