/*
 * This file is part of the Pablo Performance Analysis Environment
 *
 *                                           TM
 * The Pablo Performance Analysis Environment   software is *not* in
 * the public domain.  However, it is freely available without fee for
 * education, research, and non-profit purposes.  By obtaining copies
 * of this and other files that comprise the Pablo Performance Analysis
 * Environment, you, the Licensee, agree to abide by the following
 * conditions and understandings with respect to the copyrighted software:
 * 
 * 1.  The software is copyrighted in the name of the Board of Trustees
 *     of the University of Illinois (UI), and ownership of the software
 *     remains with the UI. 
 *
 * 2.  Permission to use, copy, and modify this software and its documentation
 *     for education, research, and non-profit purposes is hereby granted
 *     to Licensee, provided that the copyright notice, the original author's
 *     names and unit identification, and this permission notice appear on
 *     all such copies, and that no charge be made for such copies.  Any
 *     entity desiring permission to incorporate this software into commercial
 *     products should contact:
 *
 *          Professor Daniel A. Reed                 reed@cs.uiuc.edu
 *          University of Illinois
 *          Department of Computer Science
 *          2413 Digital Computer Laboratory
 *          1304 West Springfield Avenue
 *          Urbana, Illinois  61801
 *          USA
 *
 * 3.  Licensee may not use the name, logo, or any other symbol of the UI
 *     nor the names of any of its employees nor any adaptation thereof in
 *     advertizing or publicity pertaining to the software without specific
 *     prior written approval of the UI.
 *
 * 4.  THE UI MAKES NO REPRESENTATIONS ABOUT THE SUITABILITY OF THE
 *     SOFTWARE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS
 *     OR IMPLIED WARRANTY.
 *
 * 5.  The UI shall not be liable for any damages suffered by Licensee from
 *     the use of this software.
 *
 * 6.  The software was developed under agreements between the UI and the
 *     Federal Government which entitle the Government to certain rights.
 *
 **************************************************************************
 *
 * Developed by: The TAPESTRY Parallel Computing Laboratory
 *		 University of Illinois at Urbana-Champaign
 *		 Department of Computer Science
 *		 1304 W. Springfield Avenue
 *		 Urbana, IL	61801
 *
 * Copyright (c) 1987-1994
 * The University of Illinois Board of Trustees.
 *	All Rights Reserved.
 *
 * Author: Ruth A. Aydt (aydt@cs.uiuc.edu)
 *
 * Project Manager and Principal Investigator:
 *	Daniel A. Reed (reed@cs.uiuc.edu)
 *
 * Funded by: National Science Foundation grants NSF CCR86-57696,
 * NSF CCR87-06653 and NSF CDA87-22836 (Tapestry), NASA ICLASS Contract
 * No. NAG-1-613, DARPA Contract No. DABT63-91-K-0004, by a grant
 * from the Digital Equipment Corporation External Research Program,
 * and by a collaborative research agreement with the Intel Supercomputer
 * Systems Division.
 *
 */
/*
 * FileStats.C Program to scan an SDDF file and report the min/max
 *	       values for each field and the total count of each
 *	       record type.
 *
 *	       This is accomplished by creating a "Statistics" record
 *             for every Descriptor packet in the input data file. Each
 *	       Descriptor packet introduces a distinct Record Type.  There
 *             is a Statistics record for each Record Type.
 *
 *	       For a given Record Type and Statistics Record: 
 *	       1) The tag of the Statistics Record is the Record Type tag 
 *	          + a fixed value.  
 *	       2) The name of the Statistics Record is the name of the 
 *		  Record Type with a ':' appended.  
 *             3) The first field of the Statistics Record is a scalar integer
 *	          used to maintain a count of the Record Type data records 
 *	          seen in the file.
 *	       4) For every scalar field in the Record Type Descriptor,
 *                the Statistics Record contains a vector field with two
 *		  entries which are used to keep track of the minimum and 
 *		  maximum values seen in the input file for the particular
 *		  field.
 *	       5) For every non-scalar field in the Record Type Descriptor,
 *		  the Statistics Record contains two fields to keep information
 *		  on the input data corresponding to that field.  The first
 *	          of these is a vector field with two entries corresponding to
 *	          the minimum and maximum values seen in any array element of
 *		  the input file for the particular field.  The second
 *                field in the Statistics Record is a vector with two entries
 *                for each dimension of the input field. These entries are 
 *		  used to track the minimum and maximum sizes of each dimension
 *                in the input field.
 *	      
 */
#include <stream.h>
#include <stdlib.h>
#include <string.h>
#include <values.h>

/*
 * Bounds used as Minimum/Maximum sentinels for CHARACTER data when an
 * input array has no cells (see updateStatRecord).
 */
#define MINCHAR	0x00
#define MAXCHAR 0x7f

/*
 * Fallback definitions of the integer bounds, used only when MININT and
 * MAXINT have not already been defined (presumably by <values.h>).
 * NOTE(review): the fallback relies on a BITS() macro that is not defined
 * in this file -- confirm it is supplied by <values.h> on the target.
 */
#ifndef MININT
#define MININT ( 1 << ( BITS(int) - 1 ) )
#endif

#ifndef MAXINT
#define MAXINT ~MININT
#endif

#include "InitializeStatic.C"

#include "PipeReader.h"
#include "InputFileStreamPipe.h"

#include "Attributes.h"
#include "PacketHeader.h"
#include "RecordDictionary.h"
#include "RecordDictionaryPIterator.h"
#include "StructureDescriptor.h"
#include "StructureDescriptorIterator.h"

/*
 * Offset added to a Record Type tag to form the tag under which the
 * matching Statistics record is stored in the RecordDictionary.
 */
#define STAT_TAG 10000

/*
 * Fill statSDp with the fields of a Statistics record that corresponds
 * to the Record Type described by inSDp.
 */
extern void buildStatRecord( StructureDescriptor *inSDp, 
			     StructureDescriptor *statSDp );

/*
 * Fold the values of one input data record (inDossier) into the 
 * count and Minimum/Maximum fields of its Statistics record (statDossier).
 */
extern void updateStatRecord( RecordDossier& inDossier, 
			      RecordDossier& statDossier );

/*
 * Print every Statistics record found in RDict using printf.
 */
extern void printStatRecords( RecordDictionary& RDict );

/*
 * Print a single Value using a printf format chosen by the Value's type.
 */
extern void printValue( const Value& value );

main( int argc, char **argv )
{
 	/****************************************************************
         *    Get the name of the input file, and open the file
         ****************************************************************/
	enum Status       { INVALID, VALID };

	char BUF[512];		
	Status 		     inputFileStatus;
	InputFileStreamPipe  *In;
	PipeReader     	     *InPipe;

	if ( argc > 2 ) {
	   cerr << form( "Usage: %s [ filename ]\n", argv[0] );
	   exit (-1);
	}

	do {
	    if ( argc == 2 ) {
	        strcpy( BUF, argv[1] );
		argc--;
	    } else {
  	        cerr << "Please enter name of the input SSDF file: ";
	        cin >> BUF;
	    }

	    In = new InputFileStreamPipe( BUF );
	    if ( In->successfulOpen() ) {
		inputFileStatus = VALID;
		InPipe = In->createPipeReader();
		if ( InPipe == NULL ) {
		    cerr << "ERROR: Couldn't attach pipe to input file\n";
		    inputFileStatus = INVALID;
		    delete In;
		}
	    } else {
		cerr << "ERROR: Problem with input file\n\n";
		inputFileStatus = INVALID;
		delete In;
	    }
	} while ( inputFileStatus == INVALID );

 	/****************************************************************
         *    Process the packets in the files.  Read from the input 
	 *    pipe and update the profile information.
         ****************************************************************/
	RecordDictionary     RecDict;

	int pktCount = 0;
	int attrCount = 0; 
	int descrCount = 0;
	int dataCount = 0;
	int cmdCount = 0;
	int duplicateCount = 0;

	StructureDescriptor *origSDp;
	StructureDescriptor *statSDp;
	CString		     statName;

	cerr << "Currently processing packet... \n";

	PacketHeader PH = InPipe->getPacketHeader();

	while( PH.type != PIPE_EMPTY ) {
	    if ( ( ++pktCount % 1000 ) == 0 ) {
	        cerr << pktCount << "... ";
		cerr.flush();
	    }

	    switch( PH.type ) {

	      case PKT_ATTRIBUTE:
		  attrCount++;
		  break;

	      case PKT_DESCRIPTOR:
		  descrCount++;
		  origSDp = new StructureDescriptor();
		  InPipe->getDescriptor( *origSDp );

		  if ( RecDict.insert( PH.tag, *origSDp ) == SUCCESS_ ){
		      statName = origSDp->getName() + ":";
		      statSDp = new StructureDescriptor( statName );
		      buildStatRecord( origSDp, statSDp );
		      if ( ! RecDict.insert( PH.tag+STAT_TAG, *statSDp ) ){
			  cerr << "ERROR: Duplicate Stat tag seen ";
			  cerr << PH.tag+STAT_TAG << "\n";
			  exit( -1 );
		      }
		      delete statSDp;
		  } else {
		      duplicateCount++;
		  }

		  delete origSDp;
		  break;

	      case PKT_DATA:
		  {
		  dataCount++;
	          RecordDossier& origDossier = RecDict.fetch( PH.tag );
		  InPipe->getData( origDossier );
		  RecordDossier& statDossier = RecDict.fetch( PH.tag+STAT_TAG );
		  updateStatRecord( origDossier, statDossier );
		  }
		  break;

	      case PKT_COMMAND:
		  cmdCount++;
		  break;

	    }
	    PH = InPipe->getPacketHeader();
	}

	printf( "\nThere are %d packets in the file %s.\n", 
		 	    attrCount+descrCount+dataCount+cmdCount, BUF );

	printf( "%d Descriptor; %d Data; %d Attribute; %d Command.\n",
		 	    descrCount, dataCount, attrCount, cmdCount );

	if ( duplicateCount != 0 ) {
	    printf( "%d of the Descriptor packets had duplicate tags.\n", 
			    duplicateCount );
	}

	printStatRecords( RecDict );
	
	delete InPipe;
	delete In;
}

/*
 * buildStatRecord: Populate the Statistics descriptor statSDp so that it
 * 	matches the Record Type described by inSDp.
 *
 *	Field 0 is always the scalar INTEGER "_Count Of Records_".
 *	After that, each input field contributes a copy of itself with its
 *	dimension set to 1 (the Minimum/Maximum vector) and, if the input 
 *	field is not a scalar, an INTEGER vector named 
 *	"<field name> Dimension Sizes".
 */
void
buildStatRecord( StructureDescriptor *inSDp, StructureDescriptor *statSDp )
{
	static Attributes noAttributes;
	CString           sizesName;

	/* The record counter always comes first. */
	FieldDescriptor countField( "_Count Of Records_", noAttributes, 
				    INTEGER, 0 );
	statSDp->insert( countField );

	/* Walk the input descriptor, mirroring each of its fields. */
	StructureDescriptorIterator fieldIter( *inSDp ); 

	for ( FieldDescriptor inField = fieldIter.first();
	      inField.notNull();
	      inField = fieldIter.next() ) {

	    /* Same name and type as the input field, but always a vector. */
	    FieldDescriptor minMaxField( inField );	
	    minMaxField.setDimension( 1 );
	    statSDp->insert( minMaxField );

	    if ( inField.getDimension() <= 0 ) {
		continue;		// scalars need nothing more
	    }

	    /* Non-scalars also get a vector tracking their dimension sizes. */
	    sizesName = inField.getName() + " Dimension Sizes";
	    FieldDescriptor sizesField( sizesName, noAttributes, INTEGER, 1 );
	    statSDp->insert( sizesField );
	}
}

/*
 * updateStatRecord: Fold one input data record into its Statistics record.
 *
 *	inDossier    The data record just read from the input file.
 *	statDossier  The Statistics record for the same Record Type, laid
 *		     out as built by buildStatRecord().
 *
 *	Field 0 of statDossier (the record count) is incremented.  Then, for
 *	each field of inDossier, the corresponding Minimum/Maximum vector in
 *	statDossier is updated, and for non-scalar input fields the
 *	following "Dimension Sizes" vector is updated as well.
 *
 *	The first record seen for a type (count becomes 1) also sets the
 *	sizes of the statistics vectors:  2 for every Minimum/Maximum vector
 *	and 2 * (number of dimensions) for a Dimension Sizes vector, except
 *	that a size of 2 is bumped to 3 so that printStatRecords() can tell
 *	the two kinds of vector apart.
 */
void
updateStatRecord( RecordDossier& inDossier, RecordDossier& statDossier )
{
	// These are static just so they do not have to be recreated each time
	static Value one( 1 );
	static int   fieldCount;
	static Value minValue;
	static Value maxValue;
	static Value newValue;

	FieldDescriptor *inFieldP; 
	Array           *inArrayP;
	int 		inDimension;
	int		inCellCount;
	const int	*inDimSizes;
	Array           *statArrayP;
	int		dimSize;

	int		a;		// index for cells in input array
	int		c;		// index for cells in stat array
	int		d;		// index for cells in dimension array
	int	        i;		// index for fields in input dossier
	int		s;		// index for fields in stat dossier

	/* 
	 * First, update our count of records seen for this type.
	 * Set flag if this is the first time we've seen a record of this type.
	 */
	Value *valueP = statDossier.getValueP( 0 );
	*valueP = *valueP + one;
	Boolean_ firstOne = ( *valueP == one );

	/* 
	 * Next, we cycle through each field in the input record and update
	 * our statistic Minimum and Maximum values if appropriate.  
	 * The stat index s starts at 1 to skip the record count and runs
	 * ahead of i because array fields use two statistics fields.
	 */
	s = 1;

	fieldCount = inDossier.entryCount();
        for ( i = 0; i < fieldCount; i++ ) {

	    inFieldP = inDossier.getField( i );
	    statArrayP = statDossier.getArrayP( s++ );
	    inDimension = inFieldP->getDimension();

	    /* 
	     * inArrayP, inCellCount and inDimSizes are only meaningful (and
	     * only used below) when the input field is an array.
	     */
	    if ( inDimension > 0 ) {
		inArrayP = inDossier.getArrayP( i );
		inCellCount = inArrayP->getCellCount();
		inDimSizes = inArrayP->getDimSizes();
	    }

	    if ( firstOne ) {
		/*
		 * The first time we see a record of a given type, we set
		 * the dimension size for our statistics Min/Max field
		 * and initialize both the minimum and maximum values to
		 * the input field value.
		 */
		dimSize = 2;			// Always 2 for Min and Max 
		statArrayP->setDimSizes( &dimSize );

		if ( inDimension == 0 ) {	
		    /* 
		     * For scalars, set Minimum and Maximum values
		     */
		    statArrayP->setTheCellValue( 0, inDossier.getValue( i ) );
		    statArrayP->setTheCellValue( 1, inDossier.getValue( i ) ); 
		} else {			// An array value
		    /*
		     * For arrays, find Minimum and Maximum values over
		     * all elements of the array.
		     */
		    if ( inCellCount != 0 ) {
		        inArrayP->getTheCellValue( minValue, 0 );
		        maxValue = minValue;
		    } else {
			/*
			 * No cells to look at:  seed Minimum with the 
			 * largest value of the type and Maximum with the 
			 * smallest, so that any value seen later replaces
			 * them and so that Minimum > Maximum marks "no 
			 * values seen" for printStatRecords().
			 *
			 * MINFLOAT and MINDOUBLE are the smallest *positive*
			 * magnitudes, not the most negative values, so the
			 * Maximum seeds for floating point types must be
			 * the negated MAX constants.  Otherwise a later
			 * array holding only negative values would never
			 * update the Maximum.
			 */
        		switch ( inArrayP->getType() ) {
            		    case CHARACTER:
				minValue = (char) MAXCHAR;
				maxValue = (char) MINCHAR;
                		break;
            		    case INTEGER:
				minValue = (int) MAXINT;
				maxValue = (int) MININT;
                		break;
            	 	    case FLOAT:
				minValue = (float) MAXFLOAT;
				maxValue = (float) -MAXFLOAT;
                		break;
            		    case DOUBLE:
				minValue = (double) MAXDOUBLE;
				maxValue = (double) -MAXDOUBLE;
                		break;
            		    case UNDEFINED:
            		    default:
				minValue = Value::NOVALUE;
				maxValue = Value::NOVALUE;
				break;
			}
		    }

		    /* Cell 0 already seeded min/max, so start at cell 1 */
	            for ( a = 1; a < inCellCount; a++ ) {
		 	inArrayP->getTheCellValue( newValue, a );
		 	if ( newValue < minValue ) {
		     	    minValue = newValue;
			} 
			if ( newValue > maxValue ) {
			    maxValue = newValue;
			}
		    }
		    statArrayP->setTheCellValue( 0, minValue );
		    statArrayP->setTheCellValue( 1, maxValue ); 

		    /*
		     * Now, we also need to set the dimensions for our
		     * DimSizes statistics field and initialize the Minimum
		     * and Maximum values there.  (We never let dimSize = 2
		     * so we can distinguish these "special fields"
		     * for array dimensions from the normal statistics
		     * min/max fields.)
		     */
		    statArrayP = statDossier.getArrayP( s++ );
		    dimSize = (inDimension * 2);
		    if ( dimSize == 2 ) {
			dimSize = 3;
		    }
		    statArrayP->setDimSizes( &dimSize );

		    /* Cells are laid out as (min, max) pairs per dimension */
		    c = 0;
		    for ( d = 0; d < inDimension; d++ ) {
			statArrayP->setTheCellValue( c++, inDimSizes[d] ) ;
			statArrayP->setTheCellValue( c++, inDimSizes[d] ) ;
		    }
		}

	    } else {			

		if ( inDimension == 0 ) {
		    /*
		     * For scalars, adjust Minimum and Maximum values as
		     * needed.
		     */
		    newValue = inDossier.getValue( i );
		    if ( newValue < statArrayP->getTheCellValue( 0 ) ) {
			statArrayP->setTheCellValue( 0, newValue );
		    }
		    if ( newValue > statArrayP->getTheCellValue( 1 ) ) {
			statArrayP->setTheCellValue( 1, newValue );
		    }

		} else {
		    /*
		     * For arrays, compare each cell value to the
		     * statistic record Minimum and Maximum. 
		     */
		    statArrayP->getTheCellValue( minValue, 0 );
		    statArrayP->getTheCellValue( maxValue, 1 );

	            for ( a = 0; a < inCellCount; a++ ) {
		 	inArrayP->getTheCellValue( newValue, a );
		 	if ( newValue < minValue ) {
		     	    minValue = newValue;
			} 
			if ( newValue > maxValue ) {
			    maxValue = newValue;
			}
		    }
		    statArrayP->setTheCellValue( 0, minValue );
		    statArrayP->setTheCellValue( 1, maxValue ); 

		    /* 
		     * Then, update the DimSizes Minimum and Maximum Values.
		     */
		    statArrayP = statDossier.getArrayP( s++ );

		    c = 0;
		    for ( d = 0; d < inDimension; d++ ) {
			if ( inDimSizes[d] < 
				      (int)statArrayP->getTheCellValue( c ) ) {
			    statArrayP->setTheCellValue( c, inDimSizes[d] ) ;
			}
			c++;
			if ( inDimSizes[d] > 
				      (int)statArrayP->getTheCellValue( c ) ) {
			    statArrayP->setTheCellValue( c, inDimSizes[d] ) ;
			}
			c++;
		    }
		}
	    }
	}
}

/*
 * printStatRecords: Print the contents of every Statistics record in RDict.
 *
 *	A dossier is treated as a Statistics record when its field 0 is 
 *	named "_Count Of Records_".  For each one, the record name and count
 *	are printed, followed by one entry per remaining field:
 *
 *	  - A vector whose size is 2 is a Minimum/Maximum field.  Its name
 *	    is printed and, if any records were seen, either its Minimum
 *	    and Maximum or "No Values seen." (when Minimum > Maximum).
 *	  - Any other vector is a Dimension Sizes field holding one
 *	    (minimum, maximum) pair per dimension; the pairs are printed
 *	    only if any records were seen.
 */
void
printStatRecords( RecordDictionary& RDict )
{
	RecordDossier *dossier;
	Array	      *arrayP;
	Value	       min;
	Value	       max;
	int	       count;
	int	       fieldCount;
	const int     *dimSizes;
	int	       dimensions;
	int	       minSize;
	int	       maxSize;
	int	       i, j, k;

	printf( "\nInformation by Record Type");
	printf( "\n--------------------------");

	RecordDictionaryPIterator iterator( RDict );
	dossier = iterator.first();

	while ( dossier != NULL ) {

	    if ( dossier->getFieldID( "_Count Of Records_" ) == 0 ) {
		/* 
		 * This is a statistics Record!
		 */
		count = dossier->getValue( 0 );
		printf( "\n%s        (%d Data Records)\n",
			 (const char *)dossier->getName(), count );

		fieldCount = dossier->entryCount();
		for ( i = 1; i < fieldCount; i++ ) {
		    /*
		     * We know that all the fields except the first (which
		     * we already processed) are vectors.  
		     */
		    arrayP = dossier->getArrayP( i );
		    dimSizes = arrayP->getDimSizes();

		    if ( *dimSizes == 2 ) {
			/*
			 * This is a Minimum-Maximum field descriptor
			 */
		        printf( "    Field: %s", 
				 (const char*)dossier->getField(i)->getName() );
		        if ( count > 0 ) {
		            min = arrayP->getTheCellValue( 0 );
		            max = arrayP->getTheCellValue( 1 );
			    if ( min > max ) {
				printf( "\n      No Values seen." );
			    } else {
		                printf( "\n      Minimum: " );
		                printValue( min );
		                printf( "\n      Maximum: " );
		                printValue( max );
			    }
		        }
		        printf( "\n" );

		    } else {
			/* 
			 * This is an Array dimension sizes field descriptor
			 */
		        if ( count > 0 ) {
			    dimensions = (*dimSizes)/2;
			    k = 0;
			    for ( j = 0; j < dimensions; j++ ) {
				/*
				 * Fetch the pair in separate statements.
				 * The order in which function arguments 
				 * are evaluated is not specified, so two
				 * k++ expressions in one printf call could
				 * swap the minimum and maximum.
				 */
				minSize = (int)arrayP->getTheCellValue( k++ );
				maxSize = (int)arrayP->getTheCellValue( k++ );

			        printf("      Dimension %d:", j );
			        printf("\tMinimum Size %d  \tMaximum Size %d\n",
				          minSize, maxSize );

			    }		// for ( j = ... )
		        }		// if ( count > 0 )
		    }			// if ( *dimSizes == 2 )
		} 			// for ( i = ... )
	    } 				// if ( dossier->getFieldID

	    dossier = iterator.next();
	}

}

/*
 * printValue: Write value with printf, using a format that depends on
 *	the type reported by the value's traits.  Nothing is written for
 *	UNDEFINED or for any type not listed here.
 */
void
printValue( const Value& value ) 
{
	switch( value.getTraits().getType() ) {

	  case DOUBLE:
	      {
	      double asDouble = (double)value;
	      printf( "%.16g", asDouble );
	      }
	      break;

	  case FLOAT:
	      {
	      float asFloat = (float)value;
	      printf( "%.7g", asFloat );
	      }
	      break;

	  case INTEGER:
	      {
	      int asInt = (int)value;
	      printf( "%d", asInt );
	      }
	      break;

	  case CHARACTER:
	      {
	      char asChar = (char)value;
	      printf( "%c", asChar );
	      }
	      break;

	  case UNDEFINED:
	  default:
	      /* no printable representation */
	      break;
	}
}


