/*
 * This file is part of the Pablo Performance Analysis Environment
 *
 *          (R)
 * The Pablo    Performance Analysis Environment software is NOT in
 * the public domain.  However, it is freely available without fee for
 * education, research, and non-profit purposes.  By obtaining copies
 * of this and other files that comprise the Pablo Performance Analysis
 * Environment, you, the Licensee, agree to abide by the following
 * conditions and understandings with respect to the copyrighted software:
 * 
 * 1.  The software is copyrighted in the name of the Board of Trustees
 *     of the University of Illinois (UI), and ownership of the software
 *     remains with the UI. 
 *
 * 2.  Permission to use, copy, and modify this software and its documentation
 *     for education, research, and non-profit purposes is hereby granted
 *     to Licensee, provided that the copyright notice, the original author's
 *     names and unit identification, and this permission notice appear on
 *     all such copies, and that no charge be made for such copies.  Any
 *     entity desiring permission to incorporate this software into commercial
 *     products should contact:
 *
 *          Professor Daniel A. Reed                 reed@cs.uiuc.edu
 *          University of Illinois
 *          Department of Computer Science
 *          2413 Digital Computer Laboratory
 *          1304 West Springfield Avenue
 *          Urbana, Illinois  61801
 *          USA
 *
 * 3.  Licensee may not use the name, logo, or any other symbol of the UI
 *     nor the names of any of its employees nor any adaptation thereof in
 *     advertizing or publicity pertaining to the software without specific
 *     prior written approval of the UI.
 *
 * 4.  THE UI MAKES NO REPRESENTATIONS ABOUT THE SUITABILITY OF THE
 *     SOFTWARE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS
 *     OR IMPLIED WARRANTY.
 *
 * 5.  The UI shall not be liable for any damages suffered by Licensee from
 *     the use of this software.
 *
 * 6.  The software was developed under agreements between the UI and the
 *     Federal Government which entitle the Government to certain rights.
 *
 **************************************************************************
 *
 * Developed by: The TAPESTRY Parallel Computing Laboratory
 *		 University of Illinois at Urbana-Champaign
 *		 Department of Computer Science
 *		 1304 W. Springfield Avenue
 *		 Urbana, IL	61801
 *
 * Copyright (c) 1991-1994
 * The University of Illinois Board of Trustees.
 *	All Rights Reserved.
 *
 * PABLO is a registered trademark of
 * The Board of Trustees of the University of Illinois
 * registered in the U.S. Patent and Trademark Office.
 *
 * Author:  Daniel A. Reed (reed@cs.uiuc.edu)
 * Contributing Author:  Xingbin Zhang (zhang@cs.uiuc.edu)
 * Contributing Author:  Roger J. Noe (noe@cs.uiuc.edu)
 * Project Manager and Principal Investigator:
 *	Daniel A. Reed (reed@cs.uiuc.edu)
 *
 * Funded by: National Science Foundation grants NSF CCR87-06653 and
 * NSF CDA87-22836 (Tapestry), DARPA Contract No. DABT63-91-K-0004,
 * by a grant from the Digital Equipment Corporation External Research
 * Program, and by a collaborative research agreement with the Intel
 * Supercomputer Systems Division.
 *
 */

/*
 * pdeNode.c:
 *	This file contains the node code for an iterative elliptic
 *	PDE solver, implemented for the Intel iPSC/2 and iPSC/860.
 */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef __PARAGON__
# include <nx.h>
#endif

#include "ProcTrace.h"
#include "iPSCtrace.h"

/* TRACE is defined when either tracing extension (procedure or message)    */
/* is compiled in; main() uses it to guard trace-file setup and shutdown.   */
#if defined(PROCTRACE) || defined(MESGTRACE)
# define	TRACE
#endif

#define	TRUE		1
#define	FALSE		0
#define	EPSILON		1e-5	/* largest per-point change between two      */
				/* iterates still treated as "converged"     */

/*
 * Message types.  Types 0-4 carry the start-up data received in main().
 * LOWERTOUPPERmsg carries a node's row 1 to the node holding the strip
 * above it (gray(ginv(myNode)-1)); UPPERTOLOWERmsg carries row stripWidth
 * to the node holding the strip below it (gray(ginv(myNode)+1)).
 */
#define MAXITERmsg	0	/* Message type: max number of iterations    */
#define CONVITERmsg	1       /* Message type: convergence check frequency */
#define ROWSmsg		2       /* Message type: grid dimension		     */
#define WIDTHmsg	3       /* Message type: partition width	     */
#define STRIPmsg	4       /* Message type: partition data		     */
#define UPPERTOLOWERmsg 5	/* Message type: send boundary points "down" */
#define LOWERTOUPPERmsg 6	/* Message type: send boundary points "up"   */
#define CONVBACKmsg	7	/* Message type: convergence value broadcast */
#define LOCALCONVmsg	8	/* Message type: local convergence flags     */
				/* HAS NODE NUMBER ADDED TO IT		     */
				/* (the sender's node number, so the actual  */
				/* types used are LOCALCONVmsg + 1 and up)   */

#define ALL		-1	/* All nodes in the hypercube		     */

#define NODEPID		0	/* Node pid				     */
#define HOSTPID		1	/* Host pid				     */


/* Problem parameters, each received from the host at start-up in main().   */
int	Rows;			/* number of rows in grid		     */
				/* (each partition row holds Rows + 2 floats:*/
				/* columns 1..Rows plus two boundary columns)*/
int	ConvIter;		/* iterations between checks		     */
int	MaxIter;		/* iteration count for benchmark	     */
int	stripWidth;		/* number of partition rows this node	     */
				/* updates (rows 1..stripWidth)		     */

long    myNode;			/* this node's number, from mynode()	     */
long    nNodes;			/* number of nodes, from numnodes()	     */
double  NodeCnt;		/* nNodes as a double, for the log() call    */
int	cubeDim;		/* log2(nNodes), rounded to nearest int	     */

/* Row-pointer arrays with stripWidth + 2 entries each, set up by InitGrid. */
/* Rows 0 and stripWidth + 1 hold boundary rows; main() swaps Old and New   */
/* after every iteration.						     */
float	**OldPartition;		/* partitions for jacobi iteration	     */
float	**NewPartition;		/* iterations				     */
float	**Partition;		/* scratch pointer used for the swap	     */

/* Node-library routines.  gray() and ginv() are presumably the Gray code   */
/* mapping and its inverse -- confirm against the vendor documentation.     */
extern long numnodes();
extern long mynode();
extern long myhost();
extern long gray();
extern long ginv();


/*
 * main:
 *	Node program entry point.  Sets up tracing (when compiled in),
 *	receives the problem parameters and this node's strip of the grid
 *	from the host, then repeats the Jacobi step -- exchange boundary
 *	rows with the neighbouring strips, update the interior, update the
 *	two edge rows, test for convergence -- until MaxIter iterations
 *	have run or Convergence() reports global convergence.  Finally the
 *	strip (and, from node 0, the iteration counter) is sent back to
 *	the host.
 *
 *	Returns 0.
 */

int main () {
	char	traceFile[32];		/* instrumentation trace file name   */

	int	Iter;			/* iteration number		     */
	int	midToUpper;		/* ISEND id: row 1 to the upper node */
	int	midToLower;		/* ISEND id: last row to lower node  */
	int	GlobalConv;		/* global convergence flag	     */

	int	isFirst;		/* this node holds the top strip     */
	int	isLast;			/* this node holds the bottom strip  */
	long	upperNode;		/* node holding the strip above ours */
	long	lowerNode;		/* node holding the strip below ours */

	int	i;

	/* These are defined later in this file; declare them here so the    */
	/* calls below do not rely on implicit function declarations.	     */

	void InitGrid();
	int  Iterate();
	int  IterateEdge();
	int  Convergence();

	/* Define the name of the trace output file for this node and	     */
	/* pass it to the trace capture library interface.  mynode() is	     */
	/* declared as returning long, so it must be printed with %ld; the   */
	/* 32-byte buffer is large enough for any 64-bit value.		     */

#ifdef TRACE
	sprintf( traceFile, "Jacobi%ld.bin", (long) mynode() );
	setTraceFileName( traceFile );
#endif /* TRACE */

	/* Perform pre-initialization for both the message-passing and	     */
	/* procedure tracing extensions to the trace library (if	     */
	/* needed).  This will guarantee that record descriptors for	     */
	/* both kinds of tracing extensions will be output before the	     */
	/* trace library's internal event families.			     */

#ifdef MESGTRACE
	preInitMesgTrace();
#endif /* MESGTRACE */

#ifdef PROCTRACE
	preInitProcTrace();
#endif /* PROCTRACE */

	/* All pre-initialization is done.  Now do basic initialization	     */
	/* of the procedure and message-passing tracing extensions (if	     */
	/* needed), and as a consequence, basic initialization of the	     */
	/* trace library itself.					     */

#ifdef PROCTRACE
	initProcTrace( NUMBER_OF_PROCS, procEntries, procExits );
#endif /* PROCTRACE */

#ifdef MESGTRACE
	iPSCtraceInit();
#endif /* MESGTRACE */

	Iter = 0;
	GlobalConv = FALSE;

	myNode = mynode();
	nNodes = numnodes();
	NodeCnt = nNodes;
	cubeDim = (int)(log(NodeCnt) / log(2.0) + 0.5);

	/*
	 *
	 * Work out once where this node sits in the strip of partitions.
	 * The formula gray((ginv(myNode) - 1 + nNodes) % nNodes) is
	 * not used to calculate the upper node since the embedding is not
	 * a ring but a strip; hence, there's no wrap around. Similarly,
	 * a simplified formula is also used for lower node identifiers.
	 * A neighbour that does not exist is recorded as -1 and never used.
	 *
	 * NOTE(review): if nNodes is 1 and gray(0) is 0, this node is both
	 * first and last, yet only the "first" branch below runs, so the
	 * bottom boundary row is never copied into NewPartition -- confirm
	 * how the host handles a single-node run.
	 *
	 */

	isFirst = ( myNode == 0 );
	isLast  = ( myNode == gray(nNodes - 1) );

	upperNode = isFirst ? -1 : gray( ginv(myNode) - 1 );
	lowerNode = isLast  ? -1 : gray( ginv(myNode) + 1 );

	/*
	 *
	 * Receive size information from host
	 *
	 */

	CRECV( ROWSmsg, &Rows, sizeof(int) );
	CRECV( MAXITERmsg, &MaxIter, sizeof(int) );
	CRECV( CONVITERmsg, &ConvIter, sizeof(int) );
	CRECV( WIDTHmsg, &stripWidth, sizeof(int) );

	/*
	 *
	 * Initialize the grid based on the partition width
	 *
	 */

#ifdef PROCTRACE
	traceEvent( INITGRIDEntry, (char *) NULL, 0 );
#endif /* PROCTRACE */

	InitGrid();			/* allocate the partition array      */

#ifdef PROCTRACE
	traceEvent( INITGRIDExit, (char *) NULL, 0 );
#endif /* PROCTRACE */

	/*
	 *
	 * Node 0 gets the top boundary of the entire grid and the last node
	 * in the gray code embedding gets the bottom boundary.  Those two
	 * nodes therefore receive one row more than the others, and copy
	 * the fixed boundary row into NewPartition so that it survives the
	 * Old/New swap at the end of every iteration.
	 *
	 */

	if( isFirst ) {

	   CRECV( STRIPmsg, OldPartition[0],
	          (stripWidth + 1) * (Rows + 2) * sizeof(float) );

	   for (i = 0; i <= Rows + 1; i++)	    /* fixed boundary values */
	      NewPartition[0][i] = OldPartition[0][i];

	} else if( isLast ) {

	   CRECV( STRIPmsg, OldPartition[1],
		  (stripWidth + 1) * (Rows + 2) * sizeof(float) );

	   for (i = 0; i <= Rows + 1; i++)	    /* fixed boundary values */
	      NewPartition[stripWidth+1][i] = OldPartition[stripWidth+1][i];

	} else {

	   CRECV( STRIPmsg, OldPartition[1],
		  stripWidth * (Rows + 2) * sizeof(float) );
	}

	/* Copy left and right boundary pts to carry them between iterations */
	for(i = 1; i <= stripWidth; i++) {
	    NewPartition[i][0] = OldPartition[i][0];
	    NewPartition[i][Rows+1] = OldPartition[i][Rows+1];
	}

	for (Iter = 1; (Iter <= MaxIter) && !GlobalConv; Iter++) {

	   /*
	    *
	    * Start sending our first and last rows to the neighbouring
	    * strips; the sends overlap with the interior update below.
	    *
	    */

	   if( !isFirst ) {
		midToUpper = ISEND( LOWERTOUPPERmsg,
				    OldPartition[1],
				    (Rows + 2) * sizeof(float),
				    upperNode, NODEPID );
	   }

	   if( !isLast ) {
		midToLower = ISEND( UPPERTOLOWERmsg,
				    OldPartition[stripWidth],
				    (Rows + 2) * sizeof(float),
				    lowerNode, NODEPID );
	   }

#ifdef PROCTRACE
	   traceEvent( ITERATEEntry, (char *) NULL, 0 );
#endif /* PROCTRACE */

	   Iterate();			/* update grid interior points	     */

#ifdef PROCTRACE
	   traceEvent( ITERATEExit, (char *) NULL, 0 );
#endif /* PROCTRACE */

	   /*
	    *
	    * Retrieve the boundary points: the lower neighbour's first
	    * row goes into row stripWidth + 1, the upper neighbour's last
	    * row into row 0.
	    *
	    */

	   if( !isLast ) {
	      CRECV( LOWERTOUPPERmsg, OldPartition[stripWidth + 1],
		     (Rows + 2) * sizeof(float) );
	   }

	   if( !isFirst ) {
	      CRECV( UPPERTOLOWERmsg, OldPartition[0],
		     (Rows + 2) * sizeof(float) );
	   }

#ifdef PROCTRACE
	   traceEvent( ITERATEEDGEEntry, (char *) NULL, 0 );
#endif /* PROCTRACE */

	   IterateEdge();		/* Update the two partition edges    */

#ifdef PROCTRACE
	   traceEvent( ITERATEEDGEExit, (char *) NULL, 0 );
#endif /* PROCTRACE */

	   /*
	    *
	    * Check for global convergence using logarithmic condensation
	    *
	    */

#ifdef PROCTRACE
	   traceEvent( GLOBALCONVERGEEntry, (char *) NULL, 0 );
#endif /* PROCTRACE */

	   GlobalConv = Convergence(Iter);

#ifdef PROCTRACE
	   traceEvent( GLOBALCONVERGEExit, (char *) NULL, 0 );
#endif /* PROCTRACE */

	   Partition = NewPartition;    /* swap the partitions		     */
	   NewPartition = OldPartition;
	   OldPartition = Partition;

	   /*
	    *
	    * The rows handed to ISEND now belong to NewPartition, which
	    * the next iteration overwrites, so wait for both sends to
	    * complete before going round again.
	    *
	    */

	   if( !isFirst ) {
	      msgwait( midToUpper );
	   }

	   if( !isLast ) {
	      msgwait( midToLower );
	   }

	}

	/*
	 *
	 * Iterations complete.  Send the partition back to the host.
	 * After the final swap OldPartition holds the newest values.
	 * Iter is the loop counter on exit, i.e. one more than the number
	 * of iterations actually performed.
	 *
	 */

	if( isFirst ) {
		CSEND( nNodes + 1, &Iter, sizeof(int), myhost(), HOSTPID );
	}

	CSEND( myNode, OldPartition[1],
	       stripWidth * (Rows + 2) * sizeof(float),
	       myhost(), HOSTPID );

#ifdef TRACE
	endTracing();
#endif /* TRACE */

	return 0;
}


/************************************************************************/
/*									*/
/*	Procedure:	InitGrid					*/
/*	Function:	Initializes parameters for PDE solver		*/
/*	Author:		Daniel A. Reed					*/
/*	Last Change:	11/9/87						*/
/*									*/
/*	Parameters:	None (globals modified)				*/
/*									*/
/************************************************************************/

void InitGrid() {

	int	i, j;

	OldPartition = (float **) malloc ((stripWidth + 2) * sizeof (float *));
	NewPartition = (float **) malloc ((stripWidth + 2) * sizeof (float *));

	OldPartition [0] = (float *)
		malloc ((stripWidth + 2) * (Rows + 2) * sizeof (float));
	NewPartition [0] = (float *)
		malloc ((stripWidth + 2) * (Rows + 2) * sizeof (float));

	for (i = 1; i <= stripWidth + 1; i++) {
	   OldPartition [i] = OldPartition [0] + i * (Rows + 2);
	   NewPartition [i] = NewPartition [0] + i * (Rows + 2);
	}
}


/************************************************************************/
/*									*/
/*    Procedure:	Converged					*/
/*    Function:		Determine local convergence			*/
/*    Author:		Daniel Reed					*/
/*    Last change:	11/19/87					*/
/*									*/
/*    Output:	TRUE  - the current approximation has converged		*/
/*		FALSE - the current approximation has not converged	*/
/*									*/
/************************************************************************/

/*
 * Compares the points this node updates (rows 1..stripWidth, columns
 * 1..Rows) in OldPartition and NewPartition.  Returns FALSE at the first
 * point whose two values differ by more than EPSILON, TRUE if there is
 * no such point.  The Iter parameter is not used.  The return type is
 * the implicit int of the original definition.
 */

Converged (Iter)

	int	Iter;
{
	int	row;
	int	col;
	float	*prev;			/* current row of OldPartition	     */
	float	*next;			/* current row of NewPartition	     */
	double	delta;

	for (row = 1; row <= stripWidth; row++) {
	   prev = OldPartition [row];
	   next = NewPartition [row];

	   for (col = 1; col <= Rows; col++) {
	      delta = fabs (prev [col] - next [col]);

	      if (delta > EPSILON)
		 return (FALSE);
	   }
	}

	return (TRUE);
}


/************************************************************************/
/*                                                                      */
/*      Procedure:      Convergence                                     */
/*      Function:       global convergence checking                     */
/*      Authors:        Daniel A. Reed                                  */
/*			Xingbin Zhang					*/
/*      Last Change:    5/6/92                                          */
/*									*/
/************************************************************************/

/*
 * Returns TRUE when every node's local test (Converged) succeeded on
 * this iteration, FALSE otherwise.  The test is only made when Iter is a
 * multiple of ConvIter; on all other iterations FALSE is returned without
 * any communication.  A ConvIter of zero is treated as "never check", so
 * that a zero sent by the host cannot cause a division by zero here.
 *
 * The local flags are combined and redistributed along a tree rooted at
 * node 0 (logarithmic condensation).  Let b be the position of the lowest
 * set bit of a non-zero node number:
 *
 *   - the node first receives one flag from each node myNode + 2^k,
 *     k = 0..b-1, ANDing it into its own flag;
 *   - it sends the combined flag to the node whose number is myNode with
 *     bit b cleared, using message type LOCALCONVmsg + myNode;
 *   - it then receives the global result (CONVBACKmsg) and forwards it
 *     to the nodes myNode + 2^k, k = b-1..0.
 *
 * Node 0 receives from nodes 2^k, k = 0..cubeDim-1, and starts the
 * redistribution.  Every node must call this routine on the same
 * iterations, otherwise the receives never complete.
 */

int Convergence(Iter)

	int	Iter;
{
	int	GlobalConv;
	int	tempConv;

	int	i;
	int	cDim;

	/* Not a checking iteration (or checking is disabled).		     */
	if (ConvIter == 0 || (Iter % ConvIter) != 0)
	   return( FALSE );

#ifdef PROCTRACE
	traceEvent( CONVERGEDEntry, (char *) NULL, 0 );
#endif /* PROCTRACE */

	GlobalConv = Converged (Iter);		/* local convergence */

#ifdef PROCTRACE
	traceEvent( CONVERGEDExit, (char *) NULL, 0 );
#endif /* PROCTRACE */

	if ( myNode == 0 ) {

	   /*
	    *
	    * Node 0 receives in total cube dimension number of messages
	    * containing intermediate convergence results, computes the
	    * AND of them, and redistributes the result to all other nodes
	    * logarithmically.
	    *
	    */

	   for (i = 0; i < cubeDim; i++) {
	      CRECV( LOCALCONVmsg + (1 << i), &tempConv, sizeof(int) );

	      GlobalConv = GlobalConv && tempConv;
	   }

	   for (i = cubeDim - 1; i >= 0; i--) {
	      CSEND( CONVBACKmsg, &GlobalConv, sizeof(int),
		     1 << i, NODEPID );
	   }

	} else {

	   /*
	    *
	    * Receive all the intermediate results for this node.  The
	    * loop stops at the lowest set bit of myNode, leaving its
	    * position in cDim.
	    *
	    */

	   cDim = 0;

	   while( (myNode >> cDim) % 2 == 0) {
	      CRECV( LOCALCONVmsg + (((myNode >> cDim)+1) << cDim),
		     &tempConv, sizeof(int) );

	      GlobalConv = GlobalConv && tempConv;
	      cDim++;
	   }

	   /*
	    *
	    * Send the total result "up"
	    *
	    */

	   CSEND( LOCALCONVmsg + myNode, &GlobalConv, sizeof(int),
		  ((myNode >> cDim) - 1) << cDim, NODEPID );

	   /*
	    *
	    * Receive the new global convergence result
	    *
	    */

	   CRECV( CONVBACKmsg, &GlobalConv, sizeof(int) );

	   /*
	    *
	    * Redistribute the global result to other nodes
	    *
	    */

	   for (i = cDim - 1; i >= 0; i--) {
	      CSEND( CONVBACKmsg, &GlobalConv, sizeof(int),
		     ((myNode >> i) + 1) << i, NODEPID );
	   }
	}

	return( GlobalConv );
}


/************************************************************************/
/*									*/
/*	Procedure:	Iterate						*/
/*	Function:	Jacobi iteration				*/
/*	Author:		Daniel A. Reed					*/
/*	Last Change:	1/7/88						*/
/*	Modified so that the procedure updates only the internal points	*/
/*		of the grid and does one iteration;			*/
/************************************************************************/

/*
 * Performs one Jacobi step on rows 2..stripWidth-1 of the partition,
 * i.e. on the rows that need nothing from a neighbouring node.  Each
 * point of NewPartition in columns 1..Rows becomes one quarter of the
 * sum of its four neighbours in OldPartition.  Rows 1 and stripWidth are
 * left to IterateEdge.  Nothing is done when stripWidth is less than 3.
 * No value is returned; callers must not use the (implicit int) result.
 */

Iterate()
{
	int	row;
	int	col;
	float	*above;			/* OldPartition row - 1		     */
	float	*here;			/* OldPartition row		     */
	float	*below;			/* OldPartition row + 1		     */
	float	*out;			/* NewPartition row		     */

	for (row = 2; row < stripWidth; row++) {
	   above = OldPartition [row - 1];
	   here  = OldPartition [row];
	   below = OldPartition [row + 1];
	   out   = NewPartition [row];

	   for (col = 1; col <= Rows; col++)
	      out [col] = 0.25 * (above [col] + here [col + 1] +
				  below [col] + here [col - 1]);
	}
}




/************************************************************************/
/*									*/
/*	Procedure:	IterateEdge					*/
/*	Function:	Jacobi iteration				*/
/*	Updates the boundary points of the strip once;			*/
/*									*/
/************************************************************************/

/*
 * Performs one Jacobi step on the first and last rows this node updates
 * (rows 1 and stripWidth).  These rows read OldPartition rows 0 and
 * stripWidth + 1, which main() fills from the neighbouring nodes (or
 * from the fixed grid boundary) before calling this routine.
 *
 * When stripWidth is 1 both edges are the same row, so it is computed
 * only once; the result is the same as computing it twice.  When
 * stripWidth is less than 1 there is no row to update and the routine
 * returns without touching the partitions, rather than indexing outside
 * them.
 *
 * Always returns 0; the value carries no information.  The return type
 * stays int because existing callers rely on the implicit declaration.
 */

int IterateEdge()
{
	int	edge [2];		/* the rows to update		     */
	int	nEdges;
	int	k;
	int	i, j;

	if (stripWidth < 1)
	   return (0);

	edge [0] = 1;			/* upper boundary		     */
	edge [1] = stripWidth;		/* lower boundary		     */
	nEdges = (stripWidth == 1) ? 1 : 2;

	for (k = 0; k < nEdges; k++) {
	   i = edge [k];

	   for (j = 1; j <= Rows; j++) {
	      NewPartition [i][j] =
		0.25 * (OldPartition [i-1][j] + OldPartition [i][j+1] +
			OldPartition [i+1][j] + OldPartition [i][j-1]);
	   }
	}

	return (0);
}
