/*
 * This file is part of the Pablo Performance Analysis Environment
 *
 *          (R)
 * The Pablo    Performance Analysis Environment software is NOT in
 * the public domain.  However, it is freely available without fee for
 * education, research, and non-profit purposes.  By obtaining copies
 * of this and other files that comprise the Pablo Performance Analysis
 * Environment, you, the Licensee, agree to abide by the following
 * conditions and understandings with respect to the copyrighted software:
 * 
 * 1.  The software is copyrighted in the name of the Board of Trustees
 *     of the University of Illinois (UI), and ownership of the software
 *     remains with the UI. 
 *
 * 2.  Permission to use, copy, and modify this software and its documentation
 *     for education, research, and non-profit purposes is hereby granted
 *     to Licensee, provided that the copyright notice, the original author's
 *     names and unit identification, and this permission notice appear on
 *     all such copies, and that no charge be made for such copies.  Any
 *     entity desiring permission to incorporate this software into commercial
 *     products should contact:
 *
 *          Professor Daniel A. Reed                 reed@cs.uiuc.edu
 *          University of Illinois
 *          Department of Computer Science
 *          2413 Digital Computer Laboratory
 *          1304 West Springfield Avenue
 *          Urbana, Illinois  61801
 *          USA
 *
 * 3.  Licensee may not use the name, logo, or any other symbol of the UI
 *     nor the names of any of its employees nor any adaptation thereof in
 *     advertizing or publicity pertaining to the software without specific
 *     prior written approval of the UI.
 *
 * 4.  THE UI MAKES NO REPRESENTATIONS ABOUT THE SUITABILITY OF THE
 *     SOFTWARE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS
 *     OR IMPLIED WARRANTY.
 *
 * 5.  The UI shall not be liable for any damages suffered by Licensee from
 *     the use of this software.
 *
 * 6.  The software was developed under agreements between the UI and the
 *     Federal Government which entitle the Government to certain rights.
 *
 **************************************************************************
 *
 * Developed by: The TAPESTRY Parallel Computing Laboratory
 *		 University of Illinois at Urbana-Champaign
 *		 Department of Computer Science
 *		 1304 W. Springfield Avenue
 *		 Urbana, IL	61801
 *
 * Copyright (c) 1991-1994
 * The University of Illinois Board of Trustees.
 *	All Rights Reserved.
 *
 * PABLO is a registered trademark of
 * The Board of Trustees of the University of Illinois
 * registered in the U.S. Patent and Trademark Office.
 *
 * Author:  Daniel A. Reed (reed@cs.uiuc.edu)
 * Contributing Author:  Xingbin Zhang (zhang@cs.uiuc.edu)
 * Contributing Author:  Roger J. Noe (noe@cs.uiuc.edu)
 * Project Manager and Principal Investigator:
 *	Daniel A. Reed (reed@cs.uiuc.edu)
 *
 * Funded by: National Science Foundation grants NSF CCR87-06653 and
 * NSF CDA87-22836 (Tapestry), DARPA Contract No. DABT63-91-K-0004,
 * by a grant from the Digital Equipment Corporation External Research
 * Program, and by a collaborative research agreement with the Intel
 * Supercomputer Systems Division.
 *
 */

/*
 * pdeNode.c:
 *	This file contains the node code for an iterative elliptic
 *	PDE solver, implemented for the Thinking Machines CM-5 (CMMD).
 */

#include <cm/cmmd.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "ProcTrace.h"
#include "CMMDtrace.h"

#if defined(PROCTRACE) || defined(MESGTRACE)
# define	TRACE
#endif

#define	TRUE		1
#define	FALSE		0
#define	EPSILON		1e-5

#define UPPERTOLOWERtag 5	/* Message type: send boundary points "up"   */
#define LOWERTOUPPERtag 6	/* Message type: send boundary points "down" */

int	Rows;			/* number of rows in grid		     */
int	ConvIter;		/* iterations between checks		     */
int	MaxIter;		/* iteration count for benchmark	     */
int	stripWidth;

int	myNode;
int	nNodes;

float	**OldPartition;		/* partitions for jacobi iteration	     */
float	**NewPartition;		/* iterations				     */
float	**Partition;

/*
 * main:
 *	Node program.  Receives the problem parameters and this node's
 *	strip of the grid from the host, then repeats Jacobi passes until
 *	MaxIter passes have run or Convergence() returns non-zero.  In each
 *	pass the strip's first and last rows are sent to nodes myNode-1 and
 *	myNode+1, and their edge rows are received into partition rows 0
 *	and stripWidth+1.  When the loop ends the strip (rows 1..stripWidth)
 *	is sent back to the host; node 0 first sends the final value of
 *	Iter.
 *
 *	SEND_ASYNC, RECEIVE_BLOCK and REDUCE_INT are not defined in this
 *	file (presumably tracing wrappers from CMMDtrace.h -- confirm).
 */
main () {
#ifdef TRACE
	char	traceFile[64];		/* instrumentation trace file name   */
# ifdef DEBUG
	char	debugFile[64];		/* instrumentation debug file name   */
# endif /* DEBUG */
#endif /* TRACE */

	int		Iter;		/* iteration number		     */
	CMMD_mcb	midToUpper;	/* boundary condition transmission   */
	CMMD_mcb	midToLower;	/* message control blocks	     */
	int		GlobalConv;	/* global convergence flag	     */

	int	i;

	void InitGrid();

	CMMD_enable_host();

	Iter = 0;
	GlobalConv = FALSE;

	myNode = CMMD_self_address();
	nNodes = CMMD_partition_size();

	/*
	 *
	 * Receive size information from host
	 *
	 */

	/* Rows, MaxIter and ConvIter arrive by host broadcast; stripWidth   */
	/* arrives by a separate point-to-point receive (presumably because  */
	/* it may differ from node to node -- confirm against the host code).*/
	CMMD_receive_bc_from_host( &Rows, sizeof(int) );
	CMMD_receive_bc_from_host( &MaxIter, sizeof(int) );
	CMMD_receive_bc_from_host( &ConvIter, sizeof(int) );
	CMMD_receive_block( CMMD_host_node(), CMMD_ANY_TAG,
			    &stripWidth, sizeof(int) );

	/*
	 *
	 * Initialize the grid based on the partition width
	 *
	 */

	InitGrid();			/* allocate the partition array      */

	/*
	 *
	 * Node 0 gets the top boundary of the entire grid and the last node
	 * gets the bottom boundary.
	 *
	 */

	/* InitGrid lays the rows out contiguously, so one receive fills     */
	/* several consecutive rows starting at the row pointer given.	     */
	/* NOTE(review): when nNodes == 1 only the first branch below runs,  */
	/* so row stripWidth+1 is never filled in here -- confirm the host   */
	/* never runs this program on a single node.			     */
	if( myNode == 0 ) {

	   /* stripWidth+1 rows: rows 0 .. stripWidth			     */
	   CMMD_receive_block( CMMD_host_node(), CMMD_ANY_TAG, OldPartition[0],
			       (stripWidth + 1) * (Rows + 2) * sizeof(float) );

	   /* Row 0 is never recomputed or received again on this node, and  */
	   /* the two arrays trade roles each pass, so both need a copy.     */
	   for (i = 0; i <= Rows + 1; i++)	    /* fixed boundary values */
	      NewPartition[0][i] = OldPartition[0][i];

	} else if (myNode == nNodes - 1) {

	   /* stripWidth+1 rows: rows 1 .. stripWidth+1			     */
	   CMMD_receive_block( CMMD_host_node(), CMMD_ANY_TAG, OldPartition[1],
			       (stripWidth + 1) * (Rows + 2) * sizeof(float) );

	   for (i = 0; i <= Rows + 1; i++)	    /* fixed boundary values */
	      NewPartition[stripWidth+1][i] = OldPartition[stripWidth+1][i];

	} else {

	   /* stripWidth rows: rows 1 .. stripWidth; rows 0 and stripWidth+1 */
	   /* are filled from the neighbouring nodes inside the loop below.  */
	   CMMD_receive_block( CMMD_host_node(), CMMD_ANY_TAG, OldPartition[1],
			       stripWidth * (Rows + 2) * sizeof(float) );
	}

	/* Copy left and right boundary pts to carry them between iterations */
	/* (Iterate and IterateEdge only write columns 1 .. Rows.)	     */
	for(i = 1; i <= stripWidth; i++) {
	    NewPartition[i][0] = OldPartition[i][0];
	    NewPartition[i][Rows+1] = OldPartition[i][Rows+1];
	}

	/* Define the name of the trace output file for this node and	     */
	/* pass it to the trace capture library interface.		     */

#ifdef TRACE
	sprintf( traceFile, "/scr6/noe/Jacobi%.3d.bin", myNode );
	setTraceFileName( traceFile );
# ifdef DEBUG
	sprintf( debugFile, "/scr6/noe/debug%.3d", myNode );
	setDebugFileName( debugFile );
# endif /* DEBUG */
#endif /* TRACE */

	/* Perform pre-initialization for both the message-passing and	     */
	/* procedure tracing extensions to the trace library (if	     */
	/* needed).  This will guarantee that record descriptors for	     */
	/* both kinds of tracing extensions will be output before the	     */
	/* trace library's internal event families.			     */

#ifdef MESGTRACE
	preInitMesgTrace();
#endif /* MESGTRACE */

#ifdef PROCTRACE
	preInitProcTrace();
#endif /* PROCTRACE */

	/* All pre-initialization is done.  Now do basic initialization	     */
	/* of the procedure and message-passing tracing extensions (if	     */
	/* needed), and as a consequence, basic initialization of the	     */
	/* trace library itself.					     */

#ifdef PROCTRACE
	initProcTrace( NUMBER_OF_PROCS, procEntries, procExits );
#endif /* PROCTRACE */

#ifdef MESGTRACE
	CMMDtraceInit();
#endif /* MESGTRACE */

	for (Iter = 1; (Iter <= MaxIter) && !GlobalConv; Iter++) {
	   /* Start sending this strip's first row to the node above and its */
	   /* last row to the node below.  The sends are only waited for at  */
	   /* the bottom of the loop body.				     */
	   if( myNode != 0 )
		midToUpper = SEND_ASYNC( myNode-1,
					 LOWERTOUPPERtag,
					 OldPartition[1],
					 (Rows + 2) * sizeof(float),
					 (void (*)()) NULL,
					 (void *) NULL
				       );

	   if( myNode != nNodes - 1 )
		midToLower = SEND_ASYNC( myNode+1,
					 UPPERTOLOWERtag,
					 OldPartition[stripWidth],
					 (Rows + 2) * sizeof(float),
					 (void (*)()) NULL,
					 (void *) NULL
				       );

#ifdef PROCTRACE
	   traceEvent( ITERATEEntry, (char *) NULL, 0 );
#endif /* PROCTRACE */

	   /* Iterate() reads only rows 1 .. stripWidth, so it can run	     */
	   /* before the neighbours' edge rows have been received.	     */
	   Iterate();			/* update grid interior points	     */

#ifdef PROCTRACE
	   traceEvent( ITERATEExit, (char *) NULL, 0 );
#endif /* PROCTRACE */

	  /*
	   *
	   * Retrieve the boundary points
	   *
	   */

	  /* The tag names the direction the sender used: the node below     */
	  /* sent its first row with LOWERTOUPPERtag, the node above sent    */
	  /* its last row with UPPERTOLOWERtag.				     */
	  if( myNode != nNodes - 1 )
	     RECEIVE_BLOCK( myNode+1, LOWERTOUPPERtag,
			    OldPartition[stripWidth + 1],
			    (Rows + 2) * sizeof(float) );

	  if( myNode != 0 )
	     RECEIVE_BLOCK( myNode-1, UPPERTOLOWERtag,
			    OldPartition[0],
			    (Rows + 2) * sizeof(float) );

#ifdef PROCTRACE
	   traceEvent( ITERATEEDGEEntry, (char *) NULL, 0 );
#endif /* PROCTRACE */

	   IterateEdge();		/* Update the two partition edges    */

#ifdef PROCTRACE
	   traceEvent( ITERATEEDGEExit, (char *) NULL, 0 );
#endif /* PROCTRACE */

	  /*
	   *
	   * Check for global convergence
	   *
	   */

#ifdef PROCTRACE
	   traceEvent( GLOBALCONVERGEEntry, (char *) NULL, 0 );
#endif /* PROCTRACE */

	   /* Must run before the swap: Converged() compares OldPartition    */
	   /* with the NewPartition values just computed.		     */
	   GlobalConv = Convergence(Iter);

#ifdef PROCTRACE
	   traceEvent( GLOBALCONVERGEExit, (char *) NULL, 0 );
#endif /* PROCTRACE */

	   /* After the swap OldPartition holds the values just computed.    */
	   Partition = NewPartition;    /* swap the partitions		     */
	   NewPartition = OldPartition;
	   OldPartition = Partition;

	   /* The rows handed to SEND_ASYNC above are now reached through    */
	   /* NewPartition and will be overwritten by the next pass, so the  */
	   /* sends are waited for here before looping.			     */
	   if (myNode != 0) {
	     CMMD_msg_wait( midToUpper );
	     CMMD_free_mcb( midToUpper );
	   }

	   if (myNode != nNodes - 1) {
	     CMMD_msg_wait( midToLower );
	     CMMD_free_mcb( midToLower );
	   }

	}

	/*
	 *
	 * Iterations complete
	 *
	 */

	/* Flush the trace buffer to the output file and de-initialize	     */
	/* the instrumentation library software.			     */

#ifdef TRACE
	endTracing();
#endif /* TRACE */

	/*
	 *
	 * All I/O on this node has been completed.  When all the nodes
	 * reach this point, the I/O server on the host will be suspended.
	 *
	 */

	CMMD_global_suspend_servers();

	/*
	 *
	 * Send the partition back to the host
	 *
	 */

	/* The for-loop increment runs before the exit test, so Iter is one  */
	/* greater than the number of passes actually executed.		     */
	if ( myNode == 0 )
	    CMMD_send_block( CMMD_host_node(), CMMD_DEFAULT_TAG, &Iter,
			     sizeof(int) );

	/* Rows 1 .. stripWidth only; because of the final swap OldPartition */
	/* holds the most recently computed values.			     */
	CMMD_send_block( CMMD_host_node(), CMMD_DEFAULT_TAG, OldPartition[1],
			 stripWidth * (Rows + 2) * sizeof(float) );

	exit(0);
}


/************************************************************************/
/*									*/
/*	Procedure:	InitGrid					*/
/*	Function:	Initializes parameters for PDE solver		*/
/*	Author:		Daniel A. Reed					*/
/*	Last Change:	11/9/87						*/
/*									*/
/*	Parameters:	None (globals modified)				*/
/*									*/
/************************************************************************/

void InitGrid() {

	int	i, j;

	OldPartition = (float **) malloc ((stripWidth + 2) * sizeof (float *));
	NewPartition = (float **) malloc ((stripWidth + 2) * sizeof (float *));

	OldPartition [0] = (float *)
		malloc ((stripWidth + 2) * (Rows + 2) * sizeof (float));
	NewPartition [0] = (float *)
		malloc ((stripWidth + 2) * (Rows + 2) * sizeof (float));

	for (i = 1; i <= stripWidth + 1; i++) {
	   OldPartition [i] = OldPartition [0] + i * (Rows + 2);
	   NewPartition [i] = NewPartition [0] + i * (Rows + 2);
	}
}


/************************************************************************/
/*									*/
/*	Procedure:	Converged					*/
/*	Function:	Local convergence test for this node's strip	*/
/*	Author:		Daniel Reed					*/
/*	Last change:	11/19/87					*/
/*									*/
/*	Parameters:	Iter - accepted but not used			*/
/*	Output:		TRUE  - no interior point of the strip changed	*/
/*			        by more than EPSILON between passes	*/
/*			FALSE - at least one point changed by more than	*/
/*			        EPSILON					*/
/*									*/
/************************************************************************/

Converged (Iter)

	int	Iter;
{
	float	*prev;			/* row of OldPartition		     */
	float	*next;			/* matching row of NewPartition	     */
	int	row, col;

	for (row = 1; row <= stripWidth; row++) {
	   prev = OldPartition [row];
	   next = NewPartition [row];

	   /* Columns 0 and Rows+1 are boundary points and are skipped.	     */
	   for (col = 1; col <= Rows; col++) {
	      if (fabs (prev [col] - next [col]) > EPSILON) {
		 return (FALSE);	/* one moving point is enough	     */
	      }
	   }
	}

	return (TRUE);
}


/************************************************************************/
/*                                                                      */
/*      Procedure:      Convergence                                     */
/*      Function:       global convergence checking                     */
/*      Authors:        Daniel A. Reed                                  */
/*                      Xingbin Zhang                                   */
/*                      Roger Noe                                       */
/*      Last Change:    7/24/93                                         */
/*                                                                      */
/*      Parameters:     Iter - number of the pass just completed        */
/*      Returns:        FALSE when no test is due this pass;            */
/*                      otherwise the result REDUCE_INT returns for the */
/*                      local Converged() value                         */
/*                                                                      */
/*      A test is due only when Iter is a multiple of ConvIter.         */
/*      ConvIter == 0 means no test is ever due (Iter % 0 would be      */
/*      undefined), so GlobalConv stays FALSE on every pass.            */
/*                                                                      */
/************************************************************************/

int Convergence(Iter)

	int	Iter;
{
	int	GlobalConv = FALSE;

	/*
	 *
	 * Check for global convergence
	 *
	 */

	/* Guard the modulus: ConvIter comes from the host and a value of 0  */
	/* would otherwise divide by zero.  REDUCE_INT is not defined in     */
	/* this file; presumably it combines the flag across all nodes, in   */
	/* which case every node must take the same branch here -- ConvIter  */
	/* is received by broadcast in main(), so they should (confirm).     */
	if (ConvIter != 0 && (Iter % ConvIter) == 0) {

#ifdef PROCTRACE
	  traceEvent( CONVERGEDEntry, (char *) NULL, 0 );
#endif /* PROCTRACE */

	  GlobalConv = Converged (Iter);		/* local convergence */

#ifdef PROCTRACE
	  traceEvent( CONVERGEDExit, (char *) NULL, 0 );
#endif /* PROCTRACE */

	  GlobalConv = REDUCE_INT( GlobalConv, CMMD_combiner_and );

	}

	return( GlobalConv );
}


/************************************************************************/
/*									*/
/*	Procedure:	Iterate						*/
/*	Function:	One Jacobi pass over the strip's inner rows	*/
/*	Author:		Daniel A. Reed					*/
/*	Last Change:	1/7/88						*/
/*									*/
/*	Rows 2 .. stripWidth-1 of NewPartition are recomputed from	*/
/*	OldPartition as the average of the four neighbouring points.	*/
/*	Rows 1 and stripWidth are left to IterateEdge.			*/
/*									*/
/************************************************************************/

Iterate()
{
	float	*above, *here, *below;	/* three adjacent OldPartition rows  */
	float	*out;			/* NewPartition row being written    */
	int	row, col;

	for (row = 2; row < stripWidth; row++) {
	   above = OldPartition [row - 1];
	   here  = OldPartition [row];
	   below = OldPartition [row + 1];
	   out   = NewPartition [row];

	   /* Columns 0 and Rows+1 are boundary points and are not written.  */
	   for (col = 1; col <= Rows; col++) {
	      out [col] =
		0.25 * (above [col] + here [col + 1] +
			below [col] + here [col - 1]);
	   }
	}
}




/************************************************************************/
/*									*/
/*	Procedure:	IterateEdge					*/
/*	Function:	One Jacobi pass over the strip's two edge rows	*/
/*									*/
/*	Recomputes row 1 and then row stripWidth of NewPartition from	*/
/*	OldPartition.  When stripWidth is 1 the same row is simply	*/
/*	computed twice.							*/
/*									*/
/************************************************************************/

IterateEdge()
{
	int	pass;			/* 0 = upper edge, 1 = lower edge    */
	int	row, col;

	for (pass = 0; pass < 2; pass++) {

	   row = (pass == 0) ? 1 : stripWidth;

	   /* Same four-point average as Iterate; these rows also read	     */
	   /* partition rows 0 and stripWidth+1.			     */
	   for (col = 1; col <= Rows; col++) {
	      NewPartition [row][col] =
		0.25 * (OldPartition [row - 1][col] + OldPartition [row][col + 1] +
			OldPartition [row + 1][col] + OldPartition [row][col - 1]);
	   }
	}
}
