/*
 *  $Id: chsend.c,v 1.17 1994/05/28 19:43:59 gropp Exp $
 *
 *  (C) 1993 by Argonne National Laboratory and Mississipi State University.
 *      All rights reserved.  See COPYRIGHT in top-level directory.
 */

/***********************************************************************
*                                                                      *
*   p4send.c                                                           *
*   MPI for MS-Windows 3.1                                             *
*   current version: 0.99b          06/10/95                           *
*                                                                      *
*   Joerg Meyer                                                        *
*   University of Nebraska at Omaha (UNO)                              *
*   Department of Computer Science                                     *
*                                                                      *
*   This is an MPI implementation for MS-Windows 3.1                   *
*   It is based on the MPI implementation from Argonne National        *
*   Laboratory and Mississippi State University, version from          *
*   June 17, 1994. Note their COPYRIGHT.                               *
*   ( source code and user's guide available by anonymous FTP from     *
*     info.mcs.anl.gov in directory /pub/mpi )                         *
*   Anyone is free to copy and modify this code to suit his or her     *
*   own purposes as long as these notices are retained.                *
*                                                                      *
***********************************************************************/

#include <mpiimpl.h>
#include <mpisys.h>
#pragma hdrstop

#ifndef lint
/* Version identification string.  %W% and %G% are SCCS ID keywords, which
   SCCS replaces with file/version/date information when the file is
   retrieved.  Nothing in this file reads the array, and it is omitted
   entirely when `lint` is defined. */
static char SCCSid[] = "%W% %G%";
#endif

#include <memory.h>
//#include "mpid.h"

/* 
   Still need to do:  clean up the post_short, post_long to look more like the
   code in chrecv.c .  Complicated slightly because the chrecv has already
   received part of the message, where here, the message header needs to
   be sent with, possibly, some of the data.
 */
/* Non-blocking code untested */
/* PI_NO_NSEND is therefore forced on here (unless it is already defined),
   which compiles out every `#ifndef PI_NO_NSEND` section further down in
   this file. */
#ifndef PI_NO_NSEND 
#define PI_NO_NSEND
#endif

/***************************************************************************/
/* These routines enable the debugging output                              */
/***************************************************************************/
/* Trace switch for the send routines in this file: while it is zero they
   stay quiet, any non-zero value makes them printf their progress. */
static Int DebugFlag = 0;

/*
   MPID_SetSendDebugFlag - turn the send-side debug trace on or off.

   Input:
     f - new value of the trace switch (0 disables the output)
 */
void MPID_SetSendDebugFlag (Int f)
{
	DebugFlag = f;
}
/***************************************************************************/

/* 
   This file includes the routines to handle the device part of a send
   for Chameleon

   As a reminder, the first element is the device handle, the second is
   the (basically opaque) mpi handle
 */

/*
   MPID_P4_post_send_short - send a short (single packet) message.

   The header fields (length, context id, tag, mode, local rank) are copied
   from the MPI send handle into a packet on the stack, the payload is placed
   in pkt.buffer, and header plus payload go out in one blocking p4_sendx.

   Input:
     dmpi_send_handle - MPI-level send handle; its dev_shandle member is the
                        device-level handle used here
     len              - payload length in bytes.  It is NOT range-checked
                        against the packet buffer in this routine;
                        MPID_P4_post_send only routes messages of at most
                        MPID_PACKET_SIZE bytes here.

   Unless the send is synchronous, the handle is marked done and
   DMPI_mark_send_completed is called before returning.

   Returns MPI_SUCCESS.
 */
Int MPID_P4_post_send_short (MPIR_SHANDLE far *dmpi_send_handle, Int len)
{
	MPID_SHANDLE far *mpid_send_handle;
	MPID_PACKET pkt;
	Int         actual_len;
	Int         dest;
	Int         i;

	mpid_send_handle = &dmpi_send_handle->dev_shandle;
	pkt.len			 = len;
	pkt.context_id	 = dmpi_send_handle->contextid;
	pkt.tag			 = dmpi_send_handle->tag;
	pkt.mode		 = dmpi_send_handle->mode;
	pkt.lrank		 = dmpi_send_handle->lrank;
	dest             = dmpi_send_handle->dest;

	/* A synchronous send gets a sync id; the call may rewrite pkt.mode,
	   which is why the completion test below masks the mode first. */
	if (pkt.mode == MPIR_MODE_SYNCHRONOUS) 
	    MPID_P4_Get_Sync_Id( &pkt.mode, dmpi_send_handle, mpid_send_handle );

	if (DebugFlag) 
	{
	    printf( 
	 "[%ld]S Starting a send of tag = %ld, len = %ld, ctx = %ld, dest = %ld, mode=%lx\n",
		    __MYPROCID, pkt.tag, pkt.len, pkt.context_id, dest, pkt.mode );
	}

	/* Heterogeneous support (byte-swapping the header and the data into the
	   receiver's order) is not enabled in this version. */

	if (mpid_send_handle->start) 
	{
	    /* Contiguous user buffer: copy it straight into the packet */
	    _fmemcpy( pkt.buffer, mpid_send_handle->start, (int)len );
	    if (DebugFlag) 
	    {
			printf( "[%ld]S Getting data from mpid->start, first int is %ld\n",
			        __MYPROCID, *(Int far *)&pkt.buffer[0] );
			if (len < 78) 
			{
			    /* Hex dump of the payload.  The argument is cast to match
			       %lx exactly: a byte promoted to int is not an unsigned
			       long where int and long differ in size.
			       NOTE(review): assumes pkt.buffer is a byte array. */
			    for (i=0; i<len; i++) 
			    {
					printf( "%lx", (unsigned long)(unsigned char)pkt.buffer[i] );
				}
			    printf( "\n" );
		    }
		}
    }
	else if (len > 0) 
	{
	    /* Note that if len is 0, we may not have a ->start pointer but we
	       still don't need to get any data */
	    actual_len = len;
	    DMPI_get_into_contig( dmpi_send_handle, (void far *)(pkt.buffer), 
							  MPID_PACKET_SIZE, &actual_len );
	    if (DebugFlag) 
	    {
			printf( 
		      "[%ld]S Getting data (%ld bytes) with get_into_contig, first int is %ld\n",
			        __MYPROCID, actual_len, *(Int *)&pkt.buffer[0] );
			if (pkt.len < 78) 
			{
			    /* Same hex dump as above, with the same explicit cast */
			    for (i=0; i<pkt.len; i++) 
			    {
					printf( "%lx", (unsigned long)(unsigned char)pkt.buffer[i] );
				}
			    printf( "\n" );
		    }
		}
    }

/* Always use a blocking send for short messages.
   (May fail with systems that do not provide adequate
   buffering.  These systems should switch to non-blocking sends)
 */
	if (DebugFlag) 
	{
		printf( "[%ld]S Sending message in a single packet...\n", __MYPROCID );
	}
/* In case the message is marked as non-blocking, indicate that we don't
   need to wait on it */
	mpid_send_handle->sid = 0;
	p4_sendx(MPID_PT2PT_TAG, dest, (char far *)(&pkt), len + MPID_HEADER_LEN, P4NOX );
	if ((pkt.mode & MPID_MODE_MASK) != MPIR_MODE_SYNCHRONOUS) 
	{
	    mpid_send_handle->done = MPIR_YES;
	    DMPI_mark_send_completed( dmpi_send_handle );
	}
	if (DebugFlag) 
	{
	    printf( "[%ld]S Sent message in a single packet...\n", __MYPROCID );
	}
	return MPI_SUCCESS;
}

/*
   MPID_P4_post_send_long - send a long message.

   Only the packet header (MPID_HEADER_LEN bytes) is sent with
   MPID_PT2PT_TAG; the data follows in one or more separate p4_sendx calls
   tagged MPID_PT2PT2_TAG(__MYPROCID).  When the device handle has a ->start
   pointer the whole buffer is sent from there in a single call; otherwise
   DMPI_get_from_contig supplies the address and size of each piece until
   len bytes have been sent.

   Input:
     dmpi_send_handle - MPI-level send handle; its dev_shandle member is the
                        device-level handle used here
     len              - total message length in bytes

   Unless the send is synchronous, the handle is marked done and
   DMPI_mark_send_completed is called before returning.

   Returns MPI_SUCCESS.
 */
Int MPID_P4_post_send_long (MPIR_SHANDLE far *dmpi_send_handle, Int len)
{
	MPID_SHANDLE far *mpid_send_handle;
	char far 		*address;
	MPID_PACKET pkt;
	Int         actual_len;
	Int         dest;

	mpid_send_handle = &dmpi_send_handle->dev_shandle;
	pkt.len	       = len;
	pkt.context_id = dmpi_send_handle->contextid;
	pkt.tag	       = dmpi_send_handle->tag;
	pkt.mode       = dmpi_send_handle->mode;
	pkt.lrank      = dmpi_send_handle->lrank;
	dest           = dmpi_send_handle->dest;
	
	if (pkt.mode == MPIR_MODE_SYNCHRONOUS) 
	    MPID_P4_Get_Sync_Id( &pkt.mode, dmpi_send_handle, mpid_send_handle );

	if (DebugFlag) 
	{
	    printf( 
	 "[%ld]S Starting a send of tag = %ld, len = %ld, ctx = %ld, dest = %ld, mode=%lx\n",
		    __MYPROCID, pkt.tag, pkt.len, pkt.context_id, dest, pkt.mode );
    }

	/* Heterogeneous support (byte-swapping the header and the data into the
	   receiver's order) is not enabled in this version. */

	if (DebugFlag) 
	{
	    printf( "[%ld]S Sending extra-long message...\n", __MYPROCID );
    }

	/* Send as packet only */
	p4_sendx(MPID_PT2PT_TAG,dest,(char far *)(&pkt),MPID_HEADER_LEN,P4NOX );
	mpid_send_handle->sid = 0;
	do {
	    actual_len = len;
	    if (mpid_send_handle->start) 
	    {
			address    = ((char far *)mpid_send_handle->start);
		}
	    else 
	    {
			/* NOTE(review): presumably sets address and actual_len for the
			   next piece; if it ever reports 0 bytes this loop cannot
			   terminate -- confirm against DMPI_get_from_contig. */
			DMPI_get_from_contig( dmpi_send_handle, (LPPVOID)(&address), 
					     -1, &actual_len );
		}         

	    /* Debug trace of the data about to go out.  It reads the "first int"
	       from the data itself: pkt.buffer is never filled in on this path,
	       so it must not be printed.  Skipped when the piece is smaller than
	       one Int. */
	    if (DebugFlag && actual_len >= (Int)sizeof(Int)) 
	    {
			if (mpid_send_handle->start) 
			{
				printf( "[%ld]S Getting data from mpid->start, first int is %ld\n",
				        __MYPROCID, *(Int far *)address );
			}
			else 
			{
				printf( 
		       "[%ld]S Getting data (%ld bytes) with get_into_contig, first int is %ld\n",
				        __MYPROCID, actual_len, *(Int far *)address );
			}
		}

#ifndef PI_NO_NSEND
	    if (mpid_send_handle->is_non_blocking) 
	    {
			if (mpid_send_handle->sid) 
			{
			    /* Not quite correct.  Assumes that only the sid field
			       is used; this is ok as long as the PI_NO_NSEND 
			       wrappers are used... */
			    p4_sendx(MPID_PT2PT2_TAG(__MYPROCID),0,(char far *)(address),actual_len,P4NOX);
		    }
			mpid_send_handle->sid = 0;
			p4_sendx(MPID_PT2PT2_TAG(__MYPROCID),dest,(char far *)(address),actual_len,P4NOX);
		}
    else 
#endif
			p4_sendx(MPID_PT2PT2_TAG(__MYPROCID), dest, (char far *)(address),
										actual_len, P4NOX );
	    len -= actual_len;
    } while (len > 0);

	/* A synchronous send is not marked complete here */
#ifndef PI_NO_NSEND
	if ((pkt.mode & MPID_MODE_MASK) != MPIR_MODE_SYNCHRONOUS) 
	{
	    mpid_send_handle->done = 1;
	    DMPI_mark_send_completed( dmpi_send_handle );
    }
#else
	if (dmpi_send_handle->mode != MPIR_MODE_SYNCHRONOUS) 
	{
	    mpid_send_handle->done = MPIR_YES;
	    DMPI_mark_send_completed( dmpi_send_handle );
    }
#endif
	return MPI_SUCCESS;
}


/*
   MPID_P4_post_send - start a send; the data is sent immediately.

   The message length is taken from the device handle's bytes_as_contig when
   the handle carries the address of a contiguous user buffer (->start), and
   from DMPI_get_totallen otherwise.  Messages longer than MPID_PACKET_SIZE
   go to MPID_P4_post_send_long, everything else to MPID_P4_post_send_short.

   Input:
     dmpi_send_handle - MPI-level send handle

   Returns whatever the selected short/long routine returns.
 */
Int MPID_P4_post_send (MPIR_SHANDLE far *dmpi_send_handle)
{
	MPID_SHANDLE far *dev = &dmpi_send_handle->dev_shandle;
	Int              nbytes;

	/* Work out how many bytes are to be sent */
	if (!dev->start) 
	{
		DMPI_get_totallen( dmpi_send_handle, &nbytes );
	}
	else 
	{
		nbytes = dev->bytes_as_contig;
	}

	/* Anything that fits in one packet takes the short path */
	if (nbytes <= MPID_PACKET_SIZE) 
	{
		return MPID_P4_post_send_short( dmpi_send_handle, nbytes );
	}
	return MPID_P4_post_send_long( dmpi_send_handle, nbytes );
}


/*
   MPID_P4_isend_wait - wait for a previously posted send.

   Chameleon gets no asynchronous notice that the message has been completed,
   so there is no asynchronous ref to DMPI_mark_send_completed; the handle is
   marked complete here instead, unless the send is synchronous.

   BUG - if a nonblocking, synchronous send is used, we may wait on it
   again when complete send is called.  We must make sure that in this case,
   only MPID_P4_complete_send calls this routine.

   Input:
     dmpi_send_handle - MPI-level send handle

   Returns MPI_SUCCESS.
 */
Int MPID_P4_isend_wait (MPIR_SHANDLE far *dmpi_send_handle)
{
	MPID_SHANDLE far *dev = &dmpi_send_handle->dev_shandle;

#ifndef PI_NO_NSEND
	/* Wait on the message.  sid may be clear: non-blocking is not used if
	   the message is short enough. */
	if (dev->sid != 0) 
	{
	    p4_sendx(MPID_PT2PTTAG(dev->to),dev->to,
	    (char far *)((void far *)0),dev->len,P4NOX);
	    dev->sid = 0;
    }
#endif

	/* Synchronous sends are not marked complete here */
	if (dmpi_send_handle->mode == MPIR_MODE_SYNCHRONOUS) 
	{
	    return MPI_SUCCESS;
    }

	dev->done = MPIR_YES;
	DMPI_mark_send_completed( dmpi_send_handle );
	return MPI_SUCCESS;
}

/*
   MPID_P4_complete_send - block until a send has completed.

   If the send is not done yet and is non-blocking, MPID_P4_isend_wait is
   called first.  After that, incoming messages are processed with
   MPID_P4_check_incoming( MPID_BLOCKING ) until the device handle's done
   flag is set; this is what waits for the completion of a synchronous send.

   Input:
     dmpi_send_handle - MPI-level send handle
     status           - not used by this routine
 */
void MPID_P4_complete_send (MPIR_SHANDLE far *dmpi_send_handle,
							MPI_Status   far *status)
{
	MPID_SHANDLE far *dev = &dmpi_send_handle->dev_shandle;

	/* Self-assignment only: the parameter is otherwise unreferenced */
	status = status;

	if (DebugFlag) 
	{
		printf( "[%ld]S Entering complete send...\n", __MYPROCID );
	}

	/* Check to see if we need to complete the send. */
	if (!dev->done) 
	{
		if (dev->is_non_blocking) 
		{
			MPID_P4_isend_wait( dmpi_send_handle );
		}
	}

	if (DebugFlag) 
	{
		printf( "[%ld]S Entering complete send while loop...\n", __MYPROCID );
	}

	/* Keep handling incoming traffic until the send is flagged as done */
	for (;;) 
	{
		if (dev->done) 
		{
			break;
		}
		(void)MPID_P4_check_incoming( MPID_BLOCKING );
	}

	if (DebugFlag) 
	{
		printf( "[%ld]S Exiting complete send...\n", __MYPROCID );
	}
}





