Newsgroups: comp.parallel.mpi
From: ciula@lerc.nasa.gov (Kim Ciula)
Subject: using LAM MPI with LSF queueing system
Organization: NASA Lewis Research Center
Date: 14 Mar 1997 11:14:22 -0500
Message-ID: <5gbtgu$mp9@hopper1.lerc.nasa.gov>

I'm trying to write a script to submit LAM MPI jobs
to a workstation cluster running the LSF queueing system.
The job seems to get submitted fine, but the MPI job is either
not running or the output is just getting lost (I've tried
it with several programs and several ways of redirecting
stdout and stderr within LSF).  Has anyone else written such
a script?  If so, could I see a copy, or could you take a look
at what I have below?

   Thanks,

      Kim Ciula





#! /bin/sh
#
# Build the two files LAM needs for this batch job and print them:
#   PROCGROUP - host list handed to lamboot/wipe, one host per line
#   SCHEMA    - file handed to mpirun, one "<command line> n<index>" per host
# The command line to run is taken from the script arguments; the host list
# is taken from the LSB_HOSTS environment variable.
#
# "$*" (not "$@") is the well-defined way to flatten the argument list into
# a single string in a scalar assignment.
COMMANDLINE="$*"

###################################
#Generate procgroup and boot schema
###################################

# File names carry this shell's PID and the local host name.
this_host=$(hostname)
PROCGROUP="$HOME/.lsbatch/host$$.$this_host"
SCHEMA="$HOME/.lsbatch/schema$$.$this_host"

# write_host_files HOSTS
#   For each whitespace-separated word in HOSTS, append one line to
#   $PROCGROUP (the word with every "lace" rewritten to "laceatm") and one
#   line to $SCHEMA ("$COMMANDLINE n<index>", index counting from 0).
#   Globals read:    PROCGROUP, SCHEMA, COMMANDLINE
#   Globals written: nhosts (number of words processed),
#                    FIRST  (first word, unmodified; untouched if HOSTS
#                            is empty)
write_host_files() {
   nhosts=0
   # $1 is deliberately unquoted: the host list must be split into words.
   for word in $1
   do
      if [ "$nhosts" -eq 0 ] ; then
         FIRST=$word
      fi
      printf '%s\n' "$word" | sed 's/lace/laceatm/g' >> "$PROCGROUP"
      printf '%s\n' "$COMMANDLINE n$nhosts" >> "$SCHEMA"
      nhosts=$((nhosts + 1))
   done
}

# Start from a clean slate so the appends above never extend stale files.
rm -f "$PROCGROUP" "$SCHEMA"
write_host_files "$LSB_HOSTS"

if [ "$nhosts" -eq 0 ] ; then
   # Nothing was written, so the cat commands below will fail.
   echo 'WARNING: LSB_HOSTS is empty; procgroup and schema were not written' >&2
fi

echo 'PROCGROUP START'
cat "$PROCGROUP"
echo 'PROCGROUP END'
echo
echo 'SCHEMA START'
cat "$SCHEMA"
echo 'SCHEMA END'
echo

##################################
# run mpi job and save exit status
##################################

echo 'STARTING lamboot'
lamboot -v "$PROCGROUP"
echo 'DONE WITH lamboot'
echo

echo 'STARTING RUN'
# Earlier variants kept for reference; they run mpirun through rsh on $FIRST.
#rsh $FIRST "mpirun -c2c -v $SCHEMA"
#rsh $FIRST "mpirun $SCHEMA"
mpirun "$SCHEMA"
# Save the status right away: $? is overwritten by every later command
# (echo, wipe), so reading it further down would always yield their status
# instead of mpirun's.
exstat=$?
echo 'DONE WITH RUN'
echo

echo 'STARTING wipe'
wipe -v "$PROCGROUP"
echo 'DONE WITH wipe'
echo

##################
# cleanup and exit
##################

rm -f "$PROCGROUP" "$SCHEMA"
# Exit with mpirun's status, as captured above.
exit "$exstat"


