/* CRCW Quicksort using the fork divide-and-conquer (DC) strategy.    C.W. Kessler 12/95
 * Sorts N elements using p processors
 * in expected time O((N/p) log N) and space O(N log N).
 * Synchronous version.
 */

#include <fork.h>
#include <assert.h>
#include <io.h>
#include <stdlib.h>
#include <math.h>

/* Shared globals: N is read from the user and a is allocated in main();
 * both are read by print_array(). */
sh int N;   /* the number of array elements to be sorted */
sh int *a;  /* the array to be sorted */



/* Three-way comparison of two ints that are passed by address.
 * Serves as the comparison callback handed to the sequential qsort().
 * Returns -1 if *a < *b, 1 if *a > *b, and 0 if both are equal.
 */

async int cmp( void *a, void *b )
{
  /* equal values first, then decide the direction */
  if (*(int *)a == *(int *)b) return 0;
  return (*(int *)a < *(int *)b) ? -1 : 1;
}
  

/* Parallel quicksort of array[0..n-1], executed synchronously by the
 * calling processor group.
 *
 * The group first counts its processors (p) and renumbers $ accordingly.
 * Arrays with at most two elements are handled directly, and a group that
 * consists of a single processor falls back to the sequential qsort().
 * Otherwise the elements are partitioned around the pivot array[0] into
 * three freshly allocated subarrays (less than / equal to / greater than
 * the pivot).  The two outer subarrays are sorted recursively -- by two
 * subgroups in parallel if both hold more than one element -- and finally
 * the three parts are copied back into array.
 *
 * The sorted result ends up in array, but the partitioning itself is not
 * done in place: it uses temporary shared storage obtained with shalloc().
 *
 * array  the elements to be sorted
 * n      the number of elements in array
 */

sync void qs( sh int *array, sh int n )
{
 sh int subn[3];          /* size of subarrays (<, ==, > pivot) */
 sh int *subarray[3];     /* subarrays; [0] and [2] are recursively sorted */
 sh int *subindex;        /* subindex[j]: position of array[j] in its subarray */
 sh int *subarrayindex;   /* subarrayindex[j]: subarray that receives array[j] */
 sh int numofprocsfor0;   /* number of processors assigned to subarray[0] */
 sh int pivot;
 sh int p = 0;            /* number of processors in this group */
 pr int j;

 $=mpadd( &p, 1);     /* renumber $ and compute p */

 seq pprintf(" qs(%d,%d)\n", n,p);

 if (n<=1)  return;   /* trivial */
 if (n==2) {          /* simple: */
    if (array[0]>array[1]) {   /*swap:*/
         pivot=array[0]; array[0]=array[1]; array[1]=pivot;
    }
    return;
 }

 if (p==1) {  /* sequential computation */
    /* element size 1 is passed to qsort() as in the rest of this file;
     * presumably an int occupies one unit on the target -- TODO confirm */
    farm  qsort( array, n, 1, cmp );
    return;
 }

 /* Allocate the temporary index arrays only now: the early exits above
  * never use them, so allocating earlier would waste shared memory at
  * every leaf of the recursion. */
 subindex = (int *) shalloc(n);
 subarrayindex = (int *) shalloc(n);

 subn[0] = subn[1] = subn[2] = 0;
 pivot = array[0];
 farm
   for (j=$; j<n; j+=p) {  /*in parallel select new subgroups for array elements*/
     /* the value returned by mpadd is used as the element's slot in its
      * subarray, while the counter subn[k] is incremented */
     if (array[j]<pivot)
                     { subarrayindex[j]=0; subindex[j]=mpadd(&subn[0],1); }
     else if (array[j]==pivot)
                     { subarrayindex[j]=1; subindex[j]=mpadd(&subn[1],1); }
          else       { subarrayindex[j]=2; subindex[j]=mpadd(&subn[2],1); }
   }
 /* now subn[k] holds the number of elements to be copied to subarray k */

 /* allocate subarrays and copy elements to them in parallel: */
 subarray[0] = (int *) shalloc( subn[0] );
 subarray[1] = (int *) shalloc( subn[1] );
 subarray[2] = (int *) shalloc( subn[2] );
 farm
   for (j=$; j<n; j+=p)
     subarray[subarrayindex[j]][subindex[j]] = array[j];

 if (subn[0]>1 && subn[2]>1) {   /* the general case */
    /* divide p processors in ratio subn[0] : subn[2] */
    numofprocsfor0 = (subn[0]*p)/(subn[0]+subn[2]);
    /* make sure that each of the two subgroups gets at least one processor */
    if (!numofprocsfor0)  numofprocsfor0 = 1;     /*correction*/
    if (numofprocsfor0==p) numofprocsfor0 = p-1;  /*correction*/
    fork ( 2; @=($<numofprocsfor0)? 0:1; ) {
       /* subgroup 0 sorts subarray[0], subgroup 1 sorts subarray[2] */
       qs( subarray[2*@], subn[2*@] );
    }
 }
 else  
 if (subn[0]>1)  qs( subarray[0], subn[0] );
 else
 if (subn[2]>1)  qs( subarray[2], subn[2] );
 /* else do nothing; subarray[1] holds only pivot-equal elements and
  * never needs sorting */

 /* now concatenate sorted subarrays: */
 farm  {        /*in parallel copy sorted subarrays into old array */
   for (j=$; j<subn[0]; j+=p)
     array[j] = subarray[0][j];
   for (j=$; j<subn[1]; j+=p)
     array[j+subn[0]] = subarray[1][j];
   for (j=$; j<subn[2]; j+=p)
     array[j+subn[0]+subn[1]] = subarray[2][j];
 } 
}


/* Print the N elements of the global array a on a single line, each one
 * preceded by a blank, and terminate the line with a newline. */
void print_array( void )
{
 pr int idx = 0;

 while (idx < N) {
   printf(" %d", a[idx] );
   idx++;
 }
 printf("\n");
}


/* Private per-processor character that receives the user's reply to the
 * "Display array (y/n)?" prompt; main() compares it against 'y'. */
pr char c;

/* Program entry point.
 *
 * The processor with $==0 asks for the array size N and whether the array
 * should be displayed.  Every processor seeds its random generator with a
 * value derived from $.  Inside the start block the array is allocated,
 * filled with random values in the range 0..1023, optionally printed,
 * sorted with qs() and optionally printed again.  Finally the processor
 * with $==0 reports the cycle count measured around the call to qs().
 */
void main( void )
{
 pr int j;
 pr unsigned int starttime, stoptime;
 /* Buffer for string replies.  Reading a "%s" reply directly into the
  * single character c would write past it (at least the terminating
  * '\0' is stored after the first character).
  * NOTE(review): "%s" itself is still unbounded; add a field width if the
  * scanf of the Fork library supports one. */
 pr char answer[128];

 if ($==0) {
    printf("Enter N = ");
    N = 0;                       /* stays 0 if no number can be read */
    scanf("%d", &N);
    if (N < 0) {                 /* N is used as an allocation size below */
       printf("\nN must not be negative; using N = 0\n");
       N = 0;
    }
    printf("\nDisplay array (y/n)? ");
    answer[0] = '\0';
    scanf("%s", answer);
    c = answer[0];               /* only the first character is evaluated */
 }
 srand( 8*$*$*$ + 17 );    /* seed random generator */
 start {
   a = (int *) shalloc( N );
   farm
     for (j=$; j<N; j+=__STARTED_PROCS__)
       a[j] = abs(rand())%1024;           /*set array*/
   /* c is private and was only set by the processor that read the reply */
   if (c=='y')  seq  print_array();
   farm  starttime = getct();

   qs( a, N ); 

   farm  stoptime = getct();
   if (c=='y')  seq  print_array();
 }
 if ($==0) {
   printf("\nTime: %u PRAM CPU Cycles\n", stoptime - starttime );
   /* wait for input before terminating; the reply itself is ignored */
   printf("<Ctrl-C>"); scanf("%s", answer);
 }
 barrier; exit(0);
}

