/* @TITLE "aux.c: non-cycle-counted auxiliary functions"*/
/* 
 * Some non-cycle-counted auxiliary functions, mostly 
 * translations to libc functions. 
 *
 * functions:
 *   AllocWords
 *   noncyc_free
 *   noncyc_free2
 *   noncyc_WordCopy
 *   DMAcopy
 *   alloc2d
 *   free2d
 *   Factor
 *   SubsetBlock
 *
 * The reason:
 * "The CYCLE_COUNTING macros affect the statically enclosed code.  There
 * is a bug in the current version that will be fixed with V3.0: calls to
 * cycle-counted library routines should not be cycle-counted inside
 * non-cycle-counted regions.  This somewhat contradicts what I said
 * above, but it seems to make more sense.  The problem now is that you
 * can't easily get to the non-cycle-counted version of malloc from
 * within an augmented file; augment always renames them with calls to
 * the cycle-counted versions.  In the new version, augment will only do
 * the translation for cycle-counted regions.  For user-defined
 * procedures, there is only one version, so it either be always cycle
 * counted or never cycle counted."  [Eric Brewer]
 *
 * Part of 
 *           The STARFISH Parallel file-system simulator
 *      (Simulation Tool for Advanced Research in File Systems)
 *
 *                              David Kotz
 *                          Dartmouth College
 *                             Version 3.0
 *                             October 1996
 *                         dfk@cs.dartmouth.edu
 */

/* $Id: aux.c,v 3.0 1996/10/18 06:05:51 dfk RELEASE3 dfk $ */

#include "dmcache.h"	      /* the overall include file */
#include "aux.h"
#include "userdata.h"	      /* for UD_TRACE */
#include <math.h>	      /* for sqrt for Factor() and SubsetBlock() */
#include <stdlib.h>	      /* for malloc, free, size_t */
#include <string.h>	      /* for memmove */

/* @SUBTITLE "AllocWords, noncyc_free: non-cycle-counted malloc and free" */
/* AllocWords: allocate room for wordcount Words by calling malloc from
 * this (non-augmented) file; see the note at the top of the file for why
 * that matters.  Unlike malloc, the length is given in words rather than
 * bytes, and the result comes back as a Word*.
 *  Returns NULL if malloc fails, or if wordcount is negative or so large
 * that wordcount * sizeof(Word) does not fit in a size_t.
 *  The block can be released with noncyc_free().
 */
Word *
AllocWords(int wordcount)
{
    /* Refuse counts whose byte size would wrap around; otherwise the
     * multiplication below could quietly ask malloc for a much smaller
     * block than the caller believes it received.
     */
    if (wordcount < 0 || (size_t) wordcount > ((size_t) -1) / sizeof(Word))
      return(NULL);

    return((Word *)malloc((size_t) wordcount * sizeof(Word)));
}

/* noncyc_free: hand p straight to the C library's free().  It lives in
 * this file so that callers in cycle-counted code can reach the plain
 * free (see the note at the top of the file).
 */
void
noncyc_free(void *p)
{
    free(p);		      /* nothing else to do; NULL is fine too */
}

/* noncyc_free2: two-argument flavor of noncyc_free.  The first argument
 * is ignored; only p is freed.
 * NOTE(review): presumably this exists so it can be used where a
 * two-argument callback is expected -- confirm against the callers.
 */
void
noncyc_free2(void *dummy, void *p)
{
    (void) dummy;	      /* deliberately unused */
    free(p);
}

/* @SUBTITLE "noncyc_WordCopy: WordCopy, but not cycle-counted" */
/* Copy `words` Words from src to dest by calling the C library from this
 * (non-augmented) file; see the note at the top of the file.
 *  NOTE the argument order: source first, then destination, which is
 * the reverse of memcpy/memmove.
 *  The two regions may overlap.  A negative count would be converted to
 * an enormous byte count, so callers must pass words >= 0.
 */
void
noncyc_WordCopy(void *src, void *dest, int words)
{
    UD_TRACE(words * sizeof(Word), 0, "noncyc_WordCopy");

    /* memmove is the ISO C replacement for the legacy BSD bcopy(); like
     * bcopy it copes with overlapping regions, but it takes the
     * destination as its FIRST argument.
     */
    memmove(dest, src, words * sizeof(Word));
}

/* @SUBTITLE "DMAcopy: copy word-aligned memory like DMA"  */
/* Like WordCopy, except that time is charged as if the transfer had been
 * done with DMA.  That is DMA in the foreground: we assume a hardware
 * block-transfer rather than a load/store sequence.
 *
 *  copywords  number of words actually copied from src to dest
 *  costwords  number of words we charge time for
 *
 * Keeping the two separate is handy when #undef REAL_DATA, because our
 * callers are typically moving some mix of meta-data and UserData
 * (eg, an MBUFFER).
 *  Both buffers should be word-aligned; since either one could be
 * UserData or a pointer, that cannot be checked here.
 *  The DMA is assumed to write one word per cycle.
 */

void
DMAcopy(void *src, void *dest, int copywords, int costwords)
{
    int fullgroups = costwords >> 3;  /* whole groups of 8 words to charge */
    int remainder = costwords & 0x7;  /* words left after the whole groups */
    int g;			      /* counts the groups charged so far */

    UD_TRACE(copywords * sizeof(Word), costwords * sizeof(Word), "DMAcopy");

    /* The data movement itself is a non-cycle-counted copy; when there
     * is nothing to copy we only leave a trace record.
     */
    if (copywords <= 0) {
	UD_TRACE(copywords * sizeof(Word), 0, "nocopy");
    } else {
	noncyc_WordCopy(src, dest, copywords);
    }

    /* Charge a little for DMA startup:
     *  writing a one-word address	MEM_ACCESS_LATENCY
     *  writing a one-word count	MEM_ACCESS_LATENCY
     *  time until first word 		MEM_ACCESS_LATENCY-1
     */
    AddTime(3 * MEM_ACCESS_LATENCY - 1);
    QuantumCheck();	      /* manual check of clock against quantum */

    /* What we really want is AddTime(costwords), but that is too big a
     * jump; the manual recommends doing it in little jumps.  So charge
     * 8 cycles (8 words) at a time, checking the quantum after each.
     */
    for (g = 0; g < fullgroups; g++) {
	AddTime(8);
	QuantumCheck();
    }

    /* ...and finally the 0-7 words that did not fill a group. */
    AddTime(remainder);
    QuantumCheck();
}

/* @SUBTITLE "alloc2d - matrix memory allocation" */

/* Builds a C-like 2D matrix, with a side-vector for pointer-indirect
 * access to the matrix.  The elements live in one contiguous block of
 * rows * cols * elsize bytes; side[r] points at the first byte of row r.
 * The side vector is what is returned.
 *  If either allocation fails the INVARIANT4 check below fires (its
 * definition is elsewhere; presumably it reports and stops the run).
 *  rows, cols and elsize are expected to be positive; they are not
 * validated here.  In particular, with rows == 0 nothing is stored in
 * the side vector, so the result must not be handed to free2d(), which
 * reads side[0].
 *  Release the matrix with free2d().
 */

void **					/* pointer to matrix */
alloc2d(int rows, int cols, int elsize)
	/* number of rows and cols */
	/* size of each matrix element (bytes) */
{
    /* All size arithmetic is done in size_t: as plain int products,
     * cols * elsize and rows * cols * elsize can overflow (undefined
     * behavior) for a large matrix before malloc ever sees the value.
     */
    size_t rowsize = (size_t) cols * (size_t) elsize; /* bytes per row */
    char *block;	      /* the big block that is the matrix */
    char **side;	      /* the side vector pointing to rows */
    char *arow;		      /* pointer to a row */
    int row;		      /* index of a row */

    block = (char *) malloc((size_t) rows * rowsize);
    side = (char **) malloc((size_t) rows * sizeof(char *));

    INVARIANT4(block != NULL && side != NULL,
	       "alloc2d: not enough memory for %d of size %d\n",
	       rows * cols, elsize);

    /* Point each side-vector entry at the start of its row. */
    arow = block;
    for (row = 0; row < rows; row++) {
	side[row] = arow;
	arow += rowsize;
    }

    return((void **)side);
}

/* @SUBTITLE "free2d - frees a matrix allocated with alloc2d" */
/* Give us back the pointer you got from alloc2d. 
 * That pointer is really the side vector (a void **); its first entry
 * points at row 0, which is also the base of the one big block that
 * holds all the data.  So two frees release the whole matrix.
 *  Passing NULL is a harmless no-op, just as with free().
 */
void
free2d(void *ptr)	      /* really (void **) */
{
    void **side = (void **) ptr;

    /* Without this check, side[0] below would dereference NULL. */
    if (side == NULL)
      return;

    free(side[0]);	      /* the big data block (start of row 0) */
    free(side);		      /* then the side vector itself */
}


/* @SUBTITLE "Factor - factor a number" */
/* Split n, n>0, into two factors with (*f1) * (*f2) == n, where *f1 is
 * the largest factor of n that is <= sqrt(n).  Put another way, the two
 * factors are as close together as possible, and *f1 <= *f2.  When n has
 * no factor other than 1 in that range (eg, n is prime), the answer is
 * 1 and n.  Very simplistic algorithm: plain trial division!
 */
void
Factor(int n, int *f1, int *f2)
{
    int cand;		      /* candidate for the smaller factor */

    INVARIANT3(n > 0, "Factor: n (%d) must be > 0", n);

    /* Begin at floor(sqrt(n)) and walk downward until something divides
     * n evenly.  The walk always ends, at cand == 1 if nowhere sooner.
     */
    for (cand = (int) sqrt((double) n); n % cand != 0; cand--)
      ;

    *f1 = cand;		      /* the smaller factor */
    *f2 = n / cand;	      /* the larger factor */
}

/* @SUBTITLE "SubsetBlock: is this block in the subset?" */
/* Used by Whole_read_subset in cp.ca and by split_emptyBlock 
 * in iopfs-general.ca.  A block belongs to the subset when the odd
 * number it stands for, block*2+1, has no odd divisor d with
 * 3 <= d <= sqrt(block*2+1); ie, when block*2+1 is a prime number.
 * According to the original author that gives a reasonably random
 * subset of about 10% of the blocks in the ranges being considered.
 *  NOTE: block 0 stands for 1, which is not prime, but nothing divides
 * it in the loop below, so block 0 IS reported as part of the subset.
 */

boolean
SubsetBlock(ulong block)
{
    ulong candidate = block * 2 + 1;	    /* the odd number to test */
    ulong limit = sqrt((double) candidate); /* largest divisor worth trying */
    ulong divisor = 3;			    /* candidate is odd: skip 2 */

    while (divisor <= limit) {
	if (candidate % divisor == 0)
	  return(FALSE);      /* found a divisor: not in the subset */
	divisor += 2;	      /* only odd divisors can divide an odd number */
    }

    return(TRUE);	      /* nothing divided it */
}
