home *** CD-ROM | disk | FTP | other *** search
- /*
- ULTRA IN C
-
- This is the ULTRA random number generator written entirely
- in C. Efficiency has always been sacrificed for readability.
-
- Hopefully this may serve as a model for an assembler version
- of this routine. The programmer should avoid simply duplicating
- and should use the features usually present in any assembler
- to increase the speed of this routine.
-
- Especially the subroutine SWB should be replaced by the one
- machine instruction (usually called subtract-with-borrow) that
- is available in almost every hardware.
-
- For people not familiar with 8086 assembler, it may help to
- consult this when reading the assembler code. The output of
- this program should exactly match the output of ULTRADEM.C
- linked with ULTRA.C
- */
-
- #define N 37 /* The number of 32 bit words in the table */
-
- unsigned long swbx[N],
- x[N] =
- {2514488732, 431628406, 1236830750, 4003720720, 823319423, 94684520,
- 1527338240, 679520694, 2422692153, 3355680443, 728196457, 701184857,
- 1109123469, 3203485719, 1941719477, 2470545560, 1105091547, 2688956238,
- 3621637922, 1273782189, 4288988815, 764409422, 2813964156, 2010860282,
- 683656203, 3648603015, 73133971, 3968408432, 3402270263, 3808241722,
- 2695095159, 2490074496, 1477949423, 3995273805, 760366478, 2466020322,
- 2727416986}; /* seed array for the SWB generator. */
-
- char bits, /* Eight bits to be fed out one at a time by i1bit. */
- nbits=0, /* Number of fresh bits remaining in bits. */
- flags=0; /* The state of the carry flag for the SWB generator. */
- int swbn=0; /* Number of fresh bytes remaining in swbneg. */
- long unsigned congx = 0x2C6CE807;
-
- /* rinit initializes the constants and creates a seed array one bit at
- a time by taking the leading bit of the xor of a shift register
- and a congruential sequence. The same congruential generator continues
- to be used as a mixing generator for the Subtract-with-borrow generator
- to produce the `ultra' random numbers
-
- Since this is called just once, speed doesn't matter much and it might
- be fine to leave this subroutine coded just as it is.
- */
-
- void rinit(long unsigned congy,long unsigned shrgx)
- { short i,j;
- unsigned long tidbits;
- congx=congy;
- for (i=0;i<N;i++)
- { bits = 0;
- for (j=32;j>0;j--)
- { congx = congx * 69069;
- shrgx = shrgx ^ (shrgx >> 15);
- shrgx = shrgx ^ (shrgx << 17);
- tidbits = (tidbits>>1) | (0x80000000 & (congx^shrgx));
- }
- x[i] = tidbits;
- }
- swbn = 0;
- nbits = 0;
- flags = 0;
- }
-
- /* SWB is the subtract-with-borrow operation which should be one line
- in assembler code. In order to do a 32 bit s-w-b, we do two 16 bit
- s-w-b's in 32 bit variables and look for the borrow bit in their
- 17th bit.
-
- In a proper impelementation, this would subroutine would not exist,
- and would be be replaced by hardware s-w-b operation in the SWBFill
- subroutine.
- */
-
- long SWB(long x, long y, char *c)
- { long hi,lo;
- lo = (x & 0x0000FFFF) - (y & 0x0000FFFF) - *c;
- *c = ((lo&0x00010000) != 0);
- lo = lo & 0x0000FFFF;
- x = x>>16;
- y = y>>16;
- hi = (x & 0x0000FFFF) - (y & 0x0000FFFF) - *c;
- *c = ((hi&0x00010000) != 0);
- return (hi << 16) | lo;
- }
-
- /* This is the heart of the system and should be written in assembler
- to be as fast as possible. It may be good to unravel the loop and
- simply write 37 consecutive SWB operations!
- */
-
- void SWBFill(void)
- { short i;
- for (i=0; i<24; i++) x[i] = SWB(x[i+13],x[i],&flags);
- for (i=24; i<N; i++) x[i] = SWB(x[i-24],x[i],&flags);
- for (i=0; i<N; i++) swbx[i] = x[i] ^ (congx = congx * 69069);
- }
-
- /* The checkfill routine is written out of laziness. In a proper
- implementation this should not ba a subroutine call. Instead
- each routine should do it's own check internally.
-
- The routine checks to see if there are k bytes left in the array.
- If there aren't it refills the array and sets the appropriate
- counts and pointers. Once there are k bytes available, it returns
- a pointer to the first free byte, and decrements the free byte
- count by k.
-
- By converting the pointer type, this can be used to implement
- all the integer types of random numbers.
- */
-
- char *swbp;
- char *CheckFill(char k)
- { if ( swbn-k < 0 ) /* If k bytes not available */
- { SWBFill(); /* Refill the bit array */
- swbn=4*N; /* Reset counter */
- swbp=(char *) swbx; /* Reset pointer */
- };
- swbn = swbn - k; /* decrement the count by k */
- swbp = swbp + k; /* increment the pointer by k */
- return swbp - k; /* return the old pointer value */
- }
-
- /* The following routines should be coded in assembler or
- perhaps as macros in C */
-
- long i32bit(void) { return *(long *) CheckFill(4); }
- long i31bit(void) { return *(long *) CheckFill(4) & 0x7FFFFFFF; }
- short i16bit(void) { return *(short *) CheckFill(2); }
- short i15bit(void) { return *(short *) CheckFill(2) & 0x7FFF; }
- char i8bit(void) { return *CheckFill(1); }
- char i7bit(void) { return *CheckFill(1) & 0x7F; }
-
- /* The uni/vni and the 1-bit routines are perhaps best done in assembler
- because they involve a lot of minute bit/byte twiddling
-
- Often I have used 24-bit shifts etc to get access to a byte. Obviously
- in a byte addressable architecture this could be done with great ease.
-
- I freely call i32bit and i31bit to create the uni's and vni's. It
- would probably be faster to duplicate the code of those routines,
- and avoid the subroutine call overhead. This is especially true
- because uni and vni are probably the most often called form of any
- random number generator.
- */
-
- char i1bit(void)
- { char temp;
- if (nbits==0)
- { bits = i8bit();
- nbits = 7;
- } else nbits--;
- temp = bits & 1;
- bits = bits >>1;
- return temp;
- }
-
- #define two2neg31 ( (2.0/0x10000) / 0x10000 )
- #define two2neg32 ( (1.0/0x10000) / 0x10000 )
-
- float vni(void)
- { float scale;
- long temp; unsigned long leadbyte;
- scale = two2neg31;
- temp = i32bit();
- leadbyte = temp & 0xFF000000;
- while ((leadbyte == 0xFF000000) | (leadbyte == 0))
- {
- /* This following two lines are executed once every 128 calls,
- and so it doesn't need to be crafted too efficiently */
- leadbyte = (unsigned long) i8bit() << 24;
- scale = scale/128;
- }
- temp = (temp & 0x00FFFFFF) | leadbyte;
- return temp * scale;
- }
-
- float uni(void)
- { float scale;
- long temp,leadbyte;
- scale = two2neg31;
- temp = i31bit();
- leadbyte = temp & 0xFF000000;
- while (leadbyte == 0)
- { /* This following two lines are executed once every 128 calls,
- and so it doesn't need to be crafted too efficiently */
- leadbyte = ((long) i7bit()) << 24;
- scale = scale/128;
- }
- temp = (temp & 0x00FFFFFF) | leadbyte;
- return temp*scale;
- }
-
- /* The duni and dvni routines are written to take eight consecutive
- bytes and simply float them. No care is taken if the leading bits
- are insignificant becuase with such a high precision, a few leading
- bits shouldn't matter.
-
- It is worth exploring whether it is faster to float a number using
- a floating point processor, or just by counting the number of leading
- bits that are zero, and inserting the exponent by hand (i.e. with
- the main cpu).
- */
-
- double dvni(void)
- { signed long *temp;
- unsigned long lo;
- temp = (signed long *) CheckFill(8);
- lo = (unsigned long) *(temp++);
- return ((*temp) + lo * two2neg32) * two2neg31; }
-
- double duni(void)
- { unsigned long *temp, lo;
- temp = (unsigned long *) CheckFill(8);
- lo = *(temp++);
- return ( ((*temp)&0x7FFFFFFF) + lo * two2neg32) * two2neg31; }
-