Mac-Source 1994 July

home *** CD-ROM | disk | FTP | other *** search

/ Mac-Source 1994 July / Mac-Source_July_1994.iso / C and C++ / System / Ultra / UltraLib.c < prev

Wrap

C/C++ Source or Header | 1992-12-08 | 15.4 KB | 590 lines | [TEXT/KAHL]

/************************** ULTRA (v1.0) ************************** This is a Macintosh/Motorola implementation of the Ultra pseudo-random number generator -- the state-of-the-art as of 11/92. It requires a 68020/68881 (or better) chip set. References -- Creator: Prof. George Marsaglia and Co-workers Dept. of Statistics Florida State University Code by: Dr. Michael P. McLaughlin MITRE Corp. MS W337 7525 Colshire Dr. McLean, VA 22102 E-mail: mpmcl@mitre.org Please acknowledge the work of others by leaving this header intact. ************************************************************************ --------------- General Remarks --------------- This code was written in a Think C <TM> environment but should be readily portable. Assembly language was used to reduce execution time. However, to ensure portablilty, C was used in a few places for functions returning a float or double. Things to watch out for include the following: Compiler options for 68020 and 68881 are REQUIRED. sizeof(double) is compiler-option dependent and is here unspecified. Longs and floats are 4 bytes. Integers are 2 bytes. Functions returning bytes or bits are pre-cast to integer. Registers overwritten --> D0,D1,D2,A0,A1,FP0,FP1,FP2,FP3 WARNING! Be sure to compile and run UltraDemo in your desired environment. Check that your output is identical to that supplied with this package. Users are cautioned that this code is highly optimized. Even seemingly innocuous changes, such as rearranging the functions below, may cause significant performance degradation. --------------- Timings --------------- The execution times for the functions supplied are somewhat variable and are increased by factors such as instruction-cache overflow and program segmentation. The timings below are averages of 1-10 million calls using a standard MacIIci (68030/68882) with extensions off and the calling module and UltraLib in the same segment. ("Native Floating Point" was not used.) To facilitate comparisons, the calling overhead has been subtracted out. Your timings may differ. Function Microseconds per Call Ultra_long32 4.7 Ultra_long31 4.9 Ultra_int16 2.9 Ultra_int15 3.1 Ultra_int8 2.2 Ultra_int8u 2.2 Ultra_int7 2.3 Ultra_int1 1.6 Ultra_uni 14.3 Ultra_vni 16.2 Ultra_duni 31.8 Ultra_dvni 29.6 Ultra_norm 50.5 Ultra_expo 39.5 Ultra_SaveStart 48.0 Ultra_RecallStart 49.0 ***********************************************************************/ #include <math.h> struct { /* to restart Ultra at a known beginning */ float Ugauss; unsigned long UFSR[37],USWB[37],Ubits,Useed1,Useed2; int Ubrw,Ubyt,Ubit,Udum; char *Uptr; } Ultra_Remember; /* 324 bytes */ double Ultra_2n63,Ultra_2n31,Ultra_2n7; float Ultra_gauss; /* remaining Ultra_norm */ unsigned long Ultra_FSR[37], /* feedback-shift-register output */ Ultra_SWB[37], /* lagged-Fibonacci output */ Ultra_bits, /* bits for Ultra_int1 */ Ultra_seed1, /* seeds MUST be initialized with */ Ultra_seed2; /* non-zero values */ int Ultra_brw, /* borrow bit */ Ultra_byt, /* # bytes left in Ultra_FSR[37] */ Ultra_bit, /* # bits left in Ultra_bits */ Ultra_dum; /* dummy, to give multiple of 4 */ char *Ultra_ptr; /* running pointer to Ultra_FSR */ /* Ultra_Refill() is the core of Ultra (see Marsaglia and Zaman, 1991, "A New Class of Random Number Generators", Annals of Applied Probability, vol. 1(3), 426-480). It refills Ultra_SWB[37] and uses a feedback-shift algorithm to overlay a second PRNG, thus creating Ultra_FSR[37] which supplies all of the Ultra-random bytes. */ void Ultra_Refill() /* Unrolling the loops is actually slower! */ { asm { LEA Ultra_SWB,A1 ; Ultra_SWB[0] LEA 52(A1),A0 ; Ultra_SWB[13] MOVEQ #0,D0 ; restore extend bit SUB Ultra_brw,D0 MOVEQ #23,D2 ; 24 of these @cont1 MOVE.L (A0)+,D0 MOVE.L (A1),D1 SUBX.L D1,D0 MOVE.L D0,(A1)+ DBRA D2,@cont1 LEA Ultra_SWB,A0 MOVEQ #12,D2 ; 13 of these @cont2 MOVE.L (A0)+,D0 MOVE.L (A1),D1 SUBX.L D1,D0 ; "subtract with borrow" MOVE.L D0,(A1)+ DBRA D2,@cont2 MOVEQ #0,D0 MOVE.L D0,D1 ADDX D1,D0 ; get borrow bit MOVE D0,Ultra_brw ; and save it LEA Ultra_SWB,A0 LEA Ultra_FSR,A1 MOVE.L A1,Ultra_ptr ; reinitialize running pointer MOVE.L Ultra_seed1,D0 MOVE.L #69069,D1 ; overlay feedback-shift PRNG MOVEQ #36,D2 ; 37 of these @cont3 MOVE.L (A0)+,(A1) MULU.L D1,D0 EOR.L D0,(A1)+ DBRA D2,@cont3 MOVE.L D0,Ultra_seed1 ; save global for next time MOVE #148,Ultra_byt ; 4*37 bytes now available } } /* Ultra_int7() returns a two-byte integer, U[0,127]. */ int Ultra_int7() { asm { SUBQ #1,Ultra_byt ; one byte left? BMI.S @cont2 ; branch if not @cont1 MOVE.L Ultra_ptr,A0 MOVEQ #0,D0 ; clear long for Ultra_uni() MOVE.B (A0),D0 BCLR.L #7,D0 ; clear sign bit ADDQ.L #1,Ultra_ptr RTS @cont2 JSR Ultra_Refill SUBQ #1,Ultra_byt BRA @cont1 } } /* Ultra_long32() returns a four-byte long, U[-2147483648,2147483647]. It may, of course, be cast to unsigned long. */ long Ultra_long32() { asm { SUBQ #4,Ultra_byt ; four bytes left? BMI.S @cont2 ; branch if not @cont1 MOVE.L Ultra_ptr,A0 MOVE.L (A0),D0 ADDQ.L #4,Ultra_ptr RTS @cont2 JSR Ultra_Refill SUBQ #4,Ultra_byt BRA @cont1 } } /* Ultra_uni() returns a four-byte float, U(0,1), with a mantissa of at least 25-bit precision. Ultra_uni() achieves this precision by continually examining the leading byte of the mantissa. If this byte is zero, it is replaced with a new random byte and the decimal point readjusted. This procedure also ensures that Ultra_uni() never returns zero. */ float Ultra_uni() { float result; /* inserted for portability */ asm { FMOVE.X Ultra_2n31,FP0 ; modulus SUBQ #4,Ultra_byt ; modified Ultra_long31 BMI.S @cont3 @cont1 MOVE.L Ultra_ptr,A0 MOVE.L (A0),D1 BCLR.L #31,D1 ADDQ.L #4,Ultra_ptr MOVE.L D1,D2 AND.L #0xFF000000,D2 ; test leading byte BEQ.S @cont4 ; branch if zero @cont2 FMUL.L D1,FP0 ; normalize FMOVE.S FP0,result } return(result); asm { @cont3 JSR Ultra_Refill SUBQ #4,Ultra_byt BRA @cont1 @cont4 JSR Ultra_int7 ; get another leading byte FMUL.X Ultra_2n7,FP0 ; and re-scale TST.B D0 ; make sure new byte is non-zero BEQ @cont4 ; else, try again ROR.L #8,D0 ; low -> high byte OR.L D0,D1 ; replace high byte of longword BRA @cont2 } } /* Ultra_vni() returns a four-byte float, U(-1,1), with the same features as described above for Ultra_uni(). */ float Ultra_vni() { float result; /* inserted for portability */ asm { FMOVE.X Ultra_2n31,FP0 ; modulus SUBQ #4,Ultra_byt ; modified Ultra_long32 BMI.S @cont4 @cont1 MOVE.L Ultra_ptr,A0 MOVE.L (A0),D1 BPL.S @cont2 FNEG.X FP0 ; else modulus = -modulus NEG.L D1 ; and D1 = abs(D1) @cont2 ADDQ.L #4,Ultra_ptr ; continue as in Ultra_uni() MOVE.L D1,D2 AND.L #0xFF000000,D2 ; test leading byte BEQ.S @cont5 ; branch if zero @cont3 FMUL.L D1,FP0 ; normalize FMOVE.S FP0,result } return(result); asm { @cont4 JSR Ultra_Refill SUBQ #4,Ultra_byt BRA @cont1 @cont5 JSR Ultra_int7 ; get another leading byte FMUL.X Ultra_2n7,FP0 ; and re-scale TST.B D0 ; make sure new byte is non-zero BEQ @cont5 ; else, try again ROR.L #8,D0 ; low -> high byte OR.L D0,D1 ; replace high byte of longword BRA @cont3 } } /* Ultra_norm() returns a four-byte float, N(mu,sigma). The normal variates returned are exact, not approximate. Ultra_norm() uses Ultra_vni() so there is no possibility of a result exactly equal to mu. Note that mu and sigma must also be floats, not doubles. */ float Ultra_norm(float mu, float sigma) { float result; /* inserted for portability */ asm { MOVE.L Ultra_gauss,result ; is there one left? BEQ.S @cont1 CLR.L Ultra_gauss } return(sigma*result + mu); asm { @cont1 BSR.S @vni ; "Polar" method FMOVE.X FP0,FP1 BSR.S @vni FMOVE.X FP0,FP2 FMOVE.X FP1,FP3 FMUL.X FP2,FP2 FMUL.X FP3,FP3 FADD.X FP3,FP2 FCMP.X #1,FP2 FBGE @cont1 ; Prob(branch) = 1 - π/4 FMOVE.X FP2,FP3 FLOGN.X FP2 FADD.X FP2,FP2 FNEG.X FP2 FDIV.X FP3,FP2 FSQRT FP2 FMUL.X FP2,FP0 ; --> standard normals FMUL.X FP2,FP1 FMOVE.S FP0,Ultra_gauss ; save the second one FMUL.S sigma,FP1 FADD.S mu,FP1 FMOVE.S FP1,result ; and return the first } return(result); asm { @vni FMOVE.X Ultra_2n31,FP0 ; modified Ultra_vni SUBQ #4,Ultra_byt ; modified Ultra_long32 BMI.S @vni4 @vni1 MOVE.L Ultra_ptr,A0 MOVE.L (A0),D1 BPL.S @vni2 FNEG.X FP0 ; else modulus = -modulus NEG.L D1 ; and D1 = abs(D1) @vni2 ADDQ.L #4,Ultra_ptr ; continue as in Ultra_uni() MOVE.L D1,D2 AND.L #0xFF000000,D2 ; test leading byte BEQ.S @vni5 ; branch if zero @vni3 FMUL.L D1,FP0 ; normalize RTS @vni4 JSR Ultra_Refill SUBQ #4,Ultra_byt BRA @vni1 @vni5 JSR Ultra_int7 ; get another leading byte FMUL.X Ultra_2n7,FP0 ; and re-scale TST.B D0 ; make sure new byte is non-zero BEQ @vni5 ; else, try again ROR.L #8,D0 ; low -> high byte OR.L D0,D1 ; replace high byte of longword BRA @vni3 } } /* Ultra_int1() returns a two-byte integer, U[0,1]. It uses Ultra_long32() and returns the bits one at a time. Unlike most PRNGs, all the bits are equally random. */ int Ultra_int1() { asm { MOVE Ultra_bit,D1 ; one bit left? BPL.S @cont1 ; faster than BMI.S!? JSR Ultra_long32 ; else, get 32 more MOVE.L D0,Ultra_bits MOVEQ #31,D1 MOVE D1,Ultra_bit ; bit number to test @cont1 MOVE.L Ultra_bits,D2 ; to see all 32 bits with BTST MOVEQ #0,D0 BTST.L D1,D2 ; examine bit #D1 BEQ.S @cont2 MOVEQ #1,D0 @cont2 SUBQ #1,Ultra_bit RTS } } /* Ultra_long31() returns a four-byte integer, U[0,2147483647]. */ long Ultra_long31() { asm { SUBQ #4,Ultra_byt ; four bytes left? BMI.S @cont2 ; branch if not @cont1 MOVE.L Ultra_ptr,A0 MOVE.L (A0),D0 BCLR.L #31,D0 ; clear sign bit ADDQ.L #4,Ultra_ptr RTS @cont2 JSR Ultra_Refill SUBQ #4,Ultra_byt BRA @cont1 } } /* Ultra_int16() returns a two-byte integer, U[-32768,32767]. */ int Ultra_int16() { asm { SUBQ #2,Ultra_byt ; two bytes left? BMI.S @cont2 ; branch if not @cont1 MOVE.L Ultra_ptr,A0 MOVE (A0),D0 ADDQ.L #2,Ultra_ptr RTS @cont2 JSR Ultra_Refill SUBQ #2,Ultra_byt BRA @cont1 } } /* Ultra_int15() returns a two-byte integer, U[0,32767]. */ int Ultra_int15() { asm { SUBQ #2,Ultra_byt ; two bytes left? BMI.S @cont2 ; branch if not @cont1 MOVE.L Ultra_ptr,A0 MOVE (A0),D0 BCLR.L #15,D0 ; clear sign bit ADDQ.L #2,Ultra_ptr RTS @cont2 JSR Ultra_Refill SUBQ #2,Ultra_byt BRA @cont1 } } /* Ultra_int8() returns a two-byte integer, U[-128,127]. It gets a random byte and casts it to integer. This operation extends the sign bit. Consequently, you may NOT cast this function to unsigned integer or long (see Ultra_int8u() below). */ int Ultra_int8() { asm { SUBQ #1,Ultra_byt ; one byte left? BMI.S @cont2 ; branch if not @cont1 MOVE.L Ultra_ptr,A0 MOVE.B (A0),D0 EXT.W D0 ; make D0 an int ADDQ.L #1,Ultra_ptr RTS @cont2 JSR Ultra_Refill SUBQ #1,Ultra_byt BRA @cont1 } } /* Ultra_int8u() returns a two-byte integer, U[0,255]. It proceeds as in Ultra_int8() but clears the high byte instead of extending the sign bit. */ int Ultra_int8u() /* unsigned byte cast to int */ { asm { SUBQ #1,Ultra_byt ; one byte left? BMI.S @cont2 ; branch if not @cont1 MOVE.L Ultra_ptr,A0 MOVEQ #0,D0 ; clear high byte MOVE.B (A0),D0 ADDQ.L #1,Ultra_ptr RTS @cont2 JSR Ultra_Refill SUBQ #1,Ultra_byt BRA @cont1 } } /* Ultra_duni() and Ultra_dvni return double-precision U[0,1) and U(-1,1). In both cases, zero IS a possibility. These functions are intended for those occasions when seven significant figures are not enough. If you need type double, but not 63 bits of precision, then it is MUCH faster to use Ultra_uni() or Ultra_vni() and cast -- implicitly or explicitly. Because C compilers usually support several different double formats, each with its own set-up requirements, these functions were written in C. */ double Ultra_duni() { return(Ultra_long31()*Ultra_2n31 + ((unsigned long) Ultra_long32())*Ultra_2n63); } double Ultra_dvni() { return(Ultra_long32()*Ultra_2n31 + ((unsigned long) Ultra_long32())*Ultra_2n63); } /* Ultra_expo() returns a four-byte float, Exponential(mu). The parameter, mu, is both the mean and variance of this distribution. It must be a float greater than zero. */ float Ultra_expo(float mu) { float result; /* inserted for portability */ asm { FMOVE.X Ultra_2n31,FP0 ; modified Ultra_uni SUBQ #4,Ultra_byt ; modified Ultra_long31 BMI.S @cont3 @cont1 MOVE.L Ultra_ptr,A0 MOVE.L (A0),D1 BCLR.L #31,D1 ADDQ.L #4,Ultra_ptr MOVE.L D1,D2 AND.L #0xFF000000,D2 ; test leading byte BEQ.S @cont4 ; branch if zero @cont2 FMUL.L D1,FP0 ; normalize FLOGN.X FP0 FNEG.X FP0 FMUL.S mu,FP0 FMOVE.S FP0,result } return(result); asm { @cont3 JSR Ultra_Refill SUBQ #4,Ultra_byt BRA @cont1 @cont4 JSR Ultra_int7 ; get another leading byte FMUL.X Ultra_2n7,FP0 ; and re-scale TST.B D0 ; make sure new byte is non-zero BEQ @cont4 ; else, try again ROR.L #8,D0 ; low -> high byte OR.L D0,D1 ; replace high byte of longword BRA @cont2 } } /* Ultra_SaveStart() and Ultra_RecallStart() save and restore, respectively, the global variables of Ultra, from Ultra_gauss through Ultra_ptr. If you think it may be necessary to recall a sequence of random numbers exactly, first call Ultra_SaveStart(). To recover the sequence later, call Ultra_RecallStart(). If you wish to terminate the program and still recover the same random numbers, you must save the array Ultra_Remember[] to a file and read it back upon restart. Ultra_Remember[] is declared in the header file as an array of unsigned longs even though only some of its variables are actually of this type. */ void Ultra_SaveStart() { asm { LEA Ultra_gauss,A0 LEA Ultra_Remember,A1 MOVEQ #80,D0 ; 81 longs @cont MOVE.L (A0)+,(A1)+ DBRA D0,@cont } } void Ultra_RecallStart() { asm { LEA Ultra_gauss,A0 LEA Ultra_Remember,A1 MOVEQ #80,D0 ; 81 longs @cont MOVE.L (A1)+,(A0)+ DBRA D0,@cont } } /* Ultra_Init() computes a few global constants, initializes others and fills in the initial Ultra_SWB array using the user-supplied seeds which must be non-zero unsigned longs. It terminates by calling Ultra_SaveStart() so that you may recover the whole sequence of random numbers by calling Ultra_Init(), with the same seeds, then calling Ultra_RecallStart(). */ void Ultra_Init() { unsigned long tempbits,ul; int i,j; if ((Ultra_seed1 == 0) || (Ultra_seed2 == 0)) { puts("\nYou forgot to supply seeds for Ultra!!"); exit(1); } for (i=0;i<37;i++) { tempbits = 0; for (j=32;j>0;j--) { Ultra_seed1 *= 69069; Ultra_seed2 ^= (Ultra_seed2 >> 15); Ultra_seed2 ^= (Ultra_seed2 << 17); ul = Ultra_seed1 ^ Ultra_seed2; tempbits = (tempbits >> 1) | (0x80000000 & ul); } Ultra_SWB[i] = tempbits; } Ultra_2n31 = ((2.0/65536)/65536); Ultra_2n63 = 0.5*Ultra_2n31*Ultra_2n31; Ultra_2n7 = 1.0/128; Ultra_gauss = 0.0; Ultra_byt = Ultra_brw = 0; Ultra_bit = -1; Ultra_SaveStart(); }