Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members

asm_math.h

00001 #ifndef __asm_math_H__
00002 #define __asm_math_H__
00003 
00004 #include "peonstdafx.h"
00005 
00006 /*=============================================================================
00007  ASM math routines posted by davepermen et al on flipcode forums
00008 =============================================================================*/
00009 
// pi and pi / 2 as single-precision compile-time constants.
// Written as literals (rather than 4 * atan( 1 )) so that they are constant-
// initialised: no library call at start-up and no dependence on the order in
// which other globals are initialised. Both literals round to the same float
// values the atan-based expressions produced.
static const float pi = 3.14159265358979323846f;
static const float half_pi = 1.57079632679489661923f;
00012 
00013 /*=============================================================================
00014         NO EXPLICIT RETURN REQUIRED FROM THESE METHODS!! 
00015 =============================================================================*/
00016 #if PEON_COMPILER == PEON_COMPILER_MSVC
00017 #       pragma warning( push )
00018 #       pragma warning( disable: 4035 ) 
00019 #endif
00020 
// Arc cosine of r, in radians.
//
// 32-bit x86 MSVC builds evaluate
//     half_pi + atan( r / -sqrt( 1 - r * r ) )
// on the x87 FPU and leave the result in st0, which is how the value is
// handed back to the caller -- that path therefore has no return statement.
// Inline __asm is not available on any other MSVC target, so every other
// compiler/target combination uses the C library acos().
float asm_arccos( float r ) {

#if ( PEON_COMPILER == PEON_COMPILER_MSVC ) && defined( _M_IX86 )

    const float asm_one = 1.f;
    const float asm_half_pi = half_pi;
    __asm {
        fld r               // st0 = r
        fld r               // st0 = r, st1 = r
        fmul r              // st0 = r * r
        fsubr asm_one       // st0 = 1 - r * r (reverse subtract)
        fsqrt               // st0 = sqrt( 1 - r * r )
        fchs                // st0 = -sqrt( 1 - r * r )
        fdiv                // st0 = r / -sqrt( 1 - r * r ), one slot popped
        fld1                // st0 = 1, st1 = quotient
        fpatan              // st0 = atan( st1 / st0 )
        fadd asm_half_pi    // st0 = st0 + pi / 2
    } // result is left in st0

#else

    // Portable path for every compiler/target without x86 inline assembly.
    return (float)acos( r );

#endif
}
00047 
// Arc sine of r, in radians.
//
// 32-bit x86 MSVC builds evaluate atan( r / sqrt( 1 - r * r ) ) on the x87 FPU
// and leave the result in st0 (no return statement on that path). Inline
// __asm is not available on any other MSVC target, so every other
// compiler/target combination uses the C library asin().
float asm_arcsin( float r ) {

#if ( PEON_COMPILER == PEON_COMPILER_MSVC ) && defined( _M_IX86 )

    const float asm_one = 1.f;
    __asm {
        fld r           // st0 = r
        fld r           // st0 = r, st1 = r
        fmul r          // st0 = r * r
        fsubr asm_one   // st0 = 1 - r * r (reverse subtract)
        fsqrt           // st0 = sqrt( 1 - r * r )
        fdiv            // st0 = r / sqrt( 1 - r * r ), one slot popped
        fld1            // st0 = 1, st1 = quotient
        fpatan          // st0 = atan( st1 / st0 )
    } // result is left in st0

#else

    // Portable path for every compiler/target without x86 inline assembly.
    return (float)asin( r );

#endif

}
00072 
// Arc tangent of r, in radians.
//
// 32-bit x86 MSVC builds use the x87 fpatan instruction with a divisor of 1
// and leave the result in st0 (no return statement on that path). Inline
// __asm is not available on any other MSVC target, so every other
// compiler/target combination uses the C library atan().
float asm_arctan( float r ) {

#if ( PEON_COMPILER == PEON_COMPILER_MSVC ) && defined( _M_IX86 )

    __asm {
        fld r   // st0 = r
        fld1    // st0 = 1, st1 = r
        fpatan  // st0 = atan( st1 / st0 ) = atan( r )
    } // result is left in st0

#else

    // Portable path for every compiler/target without x86 inline assembly.
    return (float)atan( r );

#endif

}
00090 
// Sine of r (r in radians).
//
// 32-bit x86 MSVC builds use the x87 fsin instruction and leave the result in
// st0 (no return statement on that path). Inline __asm is not available on
// any other MSVC target, so every other compiler/target combination uses the
// C library sin().
float asm_sin( float r ) {

#if ( PEON_COMPILER == PEON_COMPILER_MSVC ) && defined( _M_IX86 )

    __asm {
        fld r   // st0 = r
        fsin    // st0 = sin( st0 )
    } // result is left in st0

#else

    // Portable path for every compiler/target without x86 inline assembly.
    return sin( r );

#endif

}
00107 
// Cosine of r (r in radians).
//
// 32-bit x86 MSVC builds use the x87 fcos instruction and leave the result in
// st0 (no return statement on that path). Inline __asm is not available on
// any other MSVC target, so every other compiler/target combination uses the
// C library cos().
float asm_cos( float r ) {

#if ( PEON_COMPILER == PEON_COMPILER_MSVC ) && defined( _M_IX86 )

    __asm {
        fld r   // st0 = r
        fcos    // st0 = cos( st0 )
    } // result is left in st0

#else

    // Portable path for every compiler/target without x86 inline assembly.
    return cos( r );

#endif
}
00123 
// Tangent of r (r in radians).
//
// 32-bit x86 MSVC builds compute sin( r ) / cos( r ) on the x87 FPU and leave
// the result in st0 (no return statement on that path). Inline __asm is not
// available on any other MSVC target, so every other compiler/target
// combination uses the C library tan().
float asm_tan( float r ) {

#if ( PEON_COMPILER == PEON_COMPILER_MSVC ) && defined( _M_IX86 )

    __asm {
        fld r   // st0 = r
        fsin    // st0 = sin( r )
        fld r   // st0 = r, st1 = sin( r )
        fcos    // st0 = cos( r )
        fdiv    // st0 = sin( r ) / cos( r ), one slot popped
    } // result is left in st0

#else

    // Portable path for every compiler/target without x86 inline assembly.
    return tan( r );

#endif
}
00143 
// Square root: returns a such that a * a = r.
//
// 32-bit x86 MSVC builds use the x87 fsqrt instruction and leave the result
// in st0 (no return statement on that path). Inline __asm is not available on
// any other MSVC target, so every other compiler/target combination uses the
// C library sqrt().
float asm_sqrt( float r )
{
#if ( PEON_COMPILER == PEON_COMPILER_MSVC ) && defined( _M_IX86 )

    __asm {
        fld r   // st0 = r
        fsqrt   // st0 = sqrt( st0 )
    } // result is left in st0

#else

    // Portable path for every compiler/target without x86 inline assembly.
    return sqrt( r );

#endif
}
00160 
// Reciprocal square root: returns 1 / a for a * a = r.
// -- Use this for Vector normalisation!!!
//
// 32-bit x86 MSVC builds compute 1 / sqrt( r ) on the x87 FPU and leave the
// result in st0 (no return statement on that path). Inline __asm is not
// available on any other MSVC target, so every other compiler/target
// combination uses the C library sqrt().
float asm_rsq( float r )
{
#if ( PEON_COMPILER == PEON_COMPILER_MSVC ) && defined( _M_IX86 )

    __asm {
        fld1    // st0 = 1
        fld r   // st0 = r, st1 = 1
        fsqrt   // st0 = sqrt( r )
        fdiv    // st0 = 1 / sqrt( r ), one slot popped
    } // result is left in st0

#else

    // Portable path for every compiler/target without x86 inline assembly.
    return (float)( 1. / sqrt( r ) );

#endif
}
00180 
// Approximate reciprocal square root: returns roughly 1 / a for a * a = r.
//
// 32-bit x86 MSVC builds form an initial estimate y by integer arithmetic on
// the bit pattern of r ( 0x5F400000 - ( bits >> 1 ) ) and then apply a single
// refinement step  y * ( 1.5 - 0.5 * r * y * y ); the result is left in st0
// (no return statement on that path) and is only an approximation.
// Inline __asm is not available on any other MSVC target, so every other
// compiler/target combination returns the exact 1 / sqrt( r ).
float apx_rsq( float r ) {

#if ( PEON_COMPILER == PEON_COMPILER_MSVC ) && defined( _M_IX86 )

    const float asm_dot5 = 0.5f;
    const float asm_1dot5 = 1.5f;

    __asm {
        fld r               // st0 = r
        fmul asm_dot5       // st0 = 0.5 * r
        mov eax, r          // eax = bit pattern of r
        shr eax, 0x1        // eax = eax >> 1
        neg eax             // eax = -eax
        add eax, 0x5F400000 // eax = 0x5F400000 - ( bits >> 1 ): initial estimate
        mov r, eax          // r now holds the estimate y (parameter is overwritten)
        fmul r              // st0 = 0.5 * r * y
        fmul r              // st0 = 0.5 * r * y * y
        fsubr asm_1dot5     // st0 = 1.5 - st0 (reverse subtract)
        fmul r              // st0 = y * ( 1.5 - 0.5 * r * y * y )
    } // result is left in st0

#else

    // Portable path for every compiler/target without x86 inline assembly.
    return (float)( 1. / sqrt( r ) );

#endif
}
00210 
/* Fast inverse square root, MSVC builds only.
   Use for vector normalisation instead of 1/length() * x,y,z

   The hand-written version is a naked __fastcall function made of x86
   inline assembly, which only the 32-bit x86 MSVC compiler accepts, so it is
   restricted to that target. Other MSVC targets get a plain C definition of
   the same name so that existing callers keep compiling.
*/
#if ( PEON_COMPILER == PEON_COMPILER_MSVC ) && defined( _M_IX86 )

__declspec(naked) float __fastcall InvSqrt(float fValue)
{
    // The float argument is read from the stack at [esp + 4] and removed by
    // the final "ret 4"; the result is left in st0.
    // NOTE(review): scratch values are stored BELOW esp ([esp - 8],
    // [esp - 12]) without adjusting esp -- confirm nothing can overwrite that
    // area while this runs.
    // NOTE(review): the x87 sequence looks like Newton-Raphson refinement of
    // the bit-trick estimate -- confirm before changing any instruction.
    __asm
    {
        mov        eax, 0be6eb508h
        mov        dword ptr[esp-12],03fc00000h     // bit pattern of 1.5f
        sub        eax, dword ptr[esp + 4]
        sub        dword ptr[esp+4], 800000h        // exponent field - 1
        shr        eax, 1                           // initial estimate bits
        mov        dword ptr[esp -  8], eax

        fld        dword ptr[esp -  8]
        fmul    st, st
        fld        dword ptr[esp -  8]
        fxch    st(1)
        fmul    dword ptr[esp +  4]
        fld        dword ptr[esp - 12]
        fld        st(0)
        fsub    st,st(2)

        fld        st(1)
        fxch    st(1)
        fmul    st(3),st
        fmul    st(3),st
        fmulp    st(4),st
        fsub    st,st(2)

        fmul    st(2),st
        fmul    st(3),st
        fmulp    st(2),st
        fxch    st(1)
        fsubp    st(1),st

        fmulp    st(1), st
        ret 4
    }
}

#elif PEON_COMPILER == PEON_COMPILER_MSVC

// Non-x86 MSVC targets: no inline assembly is available, so compute the
// exact value instead. The __fastcall keyword is omitted here because it has
// no effect on these targets.
float InvSqrt(float fValue)
{
    return (float)( 1. / sqrt( fValue ) );
}

#endif
00256 
00257 // returns a random number
00258 FORCEINLINE float asm_rand()
00259 {
00260 
00261 #if PEON_COMPILER == PEON_COMPILER_MSVC
00262   #if 0
00263     #if PEON_COMP_VER >= 1300
00264 
00265         static unsigned __int64 q = time( NULL );
00266 
00267         _asm {
00268                 movq mm0, q
00269 
00270                 // do the magic MMX thing
00271                 pshufw mm1, mm0, 0x1E
00272                 paddd mm0, mm1
00273 
00274                 // move to integer memory location and free MMX
00275                 movq q, mm0
00276                 emms
00277         }
00278 
00279         return float( q );
00280     #endif
00281   #else
00282     // VC6 does not support pshufw
00283     return float( rand() );
00284   #endif
00285 #else
00286     // GCC etc
00287 
00288         return float( rand() );
00289 
00290 #endif
00291 }
00292 
00293 // returns the maximum random number
00294 FORCEINLINE float asm_rand_max()
00295 {
00296 
00297 #if PEON_COMPILER == PEON_COMPILER_MSVC
00298   #if 0
00299     #if PEON_COMP_VER >= 1300
00300 
00301         return std::numeric_limits< unsigned __int64 >::max();
00302         return 9223372036854775807.0f;
00303     #endif
00304   #else
00305     // VC6 does not support unsigned __int64
00306     return float( RAND_MAX );
00307   #endif
00308 
00309 #else
00310     // GCC etc
00311         return float( RAND_MAX );
00312 
00313 #endif
00314 }
00315 
// Natural logarithm of r, i.e. log2( r ) / log2( e ).
//
// 32-bit x86 MSVC builds use an approximation: the exponent bits of r give
// the integer part of log2( r ), a quadratic in the mantissa supplies the
// fractional part, and the sum is multiplied by 1 / log2( e ). The result is
// left in st0 (no return statement on that path). Inline __asm is not
// available on any other MSVC target, so every other compiler/target
// combination uses the C library log().
float asm_ln( float r ) {

#if ( PEON_COMPILER == PEON_COMPILER_MSVC ) && defined( _M_IX86 )

    const float asm_1_div_log2_e = .693147180559f;
    const float asm_neg1_div_3 = -.33333333333333333333333333333f;
    const float asm_neg2_div_3 = -.66666666666666666666666666667f;
    const float asm_2 = 2.f;

    int log_2 = 0;

    __asm {
        // log_2 = ( ( r >> 0x17 ) & 0xFF ) - 0x80;
        mov eax, r
        sar eax, 0x17
        and eax, 0xFF
        sub eax, 0x80
        mov log_2, eax

        // r = ( r & 0x807fffff ) + 0x3f800000;
        // (keeps sign and mantissa bits, replaces the exponent field)
        mov ebx, r
        and ebx, 0x807FFFFF
        add ebx, 0x3F800000
        mov r, ebx

        // st0 = ( asm_neg1_div_3 * r + asm_2 ) * r + asm_neg2_div_3;
        fld r
        fmul asm_neg1_div_3
        fadd asm_2
        fmul r
        fadd asm_neg2_div_3
        fild log_2              // st0 = log_2, st1 = polynomial
        fadd                    // st0 = polynomial + log_2, one slot popped
        fmul asm_1_div_log2_e   // st0 = st0 / log2( e )
    } // result is left in st0

#else

    // Portable path for every compiler/target without x86 inline assembly.
    return log( r );

#endif
}
00360 
00361 #if PEON_COMPILER == PEON_COMPILER_MSVC
00362 #       pragma warning( pop )
00363 #endif
00364 
00365 #endif

Generated on Wed Nov 9 09:37:05 2005 for Peon by  doxygen 1.4.3