00001 #ifndef __asm_math_H__
00002 #define __asm_math_H__
00003
00004 #include "peonstdafx.h"
00005
00006
00007
00008
00009
// Single-precision pi and pi/2.
// Written as literals (rather than computed through atan() at start-up) so
// both values are constant-initialized and are already valid when static
// initializers in other translation units read them. The literals round to
// exactly the same floats that 4.0 * atan( 1.0 ) and 0.5 * pi produce.
const float pi = 3.14159265358979323846f;
const float half_pi = 1.57079632679489661923f;
00012
00013
00014
00015
// MSVC only: the inline-assembly branches of the functions in this header
// contain no return statement, which makes the compiler report warning
// C4035 ("no return value"). The warning state is saved here, C4035 is
// switched off, and the saved state is restored by the matching
// "#pragma warning( pop )" at the end of the file.
00016 #if PEON_COMPILER == PEON_COMPILER_MSVC
00017 # pragma warning( push )
00018 # pragma warning( disable: 4035 )
00019 #endif
00020
/**
 * Arc cosine of r, in radians.
 *
 * 32-bit x86 MSVC builds evaluate half_pi + atan( r / -sqrt( 1 - r*r ) ) with
 * x87 inline assembly and leave the result on the FPU stack as the return
 * value. Every other toolchain goes through the C library acos().
 *
 * Declared inline because the definition lives in a header; without it every
 * translation unit that includes this file would emit its own external
 * definition and the program would fail to link.
 *
 * @param r  cosine of the wanted angle
 * @return   the angle in radians
 */
inline float asm_arccos( float r ) {

    // __asm blocks are only accepted by the 32-bit x86 MSVC compiler, so the
    // assembly path is additionally guarded by _M_IX86.
#if PEON_COMPILER == PEON_COMPILER_MSVC && defined( _M_IX86 )

    float asm_one = 1.f;
    float asm_half_pi = half_pi;
    __asm {
        fld r               // ST0 = r
        fld r               // ST0 = r, ST1 = r
        fmul r              // ST0 = r*r
        fsubr asm_one       // ST0 = 1 - r*r
        fsqrt               // ST0 = sqrt( 1 - r*r )
        fchs                // ST0 = -sqrt( 1 - r*r )
        fdiv                // ST0 = r / -sqrt( 1 - r*r )
        fld1
        fpatan              // ST0 = atan( previous ST0 / 1 )
        fadd asm_half_pi    // shift by pi/2
    }
    // No return statement on purpose: the value stays in ST(0).

#else

    // Portable fallback for every compiler / target without MSVC inline asm.
    return float( acos( r ) );

#endif
}
00047
/**
 * Arc sine of r, in radians.
 *
 * 32-bit x86 MSVC builds evaluate atan( r / sqrt( 1 - r*r ) ) with x87 inline
 * assembly and leave the result on the FPU stack as the return value. Every
 * other toolchain goes through the C library asin().
 *
 * Declared inline because the definition lives in a header; a non-inline
 * definition would be emitted once per including translation unit and break
 * the link.
 *
 * @param r  sine of the wanted angle
 * @return   the angle in radians
 */
inline float asm_arcsin( float r ) {

    // __asm blocks are only accepted by the 32-bit x86 MSVC compiler, so the
    // assembly path is additionally guarded by _M_IX86.
#if PEON_COMPILER == PEON_COMPILER_MSVC && defined( _M_IX86 )

    const float asm_one = 1.f;
    __asm {
        fld r               // ST0 = r
        fld r               // ST0 = r, ST1 = r
        fmul r              // ST0 = r*r
        fsubr asm_one       // ST0 = 1 - r*r
        fsqrt               // ST0 = sqrt( 1 - r*r )
        fdiv                // ST0 = r / sqrt( 1 - r*r )
        fld1
        fpatan              // ST0 = atan( previous ST0 / 1 )
    }
    // No return statement on purpose: the value stays in ST(0).

#else

    // Portable fallback for every compiler / target without MSVC inline asm.
    return float( asin( r ) );

#endif

}
00072
/**
 * Arc tangent of r, in radians.
 *
 * 32-bit x86 MSVC builds use the x87 fpatan instruction with a denominator
 * of 1 and leave the result on the FPU stack as the return value. Every
 * other toolchain goes through the C library atan().
 *
 * Declared inline because the definition lives in a header; a non-inline
 * definition would be emitted once per including translation unit and break
 * the link.
 *
 * @param r  tangent of the wanted angle
 * @return   the angle in radians
 */
inline float asm_arctan( float r ) {

    // __asm blocks are only accepted by the 32-bit x86 MSVC compiler, so the
    // assembly path is additionally guarded by _M_IX86.
#if PEON_COMPILER == PEON_COMPILER_MSVC && defined( _M_IX86 )

    __asm {
        fld r       // ST0 = r
        fld1        // ST0 = 1, ST1 = r
        fpatan      // ST0 = atan( r / 1 )
    }
    // No return statement on purpose: the value stays in ST(0).

#else

    // Portable fallback for every compiler / target without MSVC inline asm.
    return float( atan( r ) );

#endif

}
00090
/**
 * Sine of r.
 *
 * 32-bit x86 MSVC builds use the x87 fsin instruction and leave the result
 * on the FPU stack as the return value. Every other toolchain goes through
 * the C library sin().
 *
 * Declared inline because the definition lives in a header; a non-inline
 * definition would be emitted once per including translation unit and break
 * the link.
 *
 * @param r  angle in radians
 * @return   sine of the angle
 */
inline float asm_sin( float r ) {

    // __asm blocks are only accepted by the 32-bit x86 MSVC compiler, so the
    // assembly path is additionally guarded by _M_IX86.
#if PEON_COMPILER == PEON_COMPILER_MSVC && defined( _M_IX86 )

    __asm {
        fld r       // ST0 = r
        fsin        // ST0 = sin( r )
    }
    // No return statement on purpose: the value stays in ST(0).

#else

    // Portable fallback for every compiler / target without MSVC inline asm.
    return sin( r );

#endif

}
00107
/**
 * Cosine of r.
 *
 * 32-bit x86 MSVC builds use the x87 fcos instruction and leave the result
 * on the FPU stack as the return value. Every other toolchain goes through
 * the C library cos().
 *
 * Declared inline because the definition lives in a header; a non-inline
 * definition would be emitted once per including translation unit and break
 * the link.
 *
 * @param r  angle in radians
 * @return   cosine of the angle
 */
inline float asm_cos( float r ) {

    // __asm blocks are only accepted by the 32-bit x86 MSVC compiler, so the
    // assembly path is additionally guarded by _M_IX86.
#if PEON_COMPILER == PEON_COMPILER_MSVC && defined( _M_IX86 )

    __asm {
        fld r       // ST0 = r
        fcos        // ST0 = cos( r )
    }
    // No return statement on purpose: the value stays in ST(0).

#else

    // Portable fallback for every compiler / target without MSVC inline asm.
    return cos( r );

#endif
}
00123
/**
 * Tangent of r.
 *
 * 32-bit x86 MSVC builds compute sin( r ) / cos( r ) with the x87 fsin and
 * fcos instructions and leave the quotient on the FPU stack as the return
 * value. Every other toolchain goes through the C library tan().
 *
 * Declared inline because the definition lives in a header; a non-inline
 * definition would be emitted once per including translation unit and break
 * the link.
 *
 * @param r  angle in radians
 * @return   tangent of the angle
 */
inline float asm_tan( float r ) {

    // __asm blocks are only accepted by the 32-bit x86 MSVC compiler, so the
    // assembly path is additionally guarded by _M_IX86.
#if PEON_COMPILER == PEON_COMPILER_MSVC && defined( _M_IX86 )

    __asm {
        fld r       // ST0 = r
        fsin        // ST0 = sin( r )
        fld r       // ST0 = r, ST1 = sin( r )
        fcos        // ST0 = cos( r )
        fdiv        // ST0 = sin( r ) / cos( r )
    }
    // No return statement on purpose: the value stays in ST(0).

#else

    // Portable fallback for every compiler / target without MSVC inline asm.
    return tan( r );

#endif
}
00143
00144
/**
 * Square root of r.
 *
 * 32-bit x86 MSVC builds use the x87 fsqrt instruction and leave the result
 * on the FPU stack as the return value. Every other toolchain goes through
 * the C library sqrt().
 *
 * Declared inline because the definition lives in a header; a non-inline
 * definition would be emitted once per including translation unit and break
 * the link.
 *
 * @param r  value to take the root of
 * @return   square root of r
 */
inline float asm_sqrt( float r )
{
    // __asm blocks are only accepted by the 32-bit x86 MSVC compiler, so the
    // assembly path is additionally guarded by _M_IX86.
#if PEON_COMPILER == PEON_COMPILER_MSVC && defined( _M_IX86 )

    __asm {
        fld r       // ST0 = r
        fsqrt       // ST0 = sqrt( r )
    }
    // No return statement on purpose: the value stays in ST(0).

#else

    // Portable fallback for every compiler / target without MSVC inline asm.
    return sqrt( r );

#endif
}
00160
00161
00162
/**
 * Reciprocal square root, 1 / sqrt( r ).
 *
 * 32-bit x86 MSVC builds use the x87 fsqrt and fdiv instructions and leave
 * the result on the FPU stack as the return value. Every other toolchain
 * divides 1 by the C library sqrt().
 *
 * Declared inline because the definition lives in a header; a non-inline
 * definition would be emitted once per including translation unit and break
 * the link.
 *
 * @param r  value to take the reciprocal root of
 * @return   1 / sqrt( r )
 */
inline float asm_rsq( float r )
{
    // __asm blocks are only accepted by the 32-bit x86 MSVC compiler, so the
    // assembly path is additionally guarded by _M_IX86.
#if PEON_COMPILER == PEON_COMPILER_MSVC && defined( _M_IX86 )

    __asm {
        fld1        // ST0 = 1
        fld r       // ST0 = r, ST1 = 1
        fsqrt       // ST0 = sqrt( r )
        fdiv        // ST0 = 1 / sqrt( r )
    }
    // No return statement on purpose: the value stays in ST(0).

#else

    // Portable fallback for every compiler / target without MSVC inline asm.
    return 1. / sqrt( r );

#endif
}
00180
00181
00182
/**
 * Approximate reciprocal square root, roughly 1 / sqrt( r ).
 *
 * 32-bit x86 MSVC builds derive an initial guess y from the bit pattern of
 * r ( 0x5F400000 - ( bits >> 1 ) ) and refine it once with
 * y * ( 1.5 - 0.5 * r * y * y ); the result is left on the FPU stack as the
 * return value. Every other toolchain returns the value computed with the
 * C library sqrt(), so results can differ slightly between the two paths.
 *
 * Declared inline because the definition lives in a header; a non-inline
 * definition would be emitted once per including translation unit and break
 * the link.
 *
 * @param r  value to take the reciprocal root of
 * @return   approximation of 1 / sqrt( r )
 */
inline float apx_rsq( float r ) {

    // __asm blocks are only accepted by the 32-bit x86 MSVC compiler, so the
    // assembly path is additionally guarded by _M_IX86.
#if PEON_COMPILER == PEON_COMPILER_MSVC && defined( _M_IX86 )

    const float asm_dot5 = 0.5f;
    const float asm_1dot5 = 1.5f;

    __asm {
        fld r                   // ST0 = r
        fmul asm_dot5           // ST0 = 0.5 * r (taken before r is overwritten)
        mov eax, r              // integer view of r
        shr eax, 0x1
        neg eax
        add eax, 0x5F400000     // eax = 0x5F400000 - ( bits >> 1 )
        mov r, eax              // r now holds the initial guess y
        fmul r                  // ST0 = 0.5 * r * y
        fmul r                  // ST0 = 0.5 * r * y * y
        fsubr asm_1dot5         // ST0 = 1.5 - 0.5 * r * y * y
        fmul r                  // ST0 = y * ( 1.5 - 0.5 * r * y * y )
    }
    // No return statement on purpose: the value stays in ST(0).

#else

    // Portable fallback for every compiler / target without MSVC inline asm.
    return 1. / sqrt( r );

#endif
}
00210
00211
00212
00213
00214
00215 #if PEON_COMPILER == PEON_COMPILER_MSVC
00216
// Reciprocal square root of fValue, hand-scheduled x87 code (MSVC only).
//
// The function is naked, so no prologue/epilogue is generated: the argument
// is read straight from [esp+4] and the routine finishes with its own
// "ret 4", which also removes the 4-byte argument from the stack. The single
// value left on the x87 stack at that point is the result.
//
// Method: an initial guess y is built from the argument's bit pattern, then
// refined three times with y = y * ( 1.5 - h * y * y ), where h is the
// argument with its exponent field lowered by one.
//
// NOTE(review): the scratch slots [esp-8] and [esp-12] lie below the stack
// pointer and the argument slot [esp+4] is modified in place - confirm this
// is safe for every caller / environment this is used in.
00217 __declspec(naked) float __fastcall InvSqrt(float fValue)
00218 {
00219 __asm
00220 {
// Initial guess: ( 0xBE6EB508 - bits( fValue ) ) >> 1, stored at [esp-8].
// [esp-12] receives 0x3FC00000, the bit pattern of 1.5f.
// Subtracting 0x800000 from the argument lowers its exponent field by one
// (h = fValue / 2 for normal values - presumably the intended use).
00221 mov eax, 0be6eb508h
00222 mov dword ptr[esp-12],03fc00000h
00223 sub eax, dword ptr[esp + 4]
00224 sub dword ptr[esp+4], 800000h
00225 shr eax, 1
00226 mov dword ptr[esp - 8], eax
00227
// Load y twice, form h*y*y, then 1.5 - h*y*y (first correction factor).
00228 fld dword ptr[esp - 8]
00229 fmul st, st
00230 fld dword ptr[esp - 8]
00231 fxch st(1)
00232 fmul dword ptr[esp + 4]
00233 fld dword ptr[esp - 12]
00234 fld st(0)
00235 fsub st,st(2)
00236
// Apply the first factor to both y and h*y*y, then form the second factor.
00237 fld st(1)
00238 fxch st(1)
00239 fmul st(3),st
00240 fmul st(3),st
00241 fmulp st(4),st
00242 fsub st,st(2)
00243
// Apply the second factor the same way, then form the third factor.
00244 fmul st(2),st
00245 fmul st(3),st
00246 fmulp st(2),st
00247 fxch st(1)
00248 fsubp st(1),st
00249
// Final multiply leaves the refined estimate as the only x87 stack entry;
// "ret 4" returns and pops the argument.
00250 fmulp st(1), st
00251 ret 4
00252 }
00253 }
00254
00255 #endif
00256
00257
00258 FORCEINLINE float asm_rand()
00259 {
00260
00261 #if PEON_COMPILER == PEON_COMPILER_MSVC
00262 #if 0
00263 #if PEON_COMP_VER >= 1300
00264
00265 static unsigned __int64 q = time( NULL );
00266
00267 _asm {
00268 movq mm0, q
00269
00270
00271 pshufw mm1, mm0, 0x1E
00272 paddd mm0, mm1
00273
00274
00275 movq q, mm0
00276 emms
00277 }
00278
00279 return float( q );
00280 #endif
00281 #else
00282
00283 return float( rand() );
00284 #endif
00285 #else
00286
00287
00288 return float( rand() );
00289
00290 #endif
00291 }
00292
00293
00294 FORCEINLINE float asm_rand_max()
00295 {
00296
00297 #if PEON_COMPILER == PEON_COMPILER_MSVC
00298 #if 0
00299 #if PEON_COMP_VER >= 1300
00300
00301 return std::numeric_limits< unsigned __int64 >::max();
00302 return 9223372036854775807.0f;
00303 #endif
00304 #else
00305
00306 return float( RAND_MAX );
00307 #endif
00308
00309 #else
00310
00311 return float( RAND_MAX );
00312
00313 #endif
00314 }
00315
00316
/**
 * Natural logarithm of r.
 *
 * 32-bit x86 MSVC builds compute an approximation: the exponent field of r
 * is extracted as an integer, the mantissa is rebuilt as a float m, the
 * quadratic ( -m/3 + 2 ) * m - 2/3 is added to the exponent term, and the
 * sum is scaled by 0.693147... to convert from base 2 to base e. The result
 * is left on the FPU stack as the return value. Every other toolchain goes
 * through the C library log(), so results can differ slightly between the
 * two paths.
 *
 * Declared inline because the definition lives in a header; a non-inline
 * definition would be emitted once per including translation unit and break
 * the link.
 *
 * @param r  value to take the logarithm of
 * @return   natural logarithm of r (approximate on the assembly path)
 */
inline float asm_ln( float r ) {

    // __asm blocks are only accepted by the 32-bit x86 MSVC compiler, so the
    // assembly path is additionally guarded by _M_IX86.
#if PEON_COMPILER == PEON_COMPILER_MSVC && defined( _M_IX86 )

    const float asm_1_div_log2_e = .693147180559f;
    const float asm_neg1_div_3 = -.33333333333333333333333333333f;
    const float asm_neg2_div_3 = -.66666666666666666666666666667f;
    const float asm_2 = 2.f;

    int log_2 = 0;

    __asm {

        // Exponent field of r, minus 0x80, kept as an integer.
        mov eax, r
        sar eax, 0x17
        and eax, 0xFF
        sub eax, 0x80
        mov log_2, eax

        // Keep sign and mantissa bits, then add 0x3F800000 to form m.
        mov ebx, r
        and ebx, 0x807FFFFF
        add ebx, 0x3F800000
        mov r, ebx

        // ( -m/3 + 2 ) * m - 2/3, plus the integer exponent term,
        // then scale the base-2 result to base e.
        fld r
        fmul asm_neg1_div_3
        fadd asm_2
        fmul r
        fadd asm_neg2_div_3
        fild log_2
        fadd
        fmul asm_1_div_log2_e
    }
    // No return statement on purpose: the value stays in ST(0).

#else

    // Portable fallback for every compiler / target without MSVC inline asm.
    return log( r );

#endif
}
00360
00361 #if PEON_COMPILER == PEON_COMPILER_MSVC
00362 # pragma warning( pop )
00363 #endif
00364
00365 #endif