home *** CD-ROM | disk | FTP | other *** search
/ PC User 1997 April / PCU_APR_97.ISO / utils / cpu / cpuinfo / source / cpuinf16 / speed.c < prev    next >
Encoding:
C/C++ Source or Header  |  1996-11-13  |  16.6 KB  |  593 lines

  1. /***************************************************************
  2. * C file:  Speed.c... for cpuinf16 DLL
  3. *
  4. *       This program has been developed by Intel Corporation.  
  5. *        You have Intel's permission to incorporate this code 
  6. *       into your product, royalty free.  Intel has various 
  7. *        intellectual property rights which it may assert under
  8. *       certain circumstances, such as if another manufacturer's
  9. *       processor mis-identifies itself as being "GenuineIntel"
  10. *        when the CPUID instruction is executed.
  11. *
  12. *       Intel specifically disclaims all warranties, express or
  13. *       implied, and all liability, including consequential and
  14. *        other indirect damages, for the use of this code, 
  15. *        including liability for infringement of any proprietary
  16. *        rights, and including the warranties of merchantability
  17. *        and fitness for a particular purpose.  Intel does not 
  18. *        assume any responsibility for any errors which may 
  19. *        appear in this code nor any responsibility to update it.
  20. *
  21. *  * Other brands and names are the property of their respective
  22. *    owners.
  23. *
  24. *  Copyright (c) 1995, Intel Corporation.  All rights reserved.
  25. ***************************************************************/
  26.   
  27. #include <windows.h> 
  28. #include <math.h>
  29. #include <stdio.h>
  30. #include <mmsystem.h>
  31. #include <limits.h>    
  32. #include <memory.h>
  33.  
  34. #include "speed.h"
  35. #include "cpuid.h"
  36.  
  37. // Tabs set at 4
  38. #define ROUND_THRESHOLD        6
  39.  
  40. // Tabs set at 4
  41. static struct FREQ_INFO GetCmosCpuSpeed();
  42. static struct FREQ_INFO GetRDTSCCpuSpeed();
  43. static struct FREQ_INFO GetBSFCpuSpeed(ulong cycles);
  44. static unsigned long diffTime64(unsigned long t1Hi, unsigned long t1Low, 
  45.                          unsigned long t2Hi, unsigned long t2Low, 
  46.                            unsigned long *tHi, unsigned long *tLow );
  47.  
  48. // extern in ASM file
  49. ushort Time_Processor_bsf(void);
  50.  
  51. /***************************************************************
  52. * LibMain() -- Windows entry procedure for DLLSs
  53. ***************************************************************/
  54. int FAR PASCAL _export LibMain(HANDLE hI, WORD wDS, WORD cbHS, LPSTR lpszCL) {
  55.     if (cbHS != 0) 
  56.     
  57.         UnlockData(0);
  58.     
  59.     return 1;
  60. } // LibMain()
  61.  
  62.  
  63.  
  64. /***************************************************************
  65. * WEP() -- Windows exit procedure for the DLLs.
  66. *
  67. ***************************************************************/
  68. int FAR PASCAL _export WEP(int nParam) {
  69.     return 1;
  70. } // WEP()
  71.  
  72.  
  73.  
  74. /***************************************************************
  75. * CpurawSpeed() -- Return the raw clock rate of the host CPU.
  76. *
  77. * Inputs:
  78. *    clocks:        0: Use default value for number of cycles
  79. *                   per BSF instruction.
  80. *               -1: Use CMos timer to get cpu speed (DOES NOT WORK FOR WINNT).
  81. *               Positive Integer: Use clocks value for number
  82. *                   of cycles per BSF instruction.
  83. *
  84. * Returns:
  85. *        If error then return all zeroes in FREQ_INFO structure
  86. *        Else return FREQ_INFO structure containing calculated 
  87. *       clock frequency, normalized clock frequency, number of 
  88. *       clock cycles during test sampling, and the number of 
  89. *       microseconds elapsed during the sampling.
  90. ***************************************************************/
  91.  
  92. unsigned long FAR PASCAL cpurawspeed(int clocks)
  93. {
  94.     struct FREQ_INFO cpu_speed;
  95.  
  96.     cpu_speed = cpuspeed(clocks);
  97.     return cpu_speed.raw_freq;
  98. }
  99.  
  100. /***************************************************************
  101. * CpunormSpeed() -- Return the raw clock rate of the host CPU.
  102. *
  103. * Inputs:
  104. *    clocks:        0: Use default value for number of cycles
  105. *                   per BSF instruction.
  106. *               -1: Use CMos timer to get cpu speed.
  107. *               Positive Integer: Use clocks value for number
  108. *                   of cycles per BSF instruction.
  109. *
  110. * Returns:
  111. *        If error then return all zeroes in FREQ_INFO structure
  112. *        Else return FREQ_INFO structure containing calculated 
  113. *       clock frequency, normalized clock frequency, number of 
  114. *       clock cycles during test sampling, and the number of 
  115. *       microseconds elapsed during the sampling.
  116. ***************************************************************/
  117.  
  118. unsigned long FAR PASCAL cpunormspeed(int clocks)
  119. {
  120.     struct FREQ_INFO cpu_speed;
  121.  
  122.     cpu_speed = cpuspeed(clocks);
  123.     return cpu_speed.norm_freq;
  124. }
  125.  
  126. /***************************************************************
  127. * ProcessorCount() -- Return the number of CPU's on this machine.
  128. *
  129. * Inputs:
  130. *
  131. * Returns:
  132. *         always 1 for 16 bit dll
  133. ***************************************************************/
  134.  
  135. unsigned long FAR PASCAL ProcessorCount()
  136. {
  137.     return 1;
  138. }
  139.  
  140. /***************************************************************
  141. * CpuSpeed() -- Return the raw clock rate of the host CPU.
  142. *
  143. * Inputs:
  144. *    clocks:        NULL: Use default value for number of cycles
  145. *                   per BSF instruction.
  146. *               Positive Integer: Use clocks value for number
  147. *                   of cycles per BSF instruction.
  148. *
  149. * Returns:
  150. *        If error then return all zeroes in FREQ_INFO structure
  151. *        Else return FREQ_INFO structure containing calculated 
  152. *       clock frequency, normalized clock frequency, number of 
  153. *       clock cycles during test sampling, and the number of 
  154. *       microseconds elapsed during the sampling.
  155. ***************************************************************/
  156.  
  157. struct FREQ_INFO FAR PASCAL cpuspeed(int clocks) 
  158. {
  159.     ulong  cycles;                    // Clock cycles elapsed 
  160.                                     //   during test
  161.     
  162.     ushort processor = wincpuid();    // Family of processor
  163.  
  164.     DWORD features = wincpufeatures();    // Features of Processor
  165.     
  166.     int manual=0;            // Specifies whether the user 
  167.                             //   manually entered the number of
  168.                             //   cycles for the BSF instruction.
  169.  
  170.     struct FREQ_INFO cpu_speed;        // Return structure for
  171.                                     //   cpuspeed
  172.                     
  173.     // Number of cycles needed to execute a single BSF 
  174.     //   instruction. Note that processors below i386(tm) 
  175.     //   are not supported.
  176.     ushort processor_cycles[] = {
  177.         00,  00,  00, 115, 47, 43, 
  178.         38,  38,  38, 38,  38, 38, 
  179.     };
  180.  
  181.     memset(&cpu_speed, 0x00, sizeof(cpu_speed));
  182.     
  183.     if ( processor & CLONE_MASK )
  184.         return cpu_speed;
  185.  
  186.     // Check for manual BSF instruction clock count
  187.     if (0 <= clocks) {
  188.         cycles =ITERATIONS*(ulong)processor_cycles[processor];
  189.     }
  190.     else if (0 < clocks && clocks <= MAXCLOCKS)  {
  191.         cycles = ITERATIONS * (ulong) clocks;
  192.         manual = 1;            // Toggle manual control flag.
  193.                             //   Note that this mode will not
  194.                             //      work properly with processors
  195.                             //   which can process multiple
  196.                             //   BSF instructions at a time.
  197.                             //   For example, manual mode
  198.                             //   will not work on a 
  199.                             //   PentiumPro(R)
  200.     }
  201.  
  202.     if ( ( features&0x00000010 ) && !(manual) ) {
  203.         if ( clocks == 0 )
  204.             return GetRDTSCCpuSpeed();
  205.         else
  206.             return GetCmosCpuSpeed();    
  207.     }
  208.     else if ( processor >= 3 ) {
  209.         return GetBSFCpuSpeed(cycles);
  210.     }        
  211.  
  212.     return cpu_speed;
  213.        
  214. } // cpuspeed()
  215.  
  216.  
  217.  
  218. static struct FREQ_INFO GetBSFCpuSpeed(ulong cycles)
  219. {
  220.      ulong  ticks;    // Microseconds elapsed                                 //   during test
  221.     ulong freq;        // Most current frequ. calculation
  222.     int i;            // Temporary Variable
  223.  
  224.     ulong current = 0;      // Variable to store time
  225.                             //   elapsed during loop of
  226.                             //   of BSF instructions
  227.  
  228.     ulong lowest  = ULONG_MAX;    // Since algorithm finds 
  229.                                 //   the lowest value out of
  230.                                 //   a set of samplings, 
  231.                                 //   this variable is set 
  232.                                 //   intially to the max 
  233.                                 //   unsigned long value). 
  234.                                 //   This guarantees that 
  235.                                 //   the initialized value 
  236.                                 //   is not later used as 
  237.                                 //   the least time through 
  238.                                 //   the loop.
  239.  
  240.     struct FREQ_INFO cpu_speed;        // Return structure for
  241.                                     //   cpuspeed
  242.                     
  243.     memset(&cpu_speed, 0x00, sizeof(cpu_speed));
  244.     
  245.     for ( i = 0; i < SAMPLINGS; i++ ) {
  246.                                 // Sample SAMPLINGS times. 
  247.                                 //   Can be increased or 
  248.                                 //   decreased depending
  249.                                 //   on accuracy vs. time
  250.                                 //   requirements
  251.  
  252.             
  253.         current = Time_Processor_bsf();
  254.            
  255.         if ( current < lowest )        // Take lowest elapsed
  256.             lowest = current;        //   time to account
  257.                                     //   for some samplings
  258.                                     //   being interrupted
  259.                                     //   by other operations 
  260.     }
  261.  
  262.     ticks = lowest;                
  263.         
  264.         
  265.     // Note that some seemingly arbitrary mulitplies and
  266.     //   divides are done below. This is to maintain a 
  267.     //   high level of precision without truncating the 
  268.     //   most significant data. According to what value 
  269.     //   ITERATIIONS is set to, these multiplies and
  270.     //   divides might need to be shifted for optimal
  271.     //   precision.
  272.  
  273.     ticks = ticks * 100000;     // Convert ticks to hundred 
  274.                                   //   thousandths of a tick
  275.         
  276.     ticks = ticks / 119318;        // Convert hundred 
  277.                                    //   thousandths of ticks to
  278.                                    //   microseconds (us)
  279.         
  280.     if ( (ticks%119318) >= 119318/2 )
  281.            ticks++;                // Round up if necessary
  282.             
  283.     freq = cycles/ticks;        // Cycles / us  = MHz
  284.  
  285.     cpu_speed.raw_freq  = freq;
  286.     if ( cycles%ticks > ticks/2 )
  287.            freq++;                    // Round up if necessary
  288.  
  289.     cpu_speed.in_cycles = cycles;    // Return variable structure
  290.     cpu_speed.ex_ticks  = ticks;    //   determined by one of 
  291.     cpu_speed.norm_freq = freq;
  292.  
  293.     return cpu_speed;
  294. }            
  295.  
  296. static struct FREQ_INFO GetRDTSCCpuSpeed()
  297. {
  298.      ulong  total_ticks=0, ticks;    // Microseconds elapsed 
  299.                                     //   during test
  300.     
  301.     ulong  total_cycles=0, cycles;    // Clock cycles elapsed 
  302.                                     //   during test
  303.     
  304.     ulong  stamp0, stamp1;            // Time Stamp Variable 
  305.                                     //   for beginning and end 
  306.     
  307.     ushort u0,u1;                     // 16-bit variables for time
  308.     ushort v0,v1;                    //   stamp reads. These are 
  309.                                     //   later merged into 
  310.                                     //   stamp0, and stamp1 
  311.                                     //   (32-bit variables)
  312.  
  313.     ulong freq;                // Most current frequ. calculation
  314.     ulong freq2;            // 2nd most current frequ. calc.
  315.     ulong freq3;            // 3rd most current frequ. calc.
  316.     
  317.     ulong total;            // Sum of previous three frequency
  318.                             //   calculations
  319.  
  320.     int manual=0;            // Specifies whether the user 
  321.                             //   manually entered the number of
  322.                             //   cycles for the BSF instruction.
  323.  
  324.     int tries=0;            // Number of times a calculation has
  325.                             //   been made on this call to 
  326.                             //   cpuspeed
  327.  
  328.     struct FREQ_INFO cpu_speed;        // Return structure for
  329.                                     //   cpuspeed
  330.                     
  331.     DWORD t0,t1;                       // Variables to store 
  332.                                     //   timeGetTime values
  333.  
  334.     memset(&cpu_speed, 0x00, sizeof(cpu_speed));
  335.  
  336.     // On Pentium Processors or above use 
  337.     //   the Read Time Stamp method which 
  338.     //   compares elapsed time on from the
  339.     //   timeGetTime call with elaped
  340.     //   cycles on the Time Stamp Register.
  341.  
  342.     do {            // This do loop runs up to 20 times or
  343.                     //   until the average of the previous 
  344.                     //   three calculated frequencies is 
  345.                     //   within 2 MHz of each of the 
  346.                     //   individual calculated frequencies. 
  347.                     //   This resampling increases the 
  348.                     //   accuracy of the results since
  349.                     //   outside factors could affect this
  350.                     //   calculation
  351.             
  352.         tries++;        // Increment number of times sampled
  353.                         //   on this call to cpuspeed
  354.                                     
  355.         freq3 = freq2;    // Shift frequencies back to make
  356.         freq2 = freq;    //   room for new frequency 
  357.                             //   measurement
  358.  
  359.            t0 = timeGetTime();
  360.             
  361.         t1 = t0;        // Set Initial Time
  362.  
  363.            while ( t1 - t0 < INITIAL_DELAY ) {
  364.                            // Loop until three ticks have 
  365.                            //   passed    since last read of 
  366.                         //     timeGetTime. This accounts for
  367.                         //   overhead later.
  368.  
  369.             t1 = timeGetTime();
  370.         }                    
  371.  
  372.            __asm                        // Read Time Stamp...
  373.         {                       
  374.             RDTSC
  375.                 
  376.             MOV CL, 16
  377.                             
  378.             MOV u0, AX
  379.             OPND32
  380.             SHR    AX,CL
  381.             MOV u1, AX
  382.         }                   
  383.         
  384.         t0 = t1;                    // Reset Initial Time
  385.             
  386.           while ( t1 - t0 < SAMPLING_DELAY ) {
  387.                              // Loop until 60 ticks have passed 
  388.                              //   since last timeGetTime read. 
  389.                              //   This allows for elapsed time
  390.                              //   for sampling
  391.                               
  392.             t1 = timeGetTime();
  393.         }                            
  394.             
  395.  
  396.            __asm                        // Read Time Stamp...
  397.         {
  398.             RDTSC
  399.                 
  400.             MOV CL, 16
  401.                             
  402.             MOV v0, AX
  403.             OPND32
  404.             SHR    AX,CL
  405.             MOV v1, AX
  406.         }                   
  407.         
  408.         stamp0 = (ulong) u1*65536 + u0;
  409.         stamp1 = (ulong) v1*65536 + v0;
  410.                         // Move two 16-bit values into one 
  411.                         //   32-bit value for the time stamp
  412.                         //   read at both the beginning and 
  413.                         //   end of the test.
  414.                                                    
  415.            cycles = stamp1 - stamp0;    // Number of internal 
  416.                                        //   clock cycles is 
  417.                                        //   difference between 
  418.                                        //   two time stamp 
  419.                                        //   readings.
  420.  
  421.            ticks = (ulong) t1 - t0;    // Number of external 
  422.                                        //   ticks is difference 
  423.                                        //   between two 
  424.                                        //   timeGetTime reads
  425.                                     
  426.         ticks = ticks * 1000;        // Convert ticks to us 
  427.                                        //   (since the 
  428.                                        //   timeGetTime
  429.                                        //   frequency is 1 
  430.                                        //   tick/ms).
  431.  
  432.         total_ticks += ticks;
  433.         total_cycles += cycles;
  434.  
  435.         freq = cycles/ticks;        // Cycles / us  = MHz
  436.                                                 
  437.           if ( cycles%ticks > ticks/2 )
  438.                freq++;                    // Round up if necessary    
  439.                   
  440.         total = ( freq + freq2 + freq3 );
  441.                             // Total last three frequency 
  442.                             //   calculations
  443.  
  444.                 
  445.     } while((tries<3) ||             
  446.             (tries<MAX_TRIES)&&
  447.             ((abs(3*(int)freq -(int)total)>3*TOLERANCE)||
  448.              (abs(3*(int)freq2-(int)total)>3*TOLERANCE)||
  449.              (abs(3*(int)freq3-(int)total)>3*TOLERANCE)));    
  450.                     // Compare last three calculations to 
  451.                       //   average of last three calculations.        
  452.         
  453.     // Try one more significant digit.
  454.     freq3 = ( total_cycles * 10 ) / total_ticks;
  455.     freq2 = ( total_cycles * 100 ) / total_ticks;
  456.  
  457.     if ( freq2 - (freq3 * 10) >= ROUND_THRESHOLD )
  458.         freq3++;
  459.  
  460.     cpu_speed.raw_freq = total_cycles / total_ticks;
  461.     cpu_speed.norm_freq = cpu_speed.raw_freq;
  462.  
  463.     freq = cpu_speed.raw_freq * 10;
  464.     if( (freq3 - freq) >= ROUND_THRESHOLD )
  465.         cpu_speed.norm_freq++;
  466.  
  467.     cpu_speed.ex_ticks = total_ticks;
  468.     cpu_speed.in_cycles = total_cycles;
  469.  
  470.     return cpu_speed;
  471. }    
  472.  
  473.  
  474. static int GetCmosTick(void)
  475. {
  476.     int tick = 0;
  477.  
  478.     // __asm    mov ah, 02h
  479.     // __asm    int 1Ah
  480.     // __asm    mov al, dh
  481.     // __asm    and ax, 000Fh  
  482.  
  483.     __asm  xor ax, ax
  484.     __asm  out 070h, al
  485.  
  486.     __asm  xor ax, ax
  487.     __asm  in  al, 071h
  488.  
  489.     // _outp( 0x70, offset );
  490.     // base = _inp( 0x71 ); 
  491.  
  492. // value returned in ax by function
  493.  
  494.     __asm     mov word ptr tick, ax
  495.  
  496.     return tick;
  497. }
  498.  
  499.  
  500. //#define ABS_TICK(a,b)  (b<a)?b+60-a:b-a
  501. // since I am only interested in single ticks
  502. #define ABS_TICK(a,b)  (b<a)?b+10-a:b-a
  503.  
  504. static struct FREQ_INFO GetCmosCpuSpeed()
  505. {
  506.     int    timeStart, timeStop, lapseTime;
  507.     unsigned long   temp;
  508.     unsigned long   temp1;
  509.     struct FREQ_INFO cpu_speed;
  510.     unsigned long   cpuSpeed = 0l;
  511.     ulong  stamp0, stamp1;            // Time Stamp Variable 
  512.                                     //   for beginning and end 
  513.     
  514.     ushort u0,u1;                     // 16-bit variables for time
  515.     ushort v0,v1;                    //   stamp reads. These are 
  516.                                     //   later merged into 
  517.                                     //   stamp0, and stamp1 
  518.                                     //   (32-bit variables)
  519.  
  520.     memset(&cpu_speed, 0x00, sizeof(cpu_speed));
  521.  
  522.     // This loop waits for the next tick
  523.     // so that we begin speed test on a tick edge
  524.     timeStart = GetCmosTick();
  525.     for(;;)
  526.     {
  527.         timeStop = GetCmosTick();
  528.         if (  ABS_TICK(timeStart,timeStop) > 0 )
  529.         {
  530.             __asm                        // Read Time Stamp...
  531.             {                       
  532.                 RDTSC
  533.                 
  534.                 MOV CL, 16
  535.                             
  536.                 MOV u0, AX
  537.                 OPND32
  538.                 SHR    AX,CL
  539.                 MOV u1, AX
  540.             }                   
  541.             break;    
  542.         }    
  543.     }
  544.  
  545.     timeStart = timeStop;
  546.  
  547.     for(;;)
  548.     {
  549.         timeStop = GetCmosTick();
  550.         if (  ABS_TICK(timeStart,timeStop) > 0 )
  551.         {
  552.             __asm                        // Read Time Stamp...
  553.             {
  554.                 RDTSC
  555.                 
  556.                 MOV CL, 16
  557.                             
  558.                 MOV v0, AX
  559.                 OPND32
  560.                 SHR    AX,CL
  561.                 MOV v1, AX
  562.             }                   
  563.             break;    
  564.         }    
  565.     }
  566.  
  567.     // convert into long values
  568.     stamp0 = (ulong) u1*65536 + u0;
  569.     stamp1 = (ulong) v1*65536 + v0;
  570.  
  571.     lapseTime = ABS_TICK(timeStart,timeStop);
  572.  
  573.     cpuSpeed = stamp1 - stamp0; ///lapseTime; 
  574.     cpu_speed.in_cycles = cpuSpeed;        // Cycles count since we in this routine
  575.  
  576.     //round to nearest digit
  577.     temp =  cpuSpeed/1000000;     
  578.     temp1 = cpuSpeed/100000;  
  579.     temp = temp * 10;  // realign with last digit = zero
  580.  
  581.     cpuSpeed = cpuSpeed/1000000; // cpuSpeed/1000000;
  582.     cpu_speed.raw_freq = cpuSpeed;    
  583.  
  584.     if( (temp1 - temp) >= ROUND_THRESHOLD )
  585.         cpuSpeed++;
  586.     
  587.     cpu_speed.norm_freq = cpuSpeed;    
  588.     cpu_speed.ex_ticks = (timeStop - timeStart) * 1000000;
  589.  
  590.     return cpu_speed;            
  591. }
  592.