1 /****************************************************************************
3 * SciTech OS Portability Manager Library
5 * ========================================================================
7 * The contents of this file are subject to the SciTech MGL Public
8 * License Version 1.0 (the "License"); you may not use this file
9 * except in compliance with the License. You may obtain a copy of
10 * the License at http://www.scitechsoft.com/mgl-license.txt
12 * Software distributed under the License is distributed on an
13 * "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
14 * implied. See the License for the specific language governing
15 * rights and limitations under the License.
17 * The Original Code is Copyright (C) 1991-1998 SciTech Software, Inc.
19 * The Initial Developer of the Original Code is SciTech Software, Inc.
20 * All Rights Reserved.
22 * ========================================================================
27 * Description: Main module to implement the Zen Timer support functions.
29 ****************************************************************************/
34 #if !defined(__WIN32_VXD__) && !defined(__OS2_VDD__) && !defined(__NT_DRIVER__)
39 /*----------------------------- Implementation ----------------------------*/
41 /* External Intel assembler functions */
44 ibool _ASMAPI _CPU_haveCPUID(void);
46 ibool _ASMAPI _CPU_check80386(void);
48 ibool _ASMAPI _CPU_check80486(void);
50 uint _ASMAPI _CPU_checkCPUID(void);
52 uint _ASMAPI _CPU_getCPUIDModel(void);
54 uint _ASMAPI _CPU_getCPUIDStepping(void);
56 uint _ASMAPI _CPU_getCPUIDFeatures(void);
58 uint _ASMAPI _CPU_getCacheSize(void);
60 uint _ASMAPI _CPU_have3DNow(void);
62 ibool _ASMAPI _CPU_checkClone(void);
64 void _ASMAPI _CPU_readTimeStamp(CPU_largeInteger *time);
66 void _ASMAPI _CPU_runBSFLoop(ulong iterations);
68 ulong _ASMAPI _CPU_mulDiv(ulong a,ulong b,ulong c);
70 void ZTimerQuickInit(void);
71 #define CPU_HaveMMX 0x00800000
72 #define CPU_HaveRDTSC 0x00000010
73 #define CPU_HaveSSE 0x02000000
76 #if defined(__SMX32__)
77 #include "smx/cpuinfo.c"
78 #elif defined(__RTTARGET__)
79 #include "rttarget/cpuinfo.c"
80 #elif defined(__REALDOS__)
81 #include "dos/cpuinfo.c"
82 #elif defined(__NT_DRIVER__)
83 #include "ntdrv/cpuinfo.c"
84 #elif defined(__WIN32_VXD__)
85 #include "vxd/cpuinfo.c"
86 #elif defined(__WINDOWS32__)
87 #include "win32/cpuinfo.c"
88 #elif defined(__OS2_VDD__)
89 #include "vdd/cpuinfo.c"
90 #elif defined(__OS2__)
91 #include "os2/cpuinfo.c"
92 #elif defined(__LINUX__)
93 #include "linux/cpuinfo.c"
94 #elif defined(__QNX__)
95 #include "qnx/cpuinfo.c"
96 #elif defined(__BEOS__)
97 #include "beos/cpuinfo.c"
99 #error CPU library not ported to this platform yet!
102 /*------------------------ Public interface routines ----------------------*/
104 /****************************************************************************
106 Read an I/O port location.
107 ****************************************************************************/
112 PM_outpb(port,(uchar)index);
113 return PM_inpb(port+1);
116 /****************************************************************************
118 Write an I/O port location.
119 ****************************************************************************/
125 PM_outpb(port,(uchar)index);
126 PM_outpb(port+1,(uchar)value);
129 /****************************************************************************
131 Enables the Cyrix CPUID instruction to properly detect MediaGX and 6x86
133 ****************************************************************************/
134 static void _CPU_enableCyrixCPUID(void)
139 ccr3 = rdinx(0x22,0xC3);
140 wrinx(0x22,0xC3,(uchar)(ccr3 | 0x10));
141 wrinx(0x22,0xE8,(uchar)(rdinx(0x22,0xE8) | 0x80));
142 wrinx(0x22,0xC3,ccr3);
145 /****************************************************************************
147 Returns the type of processor in the system.
153 Numerical identifier for the installed processor
156 Returns the type of processor in the system. Note that if the CPU is an
157 unknown Pentium family processor that we don't have an enumeration for,
158 the return value will be greater than or equal to the value of CPU_UnkPentium
159 (depending on the value returned by the CPUID instruction).
162 CPU_getProcessorSpeed, CPU_haveMMX, CPU_getProcessorName
163 ****************************************************************************/
164 uint ZAPI CPU_getProcessorType(void)
166 #if defined(__INTEL__)
167 uint cpu,vendor,model,cacheSize;
168 static ibool firstTime = true;
170 if (_CPU_haveCPUID()) {
171 cpu = _CPU_checkCPUID();
172 vendor = cpu & ~CPU_mask;
173 if (vendor == CPU_Intel) {
174 /* Check for Intel processors */
175 switch (cpu & CPU_mask) {
176 case 4: cpu = CPU_i486; break;
177 case 5: cpu = CPU_Pentium; break;
179 if ((model = _CPU_getCPUIDModel()) == 1)
180 cpu = CPU_PentiumPro;
181 else if (model <= 6) {
182 cacheSize = _CPU_getCacheSize();
183 if ((model == 5 && cacheSize == 0) ||
184 (model == 5 && cacheSize == 256) ||
185 (model == 6 && cacheSize == 128))
190 else if (model >= 7) {
191 /* Model 7 == Pentium III */
192 /* Model 8 == Celeron/Pentium III Coppermine */
193 cacheSize = _CPU_getCacheSize();
194 if ((model == 8 && cacheSize == 128))
197 cpu = CPU_PentiumIII;
204 else if (vendor == CPU_Cyrix) {
205 /* Check for Cyrix processors */
206 switch (cpu & CPU_mask) {
208 if ((model = _CPU_getCPUIDModel()) == 4)
209 cpu = CPU_CyrixMediaGX;
214 if ((model = _CPU_getCPUIDModel()) == 2)
217 cpu = CPU_CyrixMediaGXm;
222 if ((model = _CPU_getCPUIDModel()) <= 1)
223 cpu = CPU_Cyrix6x86MX;
231 else if (vendor == CPU_AMD) {
232 /* Check for AMD processors */
233 switch (cpu & CPU_mask) {
235 if ((model = _CPU_getCPUIDModel()) == 0)
241 if ((model = _CPU_getCPUIDModel()) <= 3)
249 else if (model == 13) {
250 if (_CPU_getCPUIDStepping() <= 3)
251 cpu = CPU_AMDK6_IIIplus;
253 cpu = CPU_AMDK6_2plus;
259 if ((model = _CPU_getCPUIDModel()) == 3)
268 else if (vendor == CPU_IDT) {
269 /* Check for IDT WinChip processors */
270 switch (cpu & CPU_mask) {
272 if ((model = _CPU_getCPUIDModel()) <= 4)
284 /* Assume a Pentium compatible Intel clone */
287 return cpu | vendor | (_CPU_getCPUIDStepping() << CPU_steppingShift);
290 if (_CPU_check80386())
292 else if (_CPU_check80486()) {
293 /* If we get here we may have a Cyrix processor so we can try
294 * enabling the CPUID instruction and trying again.
298 _CPU_enableCyrixCPUID();
299 return CPU_getProcessorType();
305 if (!_CPU_checkClone())
306 return cpu | CPU_Intel;
309 #elif defined(__ALPHA__)
311 #elif defined(__MIPS__)
313 #elif defined(__PPC__)
318 /****************************************************************************
320 Returns true if the processor supports Intel MMX extensions.
326 True if MMX is available, false if not.
329 This function determines if the processor supports the Intel MMX extended
333 CPU_getProcessorType, CPU_getProcessorSpeed, CPU_have3DNow, CPU_haveSSE,
335 ****************************************************************************/
336 ibool ZAPI CPU_haveMMX(void)
339 if (_CPU_haveCPUID())
340 return (_CPU_getCPUIDFeatures() & CPU_HaveMMX) != 0;
347 /****************************************************************************
349 Returns true if the processor supports AMD 3DNow! extensions.
355 True if 3DNow! is available, false if not.
358 This function determines if the processor supports the AMD 3DNow! extended
362 CPU_getProcessorType, CPU_getProcessorSpeed, CPU_haveMMX, CPU_haveSSE,
364 ****************************************************************************/
365 ibool ZAPI CPU_have3DNow(void)
368 if (_CPU_haveCPUID())
369 return _CPU_have3DNow();
376 /****************************************************************************
378 Returns true if the processor supports Intel SSE (formerly KNI) extensions.
384 True if Intel SSE (KNI) is available, false if not.
387 This function determines if the processor supports the Intel KNI extended
391 CPU_getProcessorType, CPU_getProcessorSpeed, CPU_haveMMX, CPU_have3DNow,
393 ****************************************************************************/
394 ibool ZAPI CPU_haveSSE(void)
397 if (_CPU_haveCPUID())
398 return (_CPU_getCPUIDFeatures() & CPU_HaveSSE) != 0;
405 /****************************************************************************
407 True if the RDTSC instruction is available, false if not.
410 This function determines if the processor supports the Intel RDTSC
411 instruction, for high precision timing. If the processor is not an Intel or
412 Intel clone CPU, this function will always return false.
415 Returns true if the processor supports RDTSC extensions.
421 True if RDTSC is available, false if not.
424 This function determines if the processor supports the RDTSC instruction
425 for reading the processor time stamp counter.
428 CPU_getProcessorType, CPU_getProcessorSpeed, CPU_haveMMX, CPU_have3DNow,
430 ****************************************************************************/
431 ibool ZAPI CPU_haveRDTSC(void)
434 if (_CPU_haveCPUID())
435 return (_CPU_getCPUIDFeatures() & CPU_HaveRDTSC) != 0;
444 #define ITERATIONS 16000
446 #define INNER_LOOPS 400
448 /****************************************************************************
450 If processor does not support time stamp reading, but is at least a 386 or
451 above, utilize method of timing a loop of BSF instructions which take a
452 known number of cycles to run on i386(tm), i486(tm), and Pentium(R)
454 ****************************************************************************/
455 static ulong GetBSFCpuSpeed(
458 CPU_largeInteger t0,t1,count_freq;
459 ulong ticks; /* Microseconds elapsed during test */
460 ulong current; /* Variable to store time elapsed */
462 ulong lowest = (ulong)-1;
464 iPriority = SetMaxThreadPriority();
465 GetCounterFrequency(&count_freq);
466 for (i = 0; i < SAMPLINGS; i++) {
468 for (j = 0; j < INNER_LOOPS; j++)
469 _CPU_runBSFLoop(ITERATIONS);
471 current = t1.low - t0.low;
472 if (current < lowest)
475 RestoreThreadPriority(iPriority);
477 /* Compute frequency */
478 ticks = _CPU_mulDiv(lowest,1000000,count_freq.low);
479 if ((ticks % count_freq.low) > (count_freq.low/2))
480 ticks++; /* Round up if necessary */
483 return ((cycles*INNER_LOOPS)/ticks);
488 /****************************************************************************
490 On processors supporting the Read Time Stamp opcode, compare elapsed
491 time on the High-Resolution Counter with elapsed cycles on the Time
494 The inner loop runs up to 20 times or until the average of the previous
495 three calculated frequencies is within 1 MHz of each of the individual
496 calculated frequencies. This resampling increases the accuracy of the
497 results since outside factors could affect this calculation.
498 ****************************************************************************/
499 static ulong GetRDTSCCpuSpeed(
502 CPU_largeInteger t0,t1,s0,s1,count_freq;
503 u64 stamp0, stamp1, ticks0, ticks1;
504 u64 total_cycles, cycles, hz, freq;
505 u64 total_ticks, ticks;
509 PM_set64_32(total_cycles,0);
510 PM_set64_32(total_ticks,0);
511 maxCount = accurate ? 600000 : 30000;
512 iPriority = SetMaxThreadPriority();
513 GetCounterFrequency(&count_freq);
514 PM_set64(freq,count_freq.high,count_freq.low);
515 for (tries = 0; tries < 3; tries++) {
516 /* Loop until 100 ticks have passed since last read of hi-res
517 * counter. This accounts for overhead later.
522 while ((t1.low - t0.low) < 100) {
524 _CPU_readTimeStamp(&s0);
527 /* Loop until 30000 ticks have passed since last read of hi-res counter.
528 * This allows for elapsed time for sampling. For a hi-res frequency
529 * of 1MHz, this is about 0.03 of a second. The frequency reported
530 * by the OS dependent code should be tuned to provide a good
531 * sample period depending on the accuracy of the OS timers (ie:
532 * if the accuracy is lower, lower the frequency to spend more time
533 * in the inner loop to get better accuracy).
537 while ((t1.low - t0.low) < maxCount) {
539 _CPU_readTimeStamp(&s1);
542 /* Find the difference during the timing loop */
543 PM_set64(stamp0,s0.high,s0.low);
544 PM_set64(stamp1,s1.high,s1.low);
545 PM_set64(ticks0,t0.high,t0.low);
546 PM_set64(ticks1,t1.high,t1.low);
547 PM_sub64(cycles,stamp1,stamp0);
548 PM_sub64(ticks,ticks1,ticks0);
550 /* Sum up the results */
551 PM_add64(total_ticks,total_ticks,ticks);
552 PM_add64(total_cycles,total_cycles,cycles);
554 RestoreThreadPriority(iPriority);
556 /* Compute frequency in Hz */
557 PM_mul64(hz,total_cycles,freq);
558 PM_div64(hz,hz,total_ticks);
559 return PM_64to32(hz);
562 #endif /* __INTEL__ */
564 /****************************************************************************
566 Returns the speed of the processor in MHz.
572 accurate - True if the speed should be measured accurately
575 Processor speed in MHz.
578 This function returns the speed of the CPU in MHz. Note that if the speed
579 cannot be determined, this function will return 0.
581 If the accurate parameter is set to true, this function will spend longer
582 profiling the speed of the CPU, and will not round the CPU speed that is
583 reported. This is important for highly accurate timing using the Pentium
584 RDTSC instruction, but it does take a lot longer for the profiling to
585 produce accurate results.
588 CPU_getProcessorSpeedInHz, CPU_getProcessorType, CPU_haveMMX,
590 ****************************************************************************/
591 ulong ZAPI CPU_getProcessorSpeed(
594 #if defined(__INTEL__)
595 /* Number of cycles needed to execute a single BSF instruction on i386+
600 static ulong intel_cycles[] = {
603 static ulong cyrix_cycles[] = {
606 static ulong amd_cycles[] = {
609 static ulong known_speeds[] = {
610 1000,950,900,850,800,750,700,650,600,550,500,450,433,400,350,
611 333,300,266,233,200,166,150,133,120,100,90,75,66,60,50,33,20,0,
614 if (CPU_haveRDTSC()) {
615 cpuSpeed = (GetRDTSCCpuSpeed(accurate) + 500000) / 1000000;
618 int type = CPU_getProcessorType();
619 int processor = type & CPU_mask;
620 int vendor = type & CPU_familyMask;
621 if (vendor == CPU_Intel)
622 cpuSpeed = GetBSFCpuSpeed(ITERATIONS * intel_cycles[processor - CPU_i386]);
623 else if (vendor == CPU_Cyrix)
624 cpuSpeed = GetBSFCpuSpeed(ITERATIONS * cyrix_cycles[processor - CPU_Cyrix6x86]);
625 else if (vendor == CPU_AMD)
626 cpuSpeed = GetBSFCpuSpeed(ITERATIONS * amd_cycles[0]);
631 /* Now normalise the results given known processors speeds, if the
632 * speed we measure is within 3MHz of the expected values
635 for (i = 0; known_speeds[i] != 0; i++) {
636 if (cpuSpeed >= (known_speeds[i]-3) && cpuSpeed <= (known_speeds[i]+3)) {
637 return known_speeds[i];
647 /****************************************************************************
649 Returns the speed of the processor in Hz.
655 Accurate processor speed in Hz.
658 This function returns the accurate speed of the CPU in Hz. Note that if the
659 speed cannot be determined, this function will return 0.
661 This function is similar to the CPU_getProcessorSpeed function, except that
662 it attempts to accurately measure the CPU speed in Hz. This is used
663 internally in the Zen Timer libraries to provide accurate real world timing
664 information. This is important for highly accurate timing using the Pentium
665 RDTSC instruction, but it does take a lot longer for the profiling to
666 produce accurate results.
669 CPU_getProcessorSpeed, CPU_getProcessorType, CPU_haveMMX,
671 ****************************************************************************/
672 ulong ZAPI CPU_getProcessorSpeedInHZ(
675 #if defined(__INTEL__)
676 if (CPU_haveRDTSC()) {
677 return GetRDTSCCpuSpeed(accurate);
679 return CPU_getProcessorSpeed(false) * 1000000;
685 /****************************************************************************
687 Returns a string defining the speed and name of the processor.
693 Processor name string.
696 This function returns an English string describing the speed and name of the
700 CPU_getProcessorType, CPU_haveMMX, CPU_getProcessorName
701 ****************************************************************************/
702 char * ZAPI CPU_getProcessorName(void)
704 #if defined(__INTEL__)
705 static int cpu,speed = -1;
706 static char name[80];
709 cpu = CPU_getProcessorType();
710 speed = CPU_getProcessorSpeed(false);
712 sprintf(name,"%d MHz ", speed);
713 switch (cpu & CPU_mask) {
715 strcat(name,"Intel i386 processor");
718 strcat(name,"Intel i486 processor");
721 strcat(name,"Intel Pentium processor");
724 strcat(name,"Intel Pentium Pro processor");
727 strcat(name,"Intel Pentium II processor");
730 strcat(name,"Intel Celeron processor");
733 strcat(name,"Intel Pentium III processor");
736 strcat(name,"Unknown Intel processor");
739 strcat(name,"Cyrix 6x86 processor");
741 case CPU_Cyrix6x86MX:
742 strcat(name,"Cyrix 6x86MX processor");
744 case CPU_CyrixMediaGX:
745 strcat(name,"Cyrix MediaGX processor");
747 case CPU_CyrixMediaGXm:
748 strcat(name,"Cyrix MediaGXm processor");
751 strcat(name,"Unknown Cyrix processor");
754 strcat(name,"AMD Am486 processor");
757 strcat(name,"AMD Am5x86 processor");
760 strcat(name,"AMD K5 processor");
763 strcat(name,"AMD K6 processor");
766 strcat(name,"AMD K6-2 processor");
769 strcat(name,"AMD K6-III processor");
771 case CPU_AMDK6_2plus:
772 strcat(name,"AMD K6-2+ processor");
774 case CPU_AMDK6_IIIplus:
775 strcat(name,"AMD K6-III+ processor");
778 strcat(name,"Unknown AMD processor");
781 strcat(name,"AMD Athlon processor");
784 strcat(name,"AMD Duron processor");
787 strcat(name,"IDT WinChip C6 processor");
790 strcat(name,"IDT WinChip 2 processor");
793 strcat(name,"Unknown IDT processor");
796 strcat(name,"Unknown processor");
799 strcat(name," with MMX(R)");
801 strcat(name,", 3DNow!(R)");
803 strcat(name,", SSE(R)");