Init AMD Bulldozer codebase.
[platform/upstream/openblas.git] / getarch.c
1 /*****************************************************************************
2 Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8
9    1. Redistributions of source code must retain the above copyright
10       notice, this list of conditions and the following disclaimer.
11
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in
14       the documentation and/or other materials provided with the
15       distribution.
16    3. Neither the name of the ISCAS nor the names of its contributors may 
17       be used to endorse or promote products derived from this software 
18       without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
25 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
26 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
27 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
28 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 
29 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 **********************************************************************************/
32
33 /*********************************************************************/
34 /* Copyright 2009, 2010 The University of Texas at Austin.           */
35 /* All rights reserved.                                              */
36 /*                                                                   */
37 /* Redistribution and use in source and binary forms, with or        */
38 /* without modification, are permitted provided that the following   */
39 /* conditions are met:                                               */
40 /*                                                                   */
41 /*   1. Redistributions of source code must retain the above         */
42 /*      copyright notice, this list of conditions and the following  */
43 /*      disclaimer.                                                  */
44 /*                                                                   */
45 /*   2. Redistributions in binary form must reproduce the above      */
46 /*      copyright notice, this list of conditions and the following  */
47 /*      disclaimer in the documentation and/or other materials       */
48 /*      provided with the distribution.                              */
49 /*                                                                   */
50 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
51 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
52 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
53 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
54 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
55 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
56 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
57 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
58 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
59 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
60 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
61 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
62 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
63 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
64 /*                                                                   */
65 /* The views and conclusions contained in the software and           */
66 /* documentation are those of the authors and should not be          */
67 /* interpreted as representing official policies, either expressed   */
68 /* or implied, of The University of Texas at Austin.                 */
69 /*********************************************************************/
70
/* Select the Windows code paths of this file (the <windows.h> include and
 * the GetSystemInfo() based core count).  Besides the GNU-style __WIN32__ /
 * __WIN64__ and the Cygwin macros, also accept _WIN32 / _WIN64, which are
 * the spellings predefined by toolchains that do not provide the
 * double-underscore forms. */
#if defined(__WIN32__) || defined(__WIN64__) || \
    defined(_WIN32) || defined(_WIN64) || \
    defined(__CYGWIN32__) || defined(__CYGWIN64__)
#define OS_WINDOWS
#endif
74
75 #include <stdio.h>
76 #include <string.h>
77 #ifdef OS_WINDOWS
78 #include <windows.h>
79 #endif
80 #if defined(__FreeBSD__) || defined(__APPLE__)
81 #include <sys/types.h>
82 #include <sys/sysctl.h>
83 #endif
84 #ifdef linux
85 #include <sys/sysinfo.h>
86 #endif
87
88 /* #define FORCE_P2             */
89 /* #define FORCE_KATMAI         */
90 /* #define FORCE_COPPERMINE     */
91 /* #define FORCE_NORTHWOOD      */
92 /* #define FORCE_PRESCOTT       */
93 /* #define FORCE_BANIAS         */
94 /* #define FORCE_YONAH          */
95 /* #define FORCE_CORE2          */
96 /* #define FORCE_PENRYN         */
97 /* #define FORCE_DUNNINGTON     */
98 /* #define FORCE_NEHALEM        */
99 /* #define FORCE_ATHLON         */
100 /* #define FORCE_OPTERON        */
101 /* #define FORCE_OPTERON_SSE3   */
102 /* #define FORCE_BARCELONA      */
103 /* #define FORCE_SHANGHAI       */
104 /* #define FORCE_ISTANBUL       */
105 /* #define FORCE_BULLDOZER      */
106 /* #define FORCE_BOBCAT */
107 /* #define FORCE_SSE_GENERIC    */
108 /* #define FORCE_VIAC3          */
109 /* #define FORCE_NANO           */
110 /* #define FORCE_POWER3         */
111 /* #define FORCE_POWER4         */
112 /* #define FORCE_POWER5         */
113 /* #define FORCE_POWER6         */
114 /* #define FORCE_PPCG4          */
115 /* #define FORCE_PPC970         */
116 /* #define FORCE_PPC970MP       */
117 /* #define FORCE_PPC440         */
118 /* #define FORCE_PPC440FP2      */
119 /* #define FORCE_CELL           */
120 /* #define FORCE_SICORTEX       */
121 /* #define FORCE_LOONGSON3A      */
122 /* #define FORCE_LOONGSON3B      */
123 /* #define FORCE_ITANIUM2       */
124 /* #define FORCE_GENERIC        */
125 /* #define FORCE_SPARC          */
126 /* #define FORCE_SPARCV7        */
127
/* Forced target selected with -DFORCE_P2 (sub-architecture "PENTIUM2"). */
#ifdef FORCE_P2
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE    "X86"
#define SUBARCHITECTURE "PENTIUM2"
/* L2_SIZE was written as 512488, which is not a power of two; it is taken
 * to be a mistyped 524288 (512 * 1024), the value the KATMAI entry uses. */
#define ARCHCONFIG   "-DPENTIUM2 " \
                     "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
                     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
                     "-DHAVE_CMOV -DHAVE_MMX"
#define LIBNAME   "p2"
#define CORENAME  "P5"
#endif
141
142 #ifdef FORCE_COPPERMINE
143 #define FORCE
144 #define FORCE_INTEL
145 #define ARCHITECTURE    "X86"
146 #define SUBARCHITECTURE "PENTIUM3"
147 #define ARCHCONFIG   "-DPENTIUM3 " \
148                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
149                      "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
150                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
151                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
152 #define LIBNAME   "coppermine"
153 #define CORENAME  "COPPERMINE"
154 #endif
155
156 #ifdef FORCE_KATMAI
157 #define FORCE
158 #define FORCE_INTEL
159 #define ARCHITECTURE    "X86"
160 #define SUBARCHITECTURE "PENTIUM3"
161 #define ARCHCONFIG   "-DPENTIUM3 " \
162                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
163                      "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
164                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
165                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
166 #define LIBNAME   "katmai"
167 #define CORENAME  "KATMAI"
168 #endif
169
170 #ifdef FORCE_NORTHWOOD
171 #define FORCE
172 #define FORCE_INTEL
173 #define ARCHITECTURE    "X86"
174 #define SUBARCHITECTURE "PENTIUM4"
175 #define ARCHCONFIG   "-DPENTIUM4 " \
176                      "-DL1_DATA_SIZE=8192 -DL1_DATA_LINESIZE=64 " \
177                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
178                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
179                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
180 #define LIBNAME   "northwood"
181 #define CORENAME  "NORTHWOOD"
182 #endif
183
184 #ifdef FORCE_PRESCOTT
185 #define FORCE
186 #define FORCE_INTEL
187 #define ARCHITECTURE    "X86"
188 #define SUBARCHITECTURE "PENTIUM4"
189 #define ARCHCONFIG   "-DPENTIUM4 " \
190                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
191                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
192                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
193                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3"
194 #define LIBNAME   "prescott"
195 #define CORENAME  "PRESCOTT"
196 #endif
197
198 #ifdef FORCE_BANIAS
199 #define FORCE
200 #define FORCE_INTEL
201 #define ARCHITECTURE    "X86"
202 #define SUBARCHITECTURE "BANIAS"
203 #define ARCHCONFIG   "-DPENTIUMM " \
204                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
205                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
206                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
207                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
208 #define LIBNAME   "banias"
209 #define CORENAME  "BANIAS"
210 #endif
211
212 #ifdef FORCE_YONAH
213 #define FORCE
214 #define FORCE_INTEL
215 #define ARCHITECTURE    "X86"
216 #define SUBARCHITECTURE "YONAH"
217 #define ARCHCONFIG   "-DPENTIUMM " \
218                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
219                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
220                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
221                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
222 #define LIBNAME   "yonah"
223 #define CORENAME  "YONAH"
224 #endif
225
/* Forced target selected with -DFORCE_CORE2. */
#ifdef FORCE_CORE2
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE    "X86"
/* Spelling corrected from "CONRORE"; this macro is not read anywhere else
 * in this file, so the change does not affect the generated output. */
#define SUBARCHITECTURE "CONROE"
#define ARCHCONFIG   "-DCORE2 " \
                     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
                     "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
                     "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
                     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
#define LIBNAME   "core2"
#define CORENAME  "CORE2"
#endif
239
240 #ifdef FORCE_PENRYN
241 #define FORCE
242 #define FORCE_INTEL
243 #define ARCHITECTURE    "X86"
244 #define SUBARCHITECTURE "PENRYN"
245 #define ARCHCONFIG   "-DPENRYN " \
246                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
247                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
248                      "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
249                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1"
250 #define LIBNAME   "penryn"
251 #define CORENAME  "PENRYN"
252 #endif
253
254 #ifdef FORCE_DUNNINGTON
255 #define FORCE
256 #define FORCE_INTEL
257 #define ARCHITECTURE    "X86"
258 #define SUBARCHITECTURE "DUNNINGTON"
259 #define ARCHCONFIG   "-DDUNNINGTON " \
260                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
261                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
262                      "-DL3_SIZE=16777216 -DL3_LINESIZE=64 " \
263                      "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
264                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1"
265 #define LIBNAME   "dunnington"
266 #define CORENAME  "DUNNINGTON"
267 #endif
268
269 #ifdef FORCE_NEHALEM
270 #define FORCE
271 #define FORCE_INTEL
272 #define ARCHITECTURE    "X86"
273 #define SUBARCHITECTURE "NEHALEM"
274 #define ARCHCONFIG   "-DNEHALEM " \
275                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
276                      "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
277                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
278                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2"
279 #define LIBNAME   "nehalem"
280 #define CORENAME  "NEHALEM"
281 #endif
282
283 #ifdef FORCE_SANDYBRIDGE
284 #define FORCE
285 #define FORCE_INTEL
286 #define ARCHITECTURE    "X86"
287 #define SUBARCHITECTURE "SANDYBRIDGE"
288 #define ARCHCONFIG   "-DSANDYBRIDGE " \
289                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
290                      "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
291                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
292                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX"
293 #define LIBNAME   "sandybridge"
294 #define CORENAME  "SANDYBRIDGE"
295 #endif
296
297 #ifdef FORCE_ATOM
298 #define FORCE
299 #define FORCE_INTEL
300 #define ARCHITECTURE    "X86"
301 #define SUBARCHITECTURE "ATOM"
302 #define ARCHCONFIG   "-DATOM " \
303                      "-DL1_DATA_SIZE=24576 -DL1_DATA_LINESIZE=64 " \
304                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
305                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
306                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
307 #define LIBNAME   "atom"
308 #define CORENAME  "ATOM"
309 #endif
310
311 #ifdef FORCE_ATHLON
312 #define FORCE
313 #define FORCE_INTEL
314 #define ARCHITECTURE    "X86"
315 #define SUBARCHITECTURE "ATHLON"
316 #define ARCHCONFIG   "-DATHLON " \
317                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
318                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
319                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW  " \
320                      "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE "
321 #define LIBNAME   "athlon"
322 #define CORENAME  "ATHLON"
323 #endif
324
325 #ifdef FORCE_OPTERON
326 #define FORCE
327 #define FORCE_INTEL
328 #define ARCHITECTURE    "X86"
329 #define SUBARCHITECTURE "OPTERON"
330 #define ARCHCONFIG   "-DOPTERON " \
331                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
332                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
333                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
334                      "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
335 #define LIBNAME   "opteron"
336 #define CORENAME  "OPTERON"
337 #endif
338
339 #ifdef FORCE_OPTERON_SSE3
340 #define FORCE
341 #define FORCE_INTEL
342 #define ARCHITECTURE    "X86"
343 #define SUBARCHITECTURE "OPTERON"
344 #define ARCHCONFIG   "-DOPTERON " \
345                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
346                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
347                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
348                      "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3"
349 #define LIBNAME   "opteron"
350 #define CORENAME  "OPTERON"
351 #endif
352
353 #if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL)
354 #define FORCE
355 #define FORCE_INTEL
356 #define ARCHITECTURE    "X86"
357 #define SUBARCHITECTURE "BARCELONA"
358 #define ARCHCONFIG   "-DBARCELONA " \
359                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
360                      "-DL2_SIZE=524288 -DL2_LINESIZE=64  -DL3_SIZE=2097152 " \
361                      "-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 " \
362                      "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
363                      "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU"
364 #define LIBNAME   "barcelona"
365 #define CORENAME  "BARCELONA"
366 #endif
367
368 #if defined(FORCE_BOBCAT) 
369 #define FORCE
370 #define FORCE_INTEL
371 #define ARCHITECTURE    "X86"
372 #define SUBARCHITECTURE "BOBCAT"
373 #define ARCHCONFIG   "-DBOBCAT " \
374                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
375                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
376                      "-DDTB_DEFAULT_ENTRIES=40 -DDTB_SIZE=4096 " \
377                      "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 " \
378                      "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_CFLUSH -DHAVE_CMOV"
379 #define LIBNAME   "bobcat"
380 #define CORENAME  "BOBCAT"
381 #endif
382
/* Forced target selected with -DFORCE_BULLDOZER.
 * NOTE(review): the flag list defines BARCELONA rather than a
 * Bulldozer-specific symbol - presumably so the Barcelona code paths are
 * reused for now; confirm before changing. */
#if defined (FORCE_BULLDOZER)
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE    "X86"
#define SUBARCHITECTURE "BULLDOZER"
/* Every literal except the last must end in a space: adjacent string
 * literals are concatenated verbatim, and main() splits the result on
 * spaces.  The missing space after -DHAVE_FASTMOVU used to fuse it with
 * -DHAVE_AVX into the single bogus token "HAVE_FASTMOVU-DHAVE_AVX". */
#define ARCHCONFIG   "-DBARCELONA " \
                     "-DL1_DATA_SIZE=49152 -DL1_DATA_LINESIZE=64 " \
                     "-DL2_SIZE=1024000 -DL2_LINESIZE=64  -DL3_SIZE=16777216 " \
                     "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 " \
                     "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
                     "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU " \
                     "-DHAVE_AVX -DHAVE_FMA4"
#define LIBNAME   "bulldozer"
#define CORENAME  "BULLDOZER"
#endif
398
399 #ifdef FORCE_SSE_GENERIC
400 #define FORCE
401 #define FORCE_INTEL
402 #define ARCHITECTURE    "X86"
403 #define SUBARCHITECTURE "GENERIC"
404 #define ARCHCONFIG   "-DGENERIC " \
405                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
406                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
407                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
408                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2"
409 #define LIBNAME   "generic"
410 #define CORENAME  "GENERIC"
411 #endif
412
413 #ifdef FORCE_VIAC3
414 #define FORCE
415 #define FORCE_INTEL
416 #define ARCHITECTURE    "X86"
417 #define SUBARCHITECTURE "VIAC3"
418 #define ARCHCONFIG   "-DVIAC3 " \
419                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
420                      "-DL2_SIZE=65536 -DL2_LINESIZE=32 " \
421                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 " \
422                      "-DHAVE_MMX -DHAVE_SSE "
423 #define LIBNAME   "viac3"
424 #define CORENAME  "VIAC3"
425 #endif
426
427 #ifdef FORCE_NANO
428 #define FORCE
429 #define FORCE_INTEL
430 #define ARCHITECTURE    "X86"
431 #define SUBARCHITECTURE "NANO"
432 #define ARCHCONFIG   "-DNANO " \
433                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
434                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
435                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
436                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
437 #define LIBNAME   "nano"
438 #define CORENAME  "NANO"
439 #endif
440
441 #ifdef FORCE_POWER3
442 #define FORCE
443 #define ARCHITECTURE    "POWER"
444 #define SUBARCHITECTURE "POWER3"
445 #define SUBDIRNAME      "power"
446 #define ARCHCONFIG   "-DPOWER3 " \
447                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
448                      "-DL2_SIZE=2097152 -DL2_LINESIZE=128 " \
449                      "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
450 #define LIBNAME   "power3"
451 #define CORENAME  "POWER3"
452 #endif
453
454 #ifdef FORCE_POWER4
455 #define FORCE
456 #define ARCHITECTURE    "POWER"
457 #define SUBARCHITECTURE "POWER4"
458 #define SUBDIRNAME      "power"
459 #define ARCHCONFIG   "-DPOWER4 " \
460                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
461                      "-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \
462                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
463 #define LIBNAME   "power4"
464 #define CORENAME  "POWER4"
465 #endif
466
467 #ifdef FORCE_POWER5
468 #define FORCE
469 #define ARCHITECTURE    "POWER"
470 #define SUBARCHITECTURE "POWER5"
471 #define SUBDIRNAME      "power"
472 #define ARCHCONFIG   "-DPOWER5 " \
473                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
474                      "-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \
475                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
476 #define LIBNAME   "power5"
477 #define CORENAME  "POWER5"
478 #endif
479
480 #ifdef FORCE_POWER6
481 #define FORCE
482 #define ARCHITECTURE    "POWER"
483 #define SUBARCHITECTURE "POWER6"
484 #define SUBDIRNAME      "power"
485 #define ARCHCONFIG   "-DPOWER6 " \
486                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
487                      "-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \
488                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
489 #define LIBNAME   "power6"
490 #define CORENAME  "POWER6"
491 #endif
492
493 #ifdef FORCE_PPCG4
494 #define FORCE
495 #define ARCHITECTURE    "POWER"
496 #define SUBARCHITECTURE "PPCG4"
497 #define SUBDIRNAME      "power"
498 #define ARCHCONFIG   "-DPPCG4 " \
499                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
500                      "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
501                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
502 #define LIBNAME   "ppcg4"
503 #define CORENAME  "PPCG4"
504 #endif
505
/* Forced target selected with -DFORCE_PPC970. */
#ifdef FORCE_PPC970
#define FORCE
#define ARCHITECTURE    "POWER"
#define SUBARCHITECTURE "PPC970"
#define SUBDIRNAME      "power"
/* L2_SIZE was written as 512488, which is not a power of two; it is taken
 * to be a mistyped 524288 (512 * 1024). */
#define ARCHCONFIG   "-DPPC970 " \
                     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=128 " \
                     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "ppc970"
#define CORENAME  "PPC970"
#endif
518
/* Forced target selected with -DFORCE_PPC970MP; shares the PPC970 core name
 * and differs from FORCE_PPC970 only in library name and L2 size. */
#ifdef FORCE_PPC970MP
#define FORCE
#define ARCHITECTURE    "POWER"
#define SUBARCHITECTURE "PPC970"
#define SUBDIRNAME      "power"
/* L2_SIZE was written as 1024976, which is not a power of two; it is taken
 * to be a mistyped 1048576 (1024 * 1024). */
#define ARCHCONFIG   "-DPPC970 " \
                     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
                     "-DL2_SIZE=1048576 -DL2_LINESIZE=128 " \
                     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "ppc970mp"
#define CORENAME  "PPC970"
#endif
531
532 #ifdef FORCE_PPC440
533 #define FORCE
534 #define ARCHITECTURE    "POWER"
535 #define SUBARCHITECTURE "PPC440"
536 #define SUBDIRNAME      "power"
537 #define ARCHCONFIG   "-DPPC440 " \
538                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
539                      "-DL2_SIZE=16384 -DL2_LINESIZE=128 " \
540                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
541 #define LIBNAME   "ppc440"
542 #define CORENAME  "PPC440"
543 #endif
544
545 #ifdef FORCE_PPC440FP2
546 #define FORCE
547 #define ARCHITECTURE    "POWER"
548 #define SUBARCHITECTURE "PPC440FP2"
549 #define SUBDIRNAME      "power"
550 #define ARCHCONFIG   "-DPPC440FP2 " \
551                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
552                      "-DL2_SIZE=16384 -DL2_LINESIZE=128 " \
553                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
554 #define LIBNAME   "ppc440FP2"
555 #define CORENAME  "PPC440FP2"
556 #endif
557
/* Forced target selected with -DFORCE_CELL. */
#ifdef FORCE_CELL
#define FORCE
#define ARCHITECTURE    "POWER"
#define SUBARCHITECTURE "CELL"
#define SUBDIRNAME      "power"
/* L2_SIZE was written as 512488, which is not a power of two; it is taken
 * to be a mistyped 524288 (512 * 1024). */
#define ARCHCONFIG   "-DCELL " \
                     "-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=128 " \
                     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "cell"
#define CORENAME  "CELL"
#endif
570
/* Forced target selected with -DFORCE_SICORTEX. */
#ifdef FORCE_SICORTEX
#define FORCE
#define ARCHITECTURE    "MIPS"
#define SUBARCHITECTURE "SICORTEX"
#define SUBDIRNAME      "mips"
/* L2_SIZE was written as 512488, which is not a power of two; it is taken
 * to be a mistyped 524288 (512 * 1024). */
#define ARCHCONFIG   "-DSICORTEX " \
                     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
                     "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "mips"
#define CORENAME  "sicortex"
#endif
583
584
/* Forced target selected with -DFORCE_LOONGSON3A. */
#ifdef FORCE_LOONGSON3A
#define FORCE
#define ARCHITECTURE    "MIPS"
#define SUBARCHITECTURE "LOONGSON3A"
#define SUBDIRNAME      "mips64"
/* L2_SIZE was written as 512488, which is not a power of two; it is taken
 * to be a mistyped 524288 (512 * 1024). */
#define ARCHCONFIG   "-DLOONGSON3A " \
       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
       "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
#define LIBNAME   "loongson3a"
#define CORENAME  "LOONGSON3A"
#endif
598
/* Forced target selected with -DFORCE_LOONGSON3B. */
#ifdef FORCE_LOONGSON3B
#define FORCE
#define ARCHITECTURE    "MIPS"
#define SUBARCHITECTURE "LOONGSON3B"
#define SUBDIRNAME      "mips64"
/* L2_SIZE was written as 512488, which is not a power of two; it is taken
 * to be a mistyped 524288 (512 * 1024). */
#define ARCHCONFIG   "-DLOONGSON3B " \
       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
       "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
#define LIBNAME   "loongson3b"
#define CORENAME  "LOONGSON3B"
#endif
612
613 #ifdef FORCE_ITANIUM2
614 #define FORCE
615 #define ARCHITECTURE    "IA64"
616 #define SUBARCHITECTURE "ITANIUM2"
617 #define SUBDIRNAME      "ia64"
618 #define ARCHCONFIG   "-DITANIUM2 " \
619                      "-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \
620                      "-DL2_SIZE=1572864 -DL2_LINESIZE=128 -DDTB_SIZE=16384 -DDTB_DEFAULT_ENTRIES=128 "
621 #define LIBNAME   "itanium2"
622 #define CORENAME  "itanium2"
623 #endif
624
625 #ifdef FORCE_SPARC
626 #define FORCE
627 #define ARCHITECTURE    "SPARC"
628 #define SUBARCHITECTURE "SPARC"
629 #define SUBDIRNAME      "sparc"
630 #define ARCHCONFIG   "-DSPARC -DV9 " \
631                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
632                      "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 "
633 #define LIBNAME   "sparc"
634 #define CORENAME  "sparc"
635 #endif
636
637 #ifdef FORCE_SPARCV7
638 #define FORCE
639 #define ARCHITECTURE    "SPARC"
640 #define SUBARCHITECTURE "SPARC"
641 #define SUBDIRNAME      "sparc"
642 #define ARCHCONFIG   "-DSPARC -DV7 " \
643                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
644                      "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 "
645 #define LIBNAME   "sparcv7"
646 #define CORENAME  "sparcv7"
647 #endif
648
/* Forced target selected with -DFORCE_GENERIC. */
#ifdef FORCE_GENERIC
#define FORCE
#define ARCHITECTURE    "GENERIC"
#define SUBARCHITECTURE "GENERIC"
#define SUBDIRNAME      "generic"
/* L2_SIZE was written as 512488, which is not a power of two; it is taken
 * to be a mistyped 524288 (512 * 1024), as used by FORCE_SSE_GENERIC. */
#define ARCHCONFIG   "-DGENERIC " \
                     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=128 " \
                     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "generic"
#define CORENAME  "generic"
#endif
661
662 #ifndef FORCE
663
664 #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
665     defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__)
666 #ifndef POWER
667 #define POWER
668 #endif
669 #define OPENBLAS_SUPPORTED
670 #endif
671
672 #if defined(__i386__) || (__x86_64__)
673 #include "cpuid_x86.c"
674 #define OPENBLAS_SUPPORTED
675 #endif
676
677 #ifdef __ia64__
678 #include "cpuid_ia64.c"
679 #define OPENBLAS_SUPPORTED
680 #endif
681
682 #ifdef __alpha
683 #include "cpuid_alpha.c"
684 #define OPENBLAS_SUPPORTED
685 #endif
686
687 #ifdef POWER
688 #include "cpuid_power.c"
689 #define OPENBLAS_SUPPORTED
690 #endif
691
692 #ifdef sparc
693 #include "cpuid_sparc.c"
694 #define OPENBLAS_SUPPORTED
695 #endif
696
697 #ifdef __mips__
698 #include "cpuid_mips.c"
699 #define OPENBLAS_SUPPORTED
700 #endif
701
702 #ifndef OPENBLAS_SUPPORTED
703 #error "This arch/CPU is not supported by OpenBLAS."
704 #endif
705
706 #else
707
708 #endif
709
/*
 * Return the number of processors reported by the operating system.
 *
 *   - linux:            get_nprocs()
 *   - OS_WINDOWS:       dwNumberOfProcessors from GetSystemInfo()
 *   - FreeBSD / Apple:  the CTL_HW / HW_NCPU sysctl
 *   - anything else:    the fixed default 2
 *
 * The same default of 2 is returned when the sysctl call fails or reports
 * fewer than one CPU, instead of handing back an uninitialised value.
 */
static int get_num_cores(void) {

#if defined(linux)

  return get_nprocs();

#elif defined(OS_WINDOWS)

  SYSTEM_INFO sysinfo;

  GetSystemInfo(&sysinfo);
  return (int)sysinfo.dwNumberOfProcessors;

#elif defined(__FreeBSD__) || defined(__APPLE__)

  int m[2];
  int count = 0;
  size_t len = sizeof(count);

  m[0] = CTL_HW;
  m[1] = HW_NCPU;

  /* sysctl() returns 0 on success; count is only trusted in that case. */
  if ((sysctl(m, 2, &count, &len, NULL, 0) != 0) || (count < 1)) return 2;

  return count;

#else

  return 2;

#endif
}
738
#ifdef FORCE
/*
 * Scan config for "-DNAME" / "-DNAME=VALUE" tokens (a token ends at a space
 * or at the end of the string) and print one line per token to stdout:
 *
 *   prefix NAME assign VALUE     when the token contains '='
 *   prefix NAME implicit         when it does not and NAME is non-empty
 *
 * Text outside of "-D" tokens is skipped.
 */
static void print_config_flags(const char *config, const char *prefix,
                               const char *assign, const char *implicit) {
  const char *p = config;
  const char *name;

  while (*p != '\0') {

    if ((p[0] != '-') || (p[1] != 'D')) {
      p ++;
      continue;
    }

    p += 2;
    name = p;
    fputs(prefix, stdout);

    /* Symbol name: everything up to '=', a space or the end. */
    while ((*p != ' ') && (*p != '\0') && (*p != '=')) {
      putchar(*p);
      p ++;
    }

    if (*p == '=') {
      /* Explicit value: copy it through unchanged. */
      fputs(assign, stdout);
      p ++;
      while ((*p != ' ') && (*p != '\0')) {
        putchar(*p);
        p ++;
      }
    } else if (p != name) {
      fputs(implicit, stdout);
    }

    putchar('\n');
  }
}
#endif

/*
 * Print build configuration for the detected or forced CPU.  The first
 * character of argv[1] selects the output:
 *
 *   '0'  Makefile variables: CORE, LIBCORE, NUM_CORES, on x86 the feature
 *        flags (NAME=VALUE, or NAME=1 for flags without a value), and a
 *        "MAKE += -j N" line on non-Windows hosts.
 *   '1'  "#define" lines for config.h.
 *   '2'  "SMP=1" when more than one core is reported.
 *
 * Without arguments, or with any other selector, nothing is printed.
 * Always returns 0.
 */
int main(int argc, char *argv[]){

  if (argc == 1) return 0;

  switch (argv[1][0]) {

  case '0' : /* for Makefile */

#ifdef FORCE
    printf("CORE=%s\n", CORENAME);
    printf("LIBCORE=%s\n", LIBNAME);
#else
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__)
    printf("CORE=%s\n", get_corename());
#endif
    printf("LIBCORE=");
    get_libname();
    printf("\n");
#endif

    printf("NUM_CORES=%d\n", get_num_cores());

#if defined(__i386__) || defined(__x86_64__)
#ifdef FORCE
    /* Flags without an explicit value are reported as NAME=1. */
    print_config_flags(ARCHCONFIG, "", "=", "=1");
#else
    get_sse();
#endif
#endif

#ifndef OS_WINDOWS
    printf("MAKE += -j %d\n", get_num_cores());
#endif

    break;

  case '1' : /* For config.h */
#ifdef FORCE
    {
      char buffer[8192];

      /* The trailing newline becomes part of the last symbol's text, so the
         output ends with an empty line, exactly as before. */
      snprintf(buffer, sizeof(buffer), "%s -DCORE_%s\n", ARCHCONFIG, CORENAME);
      print_config_flags(buffer, "#define ", " ", "");
    }
#else
    get_cpuconfig();
#endif
    break;

  case '2' : /* SMP */
    if (get_num_cores() > 1) printf("SMP=1\n");
    break;

  default :
    break;
  }

  fflush(stdout);

  return 0;
}
853