Support AMD Piledriver by bulldozer kernels.
[platform/upstream/openblas.git] / getarch.c
1 /*****************************************************************************
2 Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8
9    1. Redistributions of source code must retain the above copyright
10       notice, this list of conditions and the following disclaimer.
11
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in
14       the documentation and/or other materials provided with the
15       distribution.
16    3. Neither the name of the ISCAS nor the names of its contributors may 
17       be used to endorse or promote products derived from this software 
18       without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
25 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
26 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
27 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
28 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 
29 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 **********************************************************************************/
32
33 /*********************************************************************/
34 /* Copyright 2009, 2010 The University of Texas at Austin.           */
35 /* All rights reserved.                                              */
36 /*                                                                   */
37 /* Redistribution and use in source and binary forms, with or        */
38 /* without modification, are permitted provided that the following   */
39 /* conditions are met:                                               */
40 /*                                                                   */
41 /*   1. Redistributions of source code must retain the above         */
42 /*      copyright notice, this list of conditions and the following  */
43 /*      disclaimer.                                                  */
44 /*                                                                   */
45 /*   2. Redistributions in binary form must reproduce the above      */
46 /*      copyright notice, this list of conditions and the following  */
47 /*      disclaimer in the documentation and/or other materials       */
48 /*      provided with the distribution.                              */
49 /*                                                                   */
50 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
51 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
52 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
53 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
54 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
55 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
56 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
57 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
58 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
59 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
60 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
61 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
62 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
63 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
64 /*                                                                   */
65 /* The views and conclusions contained in the software and           */
66 /* documentation are those of the authors and should not be          */
67 /* interpreted as representing official policies, either expressed   */
68 /* or implied, of The University of Texas at Austin.                 */
69 /*********************************************************************/
70
71 #if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__)
72 #define OS_WINDOWS
73 #endif
74
#include <stdio.h>
#include <string.h>
#ifdef OS_WINDOWS
#include <windows.h>
#endif
#if defined(__FreeBSD__) || defined(__APPLE__)
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
#ifdef linux
#include <sys/sysinfo.h>
#endif
#if defined(__linux__) && !defined(linux)
#include <sys/sysinfo.h>
#endif
87
88 /* #define FORCE_P2             */
89 /* #define FORCE_KATMAI         */
90 /* #define FORCE_COPPERMINE     */
91 /* #define FORCE_NORTHWOOD      */
92 /* #define FORCE_PRESCOTT       */
93 /* #define FORCE_BANIAS         */
94 /* #define FORCE_YONAH          */
95 /* #define FORCE_CORE2          */
96 /* #define FORCE_PENRYN         */
97 /* #define FORCE_DUNNINGTON     */
98 /* #define FORCE_NEHALEM        */
99 /* #define FORCE_SANDYBRIDGE    */
100 /* #define FORCE_ATOM           */
101 /* #define FORCE_ATHLON         */
102 /* #define FORCE_OPTERON        */
103 /* #define FORCE_OPTERON_SSE3   */
104 /* #define FORCE_BARCELONA      */
105 /* #define FORCE_SHANGHAI       */
106 /* #define FORCE_ISTANBUL       */
107 /* #define FORCE_BOBCAT         */
108 /* #define FORCE_BULLDOZER      */
109 /* #define FORCE_PILEDRIVER     */
110 /* #define FORCE_SSE_GENERIC    */
111 /* #define FORCE_VIAC3          */
112 /* #define FORCE_NANO           */
113 /* #define FORCE_POWER3         */
114 /* #define FORCE_POWER4         */
115 /* #define FORCE_POWER5         */
116 /* #define FORCE_POWER6         */
117 /* #define FORCE_PPCG4          */
118 /* #define FORCE_PPC970         */
119 /* #define FORCE_PPC970MP       */
120 /* #define FORCE_PPC440         */
121 /* #define FORCE_PPC440FP2      */
122 /* #define FORCE_CELL           */
123 /* #define FORCE_SICORTEX       */
124 /* #define FORCE_LOONGSON3A     */
125 /* #define FORCE_LOONGSON3B     */
126 /* #define FORCE_ITANIUM2       */
127 /* #define FORCE_SPARC          */
128 /* #define FORCE_SPARCV7        */
129 /* #define FORCE_GENERIC        */
130
/* Forced target FORCE_P2: fixed PENTIUM2 settings instead of CPU auto-detection.
   NOTE(review): L2_SIZE=512488 is not 512 KiB (524288) -- confirm it is intended. */
#if defined(FORCE_P2)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "PENTIUM2"
#  define LIBNAME         "p2"
#  define CORENAME        "P5"
#  define ARCHCONFIG \
     "-DPENTIUM2 " \
     "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
     "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
     "-DHAVE_CMOV -DHAVE_MMX"
#endif /* FORCE_P2 */
144
/* Forced target FORCE_KATMAI: fixed PENTIUM3 settings (512 KiB L2) instead of CPU auto-detection. */
#if defined(FORCE_KATMAI)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "PENTIUM3"
#  define LIBNAME         "katmai"
#  define CORENAME        "KATMAI"
#  define ARCHCONFIG \
     "-DPENTIUM3 " \
     "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
     "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
#endif /* FORCE_KATMAI */
158
/* Forced target FORCE_COPPERMINE: fixed PENTIUM3 settings (256 KiB L2) instead of CPU auto-detection. */
#if defined(FORCE_COPPERMINE)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "PENTIUM3"
#  define LIBNAME         "coppermine"
#  define CORENAME        "COPPERMINE"
#  define ARCHCONFIG \
     "-DPENTIUM3 " \
     "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
     "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
#endif /* FORCE_COPPERMINE */
172
/* Forced target FORCE_NORTHWOOD: fixed PENTIUM4 settings (8 KiB L1, 512 KiB L2) instead of CPU auto-detection. */
#if defined(FORCE_NORTHWOOD)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "PENTIUM4"
#  define LIBNAME         "northwood"
#  define CORENAME        "NORTHWOOD"
#  define ARCHCONFIG \
     "-DPENTIUM4 " \
     "-DL1_DATA_SIZE=8192 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
#endif /* FORCE_NORTHWOOD */
186
/* Forced target FORCE_PRESCOTT: fixed PENTIUM4 settings with SSE3 instead of CPU auto-detection. */
#if defined(FORCE_PRESCOTT)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "PENTIUM4"
#  define LIBNAME         "prescott"
#  define CORENAME        "PRESCOTT"
#  define ARCHCONFIG \
     "-DPENTIUM4 " \
     "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3"
#endif /* FORCE_PRESCOTT */
200
/* Forced target FORCE_BANIAS: fixed PENTIUMM settings instead of CPU auto-detection. */
#if defined(FORCE_BANIAS)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "BANIAS"
#  define LIBNAME         "banias"
#  define CORENAME        "BANIAS"
#  define ARCHCONFIG \
     "-DPENTIUMM " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
#endif /* FORCE_BANIAS */
214
/* Forced target FORCE_YONAH: same PENTIUMM flag set as FORCE_BANIAS, with YONAH names. */
#if defined(FORCE_YONAH)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "YONAH"
#  define LIBNAME         "yonah"
#  define CORENAME        "YONAH"
#  define ARCHCONFIG \
     "-DPENTIUMM " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
#endif /* FORCE_YONAH */
228
/* Forced target FORCE_CORE2: fixed CORE2 settings instead of CPU auto-detection.
   NOTE(review): SUBARCHITECTURE "CONRORE" is presumably a misspelling of "CONROE";
   kept verbatim in case external tooling matches on it -- confirm before changing. */
#if defined(FORCE_CORE2)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "CONRORE"
#  define LIBNAME         "core2"
#  define CORENAME        "CORE2"
#  define ARCHCONFIG \
     "-DCORE2 " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
#endif /* FORCE_CORE2 */
242
/* Forced target FORCE_PENRYN: fixed PENRYN settings (up to SSE4.1) instead of CPU auto-detection. */
#if defined(FORCE_PENRYN)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "PENRYN"
#  define LIBNAME         "penryn"
#  define CORENAME        "PENRYN"
#  define ARCHCONFIG \
     "-DPENRYN " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1"
#endif /* FORCE_PENRYN */
256
/* Forced target FORCE_DUNNINGTON: fixed DUNNINGTON settings (adds a 16 MiB L3) instead of CPU auto-detection. */
#if defined(FORCE_DUNNINGTON)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "DUNNINGTON"
#  define LIBNAME         "dunnington"
#  define CORENAME        "DUNNINGTON"
#  define ARCHCONFIG \
     "-DDUNNINGTON " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
     "-DL3_SIZE=16777216 -DL3_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1"
#endif /* FORCE_DUNNINGTON */
271
/* Forced target FORCE_NEHALEM: fixed NEHALEM settings (up to SSE4.2) instead of CPU auto-detection. */
#if defined(FORCE_NEHALEM)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "NEHALEM"
#  define LIBNAME         "nehalem"
#  define CORENAME        "NEHALEM"
#  define ARCHCONFIG \
     "-DNEHALEM " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2"
#endif /* FORCE_NEHALEM */
285
/* Forced target FORCE_SANDYBRIDGE: fixed SANDYBRIDGE settings (up to AVX) instead of CPU auto-detection. */
#if defined(FORCE_SANDYBRIDGE)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "SANDYBRIDGE"
#  define LIBNAME         "sandybridge"
#  define CORENAME        "SANDYBRIDGE"
#  define ARCHCONFIG \
     "-DSANDYBRIDGE " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX"
#endif /* FORCE_SANDYBRIDGE */
299
/* Forced target FORCE_ATOM: fixed ATOM settings (24 KiB L1, 4-way L2) instead of CPU auto-detection. */
#if defined(FORCE_ATOM)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "ATOM"
#  define LIBNAME         "atom"
#  define CORENAME        "ATOM"
#  define ARCHCONFIG \
     "-DATOM " \
     "-DL1_DATA_SIZE=24576 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
#endif /* FORCE_ATOM */
313
/* Forced target FORCE_ATHLON: fixed ATHLON settings (3DNow!, MMX, SSE) instead of CPU auto-detection.
   The double space after -DHAVE_3DNOW is kept so the flag string stays byte-identical. */
#if defined(FORCE_ATHLON)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "ATHLON"
#  define LIBNAME         "athlon"
#  define CORENAME        "ATHLON"
#  define ARCHCONFIG \
     "-DATHLON " \
     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW  " \
     "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE "
#endif /* FORCE_ATHLON */
327
/* Forced target FORCE_OPTERON: fixed OPTERON settings (up to SSE2) instead of CPU auto-detection. */
#if defined(FORCE_OPTERON)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "OPTERON"
#  define LIBNAME         "opteron"
#  define CORENAME        "OPTERON"
#  define ARCHCONFIG \
     "-DOPTERON " \
     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
     "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
#endif /* FORCE_OPTERON */
341
/* Forced target FORCE_OPTERON_SSE3: the OPTERON settings with -DHAVE_SSE3 added;
   library and core names stay "opteron"/"OPTERON". */
#if defined(FORCE_OPTERON_SSE3)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "OPTERON"
#  define LIBNAME         "opteron"
#  define CORENAME        "OPTERON"
#  define ARCHCONFIG \
     "-DOPTERON " \
     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
     "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3"
#endif /* FORCE_OPTERON_SSE3 */
355
/* Forced targets FORCE_BARCELONA / FORCE_SHANGHAI / FORCE_ISTANBUL: all three
   select the same fixed BARCELONA settings instead of CPU auto-detection. */
#if defined(FORCE_BARCELONA) || \
    defined(FORCE_SHANGHAI)  || \
    defined(FORCE_ISTANBUL)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "BARCELONA"
#  define LIBNAME         "barcelona"
#  define CORENAME        "BARCELONA"
#  define ARCHCONFIG \
     "-DBARCELONA " \
     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=524288 -DL2_LINESIZE=64  -DL3_SIZE=2097152 " \
     "-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 " \
     "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
     "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU"
#endif /* FORCE_BARCELONA || FORCE_SHANGHAI || FORCE_ISTANBUL */
370
/* Forced target FORCE_BOBCAT: fixed BOBCAT settings instead of CPU auto-detection. */
#ifdef FORCE_BOBCAT
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "BOBCAT"
#  define LIBNAME         "bobcat"
#  define CORENAME        "BOBCAT"
#  define ARCHCONFIG \
     "-DBOBCAT " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=40 -DDTB_SIZE=4096 " \
     "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 " \
     "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_CFLUSH -DHAVE_CMOV"
#endif /* FORCE_BOBCAT */
385
/* Forced target FORCE_BULLDOZER: fixed BULLDOZER settings (AVX + FMA4) instead of CPU auto-detection.
   NOTE(review): L2_SIZE=1024000 is neither 1000 KiB nor 1 MiB (1048576) -- confirm it is intended. */
#ifdef FORCE_BULLDOZER
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "BULLDOZER"
#  define LIBNAME         "bulldozer"
#  define CORENAME        "BULLDOZER"
#  define ARCHCONFIG \
     "-DBULLDOZER " \
     "-DL1_DATA_SIZE=49152 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=1024000 -DL2_LINESIZE=64  -DL3_SIZE=16777216 " \
     "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 " \
     "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
     "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU " \
     "-DHAVE_AVX -DHAVE_FMA4"
#endif /* FORCE_BULLDOZER */
401
/* Forced target FORCE_PILEDRIVER: fixed PILEDRIVER settings (AVX, FMA4, FMA3)
   instead of CPU auto-detection.

   Every ARCHCONFIG fragment except the last must end with a space: adjacent
   string literals are concatenated verbatim, and main() splits the result on
   spaces.  Without the separator, "-DHAVE_SSE4_2" "-DHAVE_SSE4A" fuses into a
   single bogus token and HAVE_SSE4A / HAVE_AVX are never emitted. */
#if defined (FORCE_PILEDRIVER)
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE    "X86"
#define SUBARCHITECTURE "PILEDRIVER"
#define ARCHCONFIG   "-DPILEDRIVER " \
                     "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
                     "-DL2_SIZE=2097152 -DL2_LINESIZE=64  -DL3_SIZE=12582912 " \
                     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
                     "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
                     "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
                     "-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3"
#define LIBNAME   "piledriver"
#define CORENAME  "PILEDRIVER"
#endif
417
/* Forced target FORCE_SSE_GENERIC: X86 GENERIC settings that assume CMOV, MMX, SSE and SSE2. */
#if defined(FORCE_SSE_GENERIC)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "GENERIC"
#  define LIBNAME         "generic"
#  define CORENAME        "GENERIC"
#  define ARCHCONFIG \
     "-DGENERIC " \
     "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2"
#endif /* FORCE_SSE_GENERIC */
431
/* Forced target FORCE_VIAC3: fixed VIAC3 settings (MMX + SSE) instead of CPU auto-detection. */
#if defined(FORCE_VIAC3)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "VIAC3"
#  define LIBNAME         "viac3"
#  define CORENAME        "VIAC3"
#  define ARCHCONFIG \
     "-DVIAC3 " \
     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
     "-DL2_SIZE=65536 -DL2_LINESIZE=32 " \
     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 " \
     "-DHAVE_MMX -DHAVE_SSE "
#endif /* FORCE_VIAC3 */
445
/* Forced target FORCE_NANO: fixed NANO settings (up to SSSE3) instead of CPU auto-detection. */
#if defined(FORCE_NANO)
#  define FORCE
#  define FORCE_INTEL
#  define ARCHITECTURE    "X86"
#  define SUBARCHITECTURE "NANO"
#  define LIBNAME         "nano"
#  define CORENAME        "NANO"
#  define ARCHCONFIG \
     "-DNANO " \
     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
#endif /* FORCE_NANO */
459
/* Forced target FORCE_POWER3: fixed POWER3 settings instead of CPU auto-detection. */
#if defined(FORCE_POWER3)
#  define FORCE
#  define ARCHITECTURE    "POWER"
#  define SUBARCHITECTURE "POWER3"
#  define SUBDIRNAME      "power"
#  define LIBNAME         "power3"
#  define CORENAME        "POWER3"
#  define ARCHCONFIG \
     "-DPOWER3 " \
     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
     "-DL2_SIZE=2097152 -DL2_LINESIZE=128 " \
     "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#endif /* FORCE_POWER3 */
472
/* Forced target FORCE_POWER4: fixed POWER4 settings (6-way L2) instead of CPU auto-detection. */
#if defined(FORCE_POWER4)
#  define FORCE
#  define ARCHITECTURE    "POWER"
#  define SUBARCHITECTURE "POWER4"
#  define SUBDIRNAME      "power"
#  define LIBNAME         "power4"
#  define CORENAME        "POWER4"
#  define ARCHCONFIG \
     "-DPOWER4 " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
     "-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \
     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
#endif /* FORCE_POWER4 */
485
/* Forced target FORCE_POWER5: fixed POWER5 settings (same cache/TLB numbers as FORCE_POWER4). */
#if defined(FORCE_POWER5)
#  define FORCE
#  define ARCHITECTURE    "POWER"
#  define SUBARCHITECTURE "POWER5"
#  define SUBDIRNAME      "power"
#  define LIBNAME         "power5"
#  define CORENAME        "POWER5"
#  define ARCHCONFIG \
     "-DPOWER5 " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
     "-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \
     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
#endif /* FORCE_POWER5 */
498
/* Forced target FORCE_POWER6: fixed POWER6 settings (4 MiB L2) instead of CPU auto-detection. */
#if defined(FORCE_POWER6)
#  define FORCE
#  define ARCHITECTURE    "POWER"
#  define SUBARCHITECTURE "POWER6"
#  define SUBDIRNAME      "power"
#  define LIBNAME         "power6"
#  define CORENAME        "POWER6"
#  define ARCHCONFIG \
     "-DPOWER6 " \
     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
     "-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \
     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#endif /* FORCE_POWER6 */
511
/* Forced target FORCE_PPCG4: fixed PPCG4 settings (32-byte cache lines) instead of CPU auto-detection. */
#if defined(FORCE_PPCG4)
#  define FORCE
#  define ARCHITECTURE    "POWER"
#  define SUBARCHITECTURE "PPCG4"
#  define SUBDIRNAME      "power"
#  define LIBNAME         "ppcg4"
#  define CORENAME        "PPCG4"
#  define ARCHCONFIG \
     "-DPPCG4 " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
     "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#endif /* FORCE_PPCG4 */
524
/* Forced target FORCE_PPC970: fixed PPC970 settings instead of CPU auto-detection.
   NOTE(review): L2_SIZE=512488 is not 512 KiB (524288) -- confirm it is intended. */
#if defined(FORCE_PPC970)
#  define FORCE
#  define ARCHITECTURE    "POWER"
#  define SUBARCHITECTURE "PPC970"
#  define SUBDIRNAME      "power"
#  define LIBNAME         "ppc970"
#  define CORENAME        "PPC970"
#  define ARCHCONFIG \
     "-DPPC970 " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
     "-DL2_SIZE=512488 -DL2_LINESIZE=128 " \
     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#endif /* FORCE_PPC970 */
537
/* Forced target FORCE_PPC970MP: PPC970 core with a larger L2 and its own library name.
   NOTE(review): L2_SIZE=1024976 is not 1 MiB (1048576) -- confirm it is intended. */
#if defined(FORCE_PPC970MP)
#  define FORCE
#  define ARCHITECTURE    "POWER"
#  define SUBARCHITECTURE "PPC970"
#  define SUBDIRNAME      "power"
#  define LIBNAME         "ppc970mp"
#  define CORENAME        "PPC970"
#  define ARCHCONFIG \
     "-DPPC970 " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
     "-DL2_SIZE=1024976 -DL2_LINESIZE=128 " \
     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#endif /* FORCE_PPC970MP */
550
/* Forced target FORCE_PPC440: fixed PPC440 settings instead of CPU auto-detection. */
#if defined(FORCE_PPC440)
#  define FORCE
#  define ARCHITECTURE    "POWER"
#  define SUBARCHITECTURE "PPC440"
#  define SUBDIRNAME      "power"
#  define LIBNAME         "ppc440"
#  define CORENAME        "PPC440"
#  define ARCHCONFIG \
     "-DPPC440 " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
     "-DL2_SIZE=16384 -DL2_LINESIZE=128 " \
     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
#endif /* FORCE_PPC440 */
563
/* Forced target FORCE_PPC440FP2: same cache/TLB numbers as FORCE_PPC440, with PPC440FP2 names. */
#if defined(FORCE_PPC440FP2)
#  define FORCE
#  define ARCHITECTURE    "POWER"
#  define SUBARCHITECTURE "PPC440FP2"
#  define SUBDIRNAME      "power"
#  define LIBNAME         "ppc440FP2"
#  define CORENAME        "PPC440FP2"
#  define ARCHCONFIG \
     "-DPPC440FP2 " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
     "-DL2_SIZE=16384 -DL2_LINESIZE=128 " \
     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
#endif /* FORCE_PPC440FP2 */
576
/* Forced target FORCE_CELL: fixed CELL settings instead of CPU auto-detection.
   NOTE(review): L2_SIZE=512488 is not 512 KiB (524288) -- confirm it is intended. */
#if defined(FORCE_CELL)
#  define FORCE
#  define ARCHITECTURE    "POWER"
#  define SUBARCHITECTURE "CELL"
#  define SUBDIRNAME      "power"
#  define LIBNAME         "cell"
#  define CORENAME        "CELL"
#  define ARCHCONFIG \
     "-DCELL " \
     "-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \
     "-DL2_SIZE=512488 -DL2_LINESIZE=128 " \
     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#endif /* FORCE_CELL */
589
/* Forced target FORCE_SICORTEX: fixed MIPS/SICORTEX settings instead of CPU auto-detection.
   Note the library name is "mips" and the core name is lower-case "sicortex".
   NOTE(review): L2_SIZE=512488 is not 512 KiB (524288) -- confirm it is intended. */
#if defined(FORCE_SICORTEX)
#  define FORCE
#  define ARCHITECTURE    "MIPS"
#  define SUBARCHITECTURE "SICORTEX"
#  define SUBDIRNAME      "mips"
#  define LIBNAME         "mips"
#  define CORENAME        "sicortex"
#  define ARCHCONFIG \
     "-DSICORTEX " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
     "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
     "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#endif /* FORCE_SICORTEX */
602
603
/* Forced target FORCE_LOONGSON3A: fixed MIPS/LOONGSON3A settings (sources under "mips64").
   NOTE(review): L2_SIZE=512488 is not 512 KiB (524288) -- confirm it is intended. */
#if defined(FORCE_LOONGSON3A)
#  define FORCE
#  define ARCHITECTURE    "MIPS"
#  define SUBARCHITECTURE "LOONGSON3A"
#  define SUBDIRNAME      "mips64"
#  define LIBNAME         "loongson3a"
#  define CORENAME        "LOONGSON3A"
#  define ARCHCONFIG \
     "-DLOONGSON3A " \
     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
     "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
#endif /* FORCE_LOONGSON3A */
617
/* Forced target FORCE_LOONGSON3B: fixed MIPS/LOONGSON3B settings (sources under "mips64").
   NOTE(review): L2_SIZE=512488 is not 512 KiB (524288) -- confirm it is intended. */
#if defined(FORCE_LOONGSON3B)
#  define FORCE
#  define ARCHITECTURE    "MIPS"
#  define SUBARCHITECTURE "LOONGSON3B"
#  define SUBDIRNAME      "mips64"
#  define LIBNAME         "loongson3b"
#  define CORENAME        "LOONGSON3B"
#  define ARCHCONFIG \
     "-DLOONGSON3B " \
     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
     "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
#endif /* FORCE_LOONGSON3B */
631
/* Forced target FORCE_ITANIUM2: fixed IA64/ITANIUM2 settings (16 KiB DTB_SIZE) instead of CPU auto-detection. */
#if defined(FORCE_ITANIUM2)
#  define FORCE
#  define ARCHITECTURE    "IA64"
#  define SUBARCHITECTURE "ITANIUM2"
#  define SUBDIRNAME      "ia64"
#  define LIBNAME         "itanium2"
#  define CORENAME        "itanium2"
#  define ARCHCONFIG \
     "-DITANIUM2 " \
     "-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \
     "-DL2_SIZE=1572864 -DL2_LINESIZE=128 -DDTB_SIZE=16384 -DDTB_DEFAULT_ENTRIES=128 "
#endif /* FORCE_ITANIUM2 */
643
/* Forced target FORCE_SPARC: fixed SPARC settings with -DV9 instead of CPU auto-detection. */
#if defined(FORCE_SPARC)
#  define FORCE
#  define ARCHITECTURE    "SPARC"
#  define SUBARCHITECTURE "SPARC"
#  define SUBDIRNAME      "sparc"
#  define LIBNAME         "sparc"
#  define CORENAME        "sparc"
#  define ARCHCONFIG \
     "-DSPARC -DV9 " \
     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 "
#endif /* FORCE_SPARC */
655
/* Forced target FORCE_SPARCV7: the SPARC settings with -DV7 in place of -DV9. */
#if defined(FORCE_SPARCV7)
#  define FORCE
#  define ARCHITECTURE    "SPARC"
#  define SUBARCHITECTURE "SPARC"
#  define SUBDIRNAME      "sparc"
#  define LIBNAME         "sparcv7"
#  define CORENAME        "sparcv7"
#  define ARCHCONFIG \
     "-DSPARC -DV7 " \
     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
     "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 "
#endif /* FORCE_SPARCV7 */
667
/* Forced target FORCE_GENERIC: architecture-neutral GENERIC settings (sources under "generic").
   NOTE(review): L2_SIZE=512488 is not 512 KiB (524288) -- confirm it is intended. */
#if defined(FORCE_GENERIC)
#  define FORCE
#  define ARCHITECTURE    "GENERIC"
#  define SUBARCHITECTURE "GENERIC"
#  define SUBDIRNAME      "generic"
#  define LIBNAME         "generic"
#  define CORENAME        "generic"
#  define ARCHCONFIG \
     "-DGENERIC " \
     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
     "-DL2_SIZE=512488 -DL2_LINESIZE=128 " \
     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#endif /* FORCE_GENERIC */
680
681 #ifndef FORCE
682
683 #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
684     defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__)
685 #ifndef POWER
686 #define POWER
687 #endif
688 #define OPENBLAS_SUPPORTED
689 #endif
690
691 #if defined(__i386__) || (__x86_64__)
692 #include "cpuid_x86.c"
693 #define OPENBLAS_SUPPORTED
694 #endif
695
696 #ifdef __ia64__
697 #include "cpuid_ia64.c"
698 #define OPENBLAS_SUPPORTED
699 #endif
700
701 #ifdef __alpha
702 #include "cpuid_alpha.c"
703 #define OPENBLAS_SUPPORTED
704 #endif
705
706 #ifdef POWER
707 #include "cpuid_power.c"
708 #define OPENBLAS_SUPPORTED
709 #endif
710
711 #ifdef sparc
712 #include "cpuid_sparc.c"
713 #define OPENBLAS_SUPPORTED
714 #endif
715
716 #ifdef __mips__
717 #include "cpuid_mips.c"
718 #define OPENBLAS_SUPPORTED
719 #endif
720
721 #ifndef OPENBLAS_SUPPORTED
722 #error "This arch/CPU is not supported by OpenBLAS."
723 #endif
724
725 #else
726
727 #endif
728
/*
 * Returns the number of processors reported by the host operating system.
 *
 *   Linux            : get_nprocs()
 *   Windows          : SYSTEM_INFO.dwNumberOfProcessors
 *   FreeBSD / macOS  : sysctl { CTL_HW, HW_NCPU }
 *   anything else    : 2 (fixed fallback)
 *
 * If the sysctl query fails or reports fewer than one CPU, the same fallback
 * of 2 is returned instead of an uninitialized value.
 */
static int get_num_cores(void) {

/* `linux` is not predefined in strict ISO modes (-std=c99/-std=c11), so
   __linux__ is tested as well. */
#if defined(linux) || defined(__linux__)
  return get_nprocs();

#elif defined(OS_WINDOWS)
  SYSTEM_INFO sysinfo;

  GetSystemInfo(&sysinfo);
  return sysinfo.dwNumberOfProcessors;

#elif defined(__FreeBSD__) || defined(__APPLE__)
  int m[2] = { CTL_HW, HW_NCPU };
  int count = 0;
  size_t len = sizeof(count);

  if (sysctl(m, 2, &count, &len, NULL, 0) != 0 || count < 1) {
    return 2;
  }

  return count;

#else
  return 2;
#endif
}
757
#ifdef FORCE
/*
 * Scans `flags` for "-DNAME" / "-DNAME=VALUE" tokens and prints one line per
 * token to stdout:
 *
 *   <line_prefix>NAME<value_sep>VALUE      for -DNAME=VALUE
 *   <line_prefix>NAME<default_suffix>      for -DNAME
 *
 * A token ends at the next space or at the end of the string.  Characters
 * outside a "-D" token are skipped.
 */
static void print_config_flags(const char *flags, const char *line_prefix,
                               const char *value_sep, const char *default_suffix) {
  const char *p = flags;

  while (*p != '\0') {

    if ((p[0] != '-') || (p[1] != 'D')) {
      p ++;
      continue;
    }

    p += 2;
    fputs(line_prefix, stdout);

    while ((*p != ' ') && (*p != '\0')) {

      if (*p == '=') {
        /* Everything up to the next space is the value. */
        fputs(value_sep, stdout);
        p ++;
        while ((*p != ' ') && (*p != '\0')) {
          putchar(*p);
          p ++;
        }
      } else {
        putchar(*p);
        p ++;
        /* The name ended without "=VALUE". */
        if ((*p == ' ') || (*p == '\0')) fputs(default_suffix, stdout);
      }
    }

    putchar('\n');
  }
}
#endif

/*
 * Prints build configuration for the detected (or forced) CPU.
 * The first character of argv[1] selects the output:
 *
 *   '0'  Makefile variables: CORE, LIBCORE, NUM_CORES, the x86 feature
 *        flags, and a "MAKE += -j N" line
 *   '1'  "#define" lines for config.h
 *   '2'  "SMP=1" when more than one core is reported
 *
 * Any other selector prints nothing.  Always returns 0.
 */
int main(int argc, char *argv[]){

  /* Nothing to do without a selector; also covers argc == 0. */
  if (argc < 2) return 0;

  switch (argv[1][0]) {

  case '0' : /* for Makefile */

#ifdef FORCE
    printf("CORE=%s\n", CORENAME);
    printf("LIBCORE=%s\n", LIBNAME);
#else
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__)
    printf("CORE=%s\n", get_corename());
#endif
    printf("LIBCORE=");
    get_libname();
    printf("\n");
#endif

    printf("NUM_CORES=%d\n", get_num_cores());

#if defined(__i386__) || defined(__x86_64__)
#ifdef FORCE
    /* NAME=VALUE per line; a flag without a value becomes NAME=1. */
    print_config_flags(ARCHCONFIG, "", "=", "=1");
#else
    get_sse();
#endif
#endif

#if NO_PARALLEL_MAKE==1
    printf("MAKE += -j 1\n");
#else
#ifndef OS_WINDOWS
    printf("MAKE += -j %d\n", get_num_cores());
#endif
#endif

    break;

  case '1' : /* For config.h */
#ifdef FORCE
    /* ARCHCONFIG and CORENAME are string literals, so the flag list is
       assembled by literal concatenation.  The trailing "\n" is part of the
       last token and yields one blank line after the final #define, matching
       the previous output byte for byte. */
    print_config_flags(ARCHCONFIG " -DCORE_" CORENAME "\n", "#define ", " ", "");
#else
    get_cpuconfig();
#endif
    break;

  case '2' : /* SMP */
    if (get_num_cores() > 1) printf("SMP=1\n");
    break;

  default :
    break;
  }

  fflush(stdout);

  return 0;
}
876