modified getarch.c
[platform/upstream/openblas.git] / getarch.c
1 /*****************************************************************************
2 Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8
9    1. Redistributions of source code must retain the above copyright
10       notice, this list of conditions and the following disclaimer.
11
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in
14       the documentation and/or other materials provided with the
15       distribution.
16    3. Neither the name of the ISCAS nor the names of its contributors may 
17       be used to endorse or promote products derived from this software 
18       without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
25 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
26 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
27 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
28 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 
29 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 **********************************************************************************/
32
33 /*********************************************************************/
34 /* Copyright 2009, 2010 The University of Texas at Austin.           */
35 /* All rights reserved.                                              */
36 /*                                                                   */
37 /* Redistribution and use in source and binary forms, with or        */
38 /* without modification, are permitted provided that the following   */
39 /* conditions are met:                                               */
40 /*                                                                   */
41 /*   1. Redistributions of source code must retain the above         */
42 /*      copyright notice, this list of conditions and the following  */
43 /*      disclaimer.                                                  */
44 /*                                                                   */
45 /*   2. Redistributions in binary form must reproduce the above      */
46 /*      copyright notice, this list of conditions and the following  */
47 /*      disclaimer in the documentation and/or other materials       */
48 /*      provided with the distribution.                              */
49 /*                                                                   */
50 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
51 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
52 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
53 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
54 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
55 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
56 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
57 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
58 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
59 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
60 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
61 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
62 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
63 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
64 /*                                                                   */
65 /* The views and conclusions contained in the software and           */
66 /* documentation are those of the authors and should not be          */
67 /* interpreted as representing official policies, either expressed   */
68 /* or implied, of The University of Texas at Austin.                 */
69 /*********************************************************************/
70
71 #if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__)
72 #define OS_WINDOWS
73 #endif
74
75 #include <stdio.h>
76 #include <string.h>
77 #ifdef OS_WINDOWS
78 #include <windows.h>
79 #endif
80 #if defined(__FreeBSD__) || defined(__APPLE__)
81 #include <sys/types.h>
82 #include <sys/sysctl.h>
83 #endif
84 #ifdef linux
85 #include <sys/sysinfo.h>
86 #include <unistd.h>
87 #endif
88
89 /* #define FORCE_P2             */
90 /* #define FORCE_KATMAI         */
91 /* #define FORCE_COPPERMINE     */
92 /* #define FORCE_NORTHWOOD      */
93 /* #define FORCE_PRESCOTT       */
94 /* #define FORCE_BANIAS         */
95 /* #define FORCE_YONAH          */
96 /* #define FORCE_CORE2          */
97 /* #define FORCE_PENRYN         */
98 /* #define FORCE_DUNNINGTON     */
99 /* #define FORCE_NEHALEM        */
100 /* #define FORCE_SANDYBRIDGE    */
101 /* #define FORCE_ATOM           */
102 /* #define FORCE_ATHLON         */
103 /* #define FORCE_OPTERON        */
104 /* #define FORCE_OPTERON_SSE3   */
105 /* #define FORCE_BARCELONA      */
106 /* #define FORCE_SHANGHAI       */
107 /* #define FORCE_ISTANBUL       */
108 /* #define FORCE_BOBCAT         */
109 /* #define FORCE_BULLDOZER      */
110 /* #define FORCE_PILEDRIVER     */
111 /* #define FORCE_SSE_GENERIC    */
112 /* #define FORCE_VIAC3          */
113 /* #define FORCE_NANO           */
114 /* #define FORCE_POWER3         */
115 /* #define FORCE_POWER4         */
116 /* #define FORCE_POWER5         */
117 /* #define FORCE_POWER6         */
118 /* #define FORCE_PPCG4          */
119 /* #define FORCE_PPC970         */
120 /* #define FORCE_PPC970MP       */
121 /* #define FORCE_PPC440         */
122 /* #define FORCE_PPC440FP2      */
123 /* #define FORCE_CELL           */
124 /* #define FORCE_SICORTEX       */
125 /* #define FORCE_LOONGSON3A     */
126 /* #define FORCE_LOONGSON3B     */
127 /* #define FORCE_ITANIUM2       */
128 /* #define FORCE_SPARC          */
129 /* #define FORCE_SPARCV7        */
/* #define FORCE_GENERIC        */
/* #define FORCE_ARMV7          */
/* #define FORCE_ARMV6          */
/* #define FORCE_ARMV8          */
131
132 #ifdef FORCE_P2
133 #define FORCE
134 #define FORCE_INTEL
135 #define ARCHITECTURE    "X86"
136 #define SUBARCHITECTURE "PENTIUM2"
137 #define ARCHCONFIG   "-DPENTIUM2 " \
138                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
139                      "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
140                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
141                      "-DHAVE_CMOV -DHAVE_MMX"
142 #define LIBNAME   "p2"
143 #define CORENAME  "P5"
144 #endif
145
146 #ifdef FORCE_KATMAI
147 #define FORCE
148 #define FORCE_INTEL
149 #define ARCHITECTURE    "X86"
150 #define SUBARCHITECTURE "PENTIUM3"
151 #define ARCHCONFIG   "-DPENTIUM3 " \
152                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
153                      "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
154                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
155                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
156 #define LIBNAME   "katmai"
157 #define CORENAME  "KATMAI"
158 #endif
159
160 #ifdef FORCE_COPPERMINE
161 #define FORCE
162 #define FORCE_INTEL
163 #define ARCHITECTURE    "X86"
164 #define SUBARCHITECTURE "PENTIUM3"
165 #define ARCHCONFIG   "-DPENTIUM3 " \
166                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
167                      "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
168                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
169                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
170 #define LIBNAME   "coppermine"
171 #define CORENAME  "COPPERMINE"
172 #endif
173
174 #ifdef FORCE_NORTHWOOD
175 #define FORCE
176 #define FORCE_INTEL
177 #define ARCHITECTURE    "X86"
178 #define SUBARCHITECTURE "PENTIUM4"
179 #define ARCHCONFIG   "-DPENTIUM4 " \
180                      "-DL1_DATA_SIZE=8192 -DL1_DATA_LINESIZE=64 " \
181                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
182                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
183                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
184 #define LIBNAME   "northwood"
185 #define CORENAME  "NORTHWOOD"
186 #endif
187
188 #ifdef FORCE_PRESCOTT
189 #define FORCE
190 #define FORCE_INTEL
191 #define ARCHITECTURE    "X86"
192 #define SUBARCHITECTURE "PENTIUM4"
193 #define ARCHCONFIG   "-DPENTIUM4 " \
194                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
195                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
196                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
197                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3"
198 #define LIBNAME   "prescott"
199 #define CORENAME  "PRESCOTT"
200 #endif
201
202 #ifdef FORCE_BANIAS
203 #define FORCE
204 #define FORCE_INTEL
205 #define ARCHITECTURE    "X86"
206 #define SUBARCHITECTURE "BANIAS"
207 #define ARCHCONFIG   "-DPENTIUMM " \
208                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
209                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
210                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
211                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
212 #define LIBNAME   "banias"
213 #define CORENAME  "BANIAS"
214 #endif
215
216 #ifdef FORCE_YONAH
217 #define FORCE
218 #define FORCE_INTEL
219 #define ARCHITECTURE    "X86"
220 #define SUBARCHITECTURE "YONAH"
221 #define ARCHCONFIG   "-DPENTIUMM " \
222                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
223                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
224                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
225                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
226 #define LIBNAME   "yonah"
227 #define CORENAME  "YONAH"
228 #endif
229
230 #ifdef FORCE_CORE2
231 #define FORCE
232 #define FORCE_INTEL
233 #define ARCHITECTURE    "X86"
234 #define SUBARCHITECTURE "CONRORE"
235 #define ARCHCONFIG   "-DCORE2 " \
236                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
237                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
238                      "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
239                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
240 #define LIBNAME   "core2"
241 #define CORENAME  "CORE2"
242 #endif
243
244 #ifdef FORCE_PENRYN
245 #define FORCE
246 #define FORCE_INTEL
247 #define ARCHITECTURE    "X86"
248 #define SUBARCHITECTURE "PENRYN"
249 #define ARCHCONFIG   "-DPENRYN " \
250                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
251                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
252                      "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
253                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1"
254 #define LIBNAME   "penryn"
255 #define CORENAME  "PENRYN"
256 #endif
257
258 #ifdef FORCE_DUNNINGTON
259 #define FORCE
260 #define FORCE_INTEL
261 #define ARCHITECTURE    "X86"
262 #define SUBARCHITECTURE "DUNNINGTON"
263 #define ARCHCONFIG   "-DDUNNINGTON " \
264                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
265                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
266                      "-DL3_SIZE=16777216 -DL3_LINESIZE=64 " \
267                      "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
268                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1"
269 #define LIBNAME   "dunnington"
270 #define CORENAME  "DUNNINGTON"
271 #endif
272
273 #ifdef FORCE_NEHALEM
274 #define FORCE
275 #define FORCE_INTEL
276 #define ARCHITECTURE    "X86"
277 #define SUBARCHITECTURE "NEHALEM"
278 #define ARCHCONFIG   "-DNEHALEM " \
279                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
280                      "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
281                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
282                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2"
283 #define LIBNAME   "nehalem"
284 #define CORENAME  "NEHALEM"
285 #endif
286
287 #ifdef FORCE_SANDYBRIDGE
288 #define FORCE
289 #define FORCE_INTEL
290 #define ARCHITECTURE    "X86"
291 #define SUBARCHITECTURE "SANDYBRIDGE"
292 #define ARCHCONFIG   "-DSANDYBRIDGE " \
293                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
294                      "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
295                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
296                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX"
297 #define LIBNAME   "sandybridge"
298 #define CORENAME  "SANDYBRIDGE"
299 #endif
300
301 #ifdef FORCE_ATOM
302 #define FORCE
303 #define FORCE_INTEL
304 #define ARCHITECTURE    "X86"
305 #define SUBARCHITECTURE "ATOM"
306 #define ARCHCONFIG   "-DATOM " \
307                      "-DL1_DATA_SIZE=24576 -DL1_DATA_LINESIZE=64 " \
308                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
309                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
310                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
311 #define LIBNAME   "atom"
312 #define CORENAME  "ATOM"
313 #endif
314
315 #ifdef FORCE_ATHLON
316 #define FORCE
317 #define FORCE_INTEL
318 #define ARCHITECTURE    "X86"
319 #define SUBARCHITECTURE "ATHLON"
320 #define ARCHCONFIG   "-DATHLON " \
321                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
322                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
323                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW  " \
324                      "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE "
325 #define LIBNAME   "athlon"
326 #define CORENAME  "ATHLON"
327 #endif
328
329 #ifdef FORCE_OPTERON
330 #define FORCE
331 #define FORCE_INTEL
332 #define ARCHITECTURE    "X86"
333 #define SUBARCHITECTURE "OPTERON"
334 #define ARCHCONFIG   "-DOPTERON " \
335                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
336                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
337                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
338                      "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
339 #define LIBNAME   "opteron"
340 #define CORENAME  "OPTERON"
341 #endif
342
343 #ifdef FORCE_OPTERON_SSE3
344 #define FORCE
345 #define FORCE_INTEL
346 #define ARCHITECTURE    "X86"
347 #define SUBARCHITECTURE "OPTERON"
348 #define ARCHCONFIG   "-DOPTERON " \
349                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
350                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
351                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
352                      "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3"
353 #define LIBNAME   "opteron"
354 #define CORENAME  "OPTERON"
355 #endif
356
357 #if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL)
358 #define FORCE
359 #define FORCE_INTEL
360 #define ARCHITECTURE    "X86"
361 #define SUBARCHITECTURE "BARCELONA"
362 #define ARCHCONFIG   "-DBARCELONA " \
363                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
364                      "-DL2_SIZE=524288 -DL2_LINESIZE=64  -DL3_SIZE=2097152 " \
365                      "-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 " \
366                      "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
367                      "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU"
368 #define LIBNAME   "barcelona"
369 #define CORENAME  "BARCELONA"
370 #endif
371
372 #if defined(FORCE_BOBCAT) 
373 #define FORCE
374 #define FORCE_INTEL
375 #define ARCHITECTURE    "X86"
376 #define SUBARCHITECTURE "BOBCAT"
377 #define ARCHCONFIG   "-DBOBCAT " \
378                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
379                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
380                      "-DDTB_DEFAULT_ENTRIES=40 -DDTB_SIZE=4096 " \
381                      "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 " \
382                      "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_CFLUSH -DHAVE_CMOV"
383 #define LIBNAME   "bobcat"
384 #define CORENAME  "BOBCAT"
385 #endif
386
387 #if defined (FORCE_BULLDOZER)
388 #define FORCE
389 #define FORCE_INTEL
390 #define ARCHITECTURE    "X86"
391 #define SUBARCHITECTURE "BULLDOZER"
392 #define ARCHCONFIG   "-DBULLDOZER " \
393                      "-DL1_DATA_SIZE=49152 -DL1_DATA_LINESIZE=64 " \
394                      "-DL2_SIZE=1024000 -DL2_LINESIZE=64  -DL3_SIZE=16777216 " \
395                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 " \
396                      "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
397                      "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU " \
398                      "-DHAVE_AVX -DHAVE_FMA4"
399 #define LIBNAME   "bulldozer"
400 #define CORENAME  "BULLDOZER"
401 #endif
402
403 #if defined (FORCE_PILEDRIVER)
404 #define FORCE
405 #define FORCE_INTEL
406 #define ARCHITECTURE    "X86"
407 #define SUBARCHITECTURE "PILEDRIVER"
408 #define ARCHCONFIG   "-DPILEDRIVER " \
409                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
410                      "-DL2_SIZE=2097152 -DL2_LINESIZE=64  -DL3_SIZE=12582912 " \
411                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
412                      "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
413                      "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
414                      "-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3"
415 #define LIBNAME   "piledriver"
416 #define CORENAME  "PILEDRIVER"
417 #endif
418
419 #ifdef FORCE_SSE_GENERIC
420 #define FORCE
421 #define FORCE_INTEL
422 #define ARCHITECTURE    "X86"
423 #define SUBARCHITECTURE "GENERIC"
424 #define ARCHCONFIG   "-DGENERIC " \
425                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
426                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
427                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
428                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2"
429 #define LIBNAME   "generic"
430 #define CORENAME  "GENERIC"
431 #endif
432
433 #ifdef FORCE_VIAC3
434 #define FORCE
435 #define FORCE_INTEL
436 #define ARCHITECTURE    "X86"
437 #define SUBARCHITECTURE "VIAC3"
438 #define ARCHCONFIG   "-DVIAC3 " \
439                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
440                      "-DL2_SIZE=65536 -DL2_LINESIZE=32 " \
441                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 " \
442                      "-DHAVE_MMX -DHAVE_SSE "
443 #define LIBNAME   "viac3"
444 #define CORENAME  "VIAC3"
445 #endif
446
447 #ifdef FORCE_NANO
448 #define FORCE
449 #define FORCE_INTEL
450 #define ARCHITECTURE    "X86"
451 #define SUBARCHITECTURE "NANO"
452 #define ARCHCONFIG   "-DNANO " \
453                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
454                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
455                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
456                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
457 #define LIBNAME   "nano"
458 #define CORENAME  "NANO"
459 #endif
460
461 #ifdef FORCE_POWER3
462 #define FORCE
463 #define ARCHITECTURE    "POWER"
464 #define SUBARCHITECTURE "POWER3"
465 #define SUBDIRNAME      "power"
466 #define ARCHCONFIG   "-DPOWER3 " \
467                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
468                      "-DL2_SIZE=2097152 -DL2_LINESIZE=128 " \
469                      "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
470 #define LIBNAME   "power3"
471 #define CORENAME  "POWER3"
472 #endif
473
474 #ifdef FORCE_POWER4
475 #define FORCE
476 #define ARCHITECTURE    "POWER"
477 #define SUBARCHITECTURE "POWER4"
478 #define SUBDIRNAME      "power"
479 #define ARCHCONFIG   "-DPOWER4 " \
480                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
481                      "-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \
482                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
483 #define LIBNAME   "power4"
484 #define CORENAME  "POWER4"
485 #endif
486
487 #ifdef FORCE_POWER5
488 #define FORCE
489 #define ARCHITECTURE    "POWER"
490 #define SUBARCHITECTURE "POWER5"
491 #define SUBDIRNAME      "power"
492 #define ARCHCONFIG   "-DPOWER5 " \
493                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
494                      "-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \
495                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
496 #define LIBNAME   "power5"
497 #define CORENAME  "POWER5"
498 #endif
499
500 #ifdef FORCE_POWER6
501 #define FORCE
502 #define ARCHITECTURE    "POWER"
503 #define SUBARCHITECTURE "POWER6"
504 #define SUBDIRNAME      "power"
505 #define ARCHCONFIG   "-DPOWER6 " \
506                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
507                      "-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \
508                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
509 #define LIBNAME   "power6"
510 #define CORENAME  "POWER6"
511 #endif
512
513 #ifdef FORCE_PPCG4
514 #define FORCE
515 #define ARCHITECTURE    "POWER"
516 #define SUBARCHITECTURE "PPCG4"
517 #define SUBDIRNAME      "power"
518 #define ARCHCONFIG   "-DPPCG4 " \
519                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
520                      "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
521                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
522 #define LIBNAME   "ppcg4"
523 #define CORENAME  "PPCG4"
524 #endif
525
526 #ifdef FORCE_PPC970
527 #define FORCE
528 #define ARCHITECTURE    "POWER"
529 #define SUBARCHITECTURE "PPC970"
530 #define SUBDIRNAME      "power"
531 #define ARCHCONFIG   "-DPPC970 " \
532                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
533                      "-DL2_SIZE=512488 -DL2_LINESIZE=128 " \
534                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
535 #define LIBNAME   "ppc970"
536 #define CORENAME  "PPC970"
537 #endif
538
539 #ifdef FORCE_PPC970MP
540 #define FORCE
541 #define ARCHITECTURE    "POWER"
542 #define SUBARCHITECTURE "PPC970"
543 #define SUBDIRNAME      "power"
544 #define ARCHCONFIG   "-DPPC970 " \
545                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
546                      "-DL2_SIZE=1024976 -DL2_LINESIZE=128 " \
547                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
548 #define LIBNAME   "ppc970mp"
549 #define CORENAME  "PPC970"
550 #endif
551
552 #ifdef FORCE_PPC440
553 #define FORCE
554 #define ARCHITECTURE    "POWER"
555 #define SUBARCHITECTURE "PPC440"
556 #define SUBDIRNAME      "power"
557 #define ARCHCONFIG   "-DPPC440 " \
558                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
559                      "-DL2_SIZE=16384 -DL2_LINESIZE=128 " \
560                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
561 #define LIBNAME   "ppc440"
562 #define CORENAME  "PPC440"
563 #endif
564
565 #ifdef FORCE_PPC440FP2
566 #define FORCE
567 #define ARCHITECTURE    "POWER"
568 #define SUBARCHITECTURE "PPC440FP2"
569 #define SUBDIRNAME      "power"
570 #define ARCHCONFIG   "-DPPC440FP2 " \
571                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
572                      "-DL2_SIZE=16384 -DL2_LINESIZE=128 " \
573                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
574 #define LIBNAME   "ppc440FP2"
575 #define CORENAME  "PPC440FP2"
576 #endif
577
578 #ifdef FORCE_CELL
579 #define FORCE
580 #define ARCHITECTURE    "POWER"
581 #define SUBARCHITECTURE "CELL"
582 #define SUBDIRNAME      "power"
583 #define ARCHCONFIG   "-DCELL " \
584                      "-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \
585                      "-DL2_SIZE=512488 -DL2_LINESIZE=128 " \
586                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
587 #define LIBNAME   "cell"
588 #define CORENAME  "CELL"
589 #endif
590
591 #ifdef FORCE_SICORTEX
592 #define FORCE
593 #define ARCHITECTURE    "MIPS"
594 #define SUBARCHITECTURE "SICORTEX"
595 #define SUBDIRNAME      "mips"
596 #define ARCHCONFIG   "-DSICORTEX " \
597                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
598                      "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
599                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
600 #define LIBNAME   "mips"
601 #define CORENAME  "sicortex"
602 #endif
603
604
605 #ifdef FORCE_LOONGSON3A
606 #define FORCE
607 #define ARCHITECTURE    "MIPS"
608 #define SUBARCHITECTURE "LOONGSON3A"
609 #define SUBDIRNAME      "mips64"
610 #define ARCHCONFIG   "-DLOONGSON3A " \
611        "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
612        "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
613        "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
614 #define LIBNAME   "loongson3a"
615 #define CORENAME  "LOONGSON3A"
616 #else
617 #endif
618
619 #ifdef FORCE_LOONGSON3B
620 #define FORCE
621 #define ARCHITECTURE    "MIPS"
622 #define SUBARCHITECTURE "LOONGSON3B"
623 #define SUBDIRNAME      "mips64"
624 #define ARCHCONFIG   "-DLOONGSON3B " \
625        "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
626        "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
627        "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
628 #define LIBNAME   "loongson3b"
629 #define CORENAME  "LOONGSON3B"
630 #else
631 #endif
632
633 #ifdef FORCE_ITANIUM2
634 #define FORCE
635 #define ARCHITECTURE    "IA64"
636 #define SUBARCHITECTURE "ITANIUM2"
637 #define SUBDIRNAME      "ia64"
638 #define ARCHCONFIG   "-DITANIUM2 " \
639                      "-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \
640                      "-DL2_SIZE=1572864 -DL2_LINESIZE=128 -DDTB_SIZE=16384 -DDTB_DEFAULT_ENTRIES=128 "
641 #define LIBNAME   "itanium2"
642 #define CORENAME  "itanium2"
643 #endif
644
645 #ifdef FORCE_SPARC
646 #define FORCE
647 #define ARCHITECTURE    "SPARC"
648 #define SUBARCHITECTURE "SPARC"
649 #define SUBDIRNAME      "sparc"
650 #define ARCHCONFIG   "-DSPARC -DV9 " \
651                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
652                      "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 "
653 #define LIBNAME   "sparc"
654 #define CORENAME  "sparc"
655 #endif
656
657 #ifdef FORCE_SPARCV7
658 #define FORCE
659 #define ARCHITECTURE    "SPARC"
660 #define SUBARCHITECTURE "SPARC"
661 #define SUBDIRNAME      "sparc"
662 #define ARCHCONFIG   "-DSPARC -DV7 " \
663                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
664                      "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 "
665 #define LIBNAME   "sparcv7"
666 #define CORENAME  "sparcv7"
667 #endif
668
669 #ifdef FORCE_GENERIC
670 #define FORCE
671 #define ARCHITECTURE    "GENERIC"
672 #define SUBARCHITECTURE "GENERIC"
673 #define SUBDIRNAME      "generic"
674 #define ARCHCONFIG   "-DGENERIC " \
675                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
676                      "-DL2_SIZE=512488 -DL2_LINESIZE=128 " \
677                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
678 #define LIBNAME   "generic"
679 #define CORENAME  "generic"
680 #endif
681
682 #ifdef FORCE_ARMV7
683 #define FORCE
684 #define ARCHITECTURE    "ARM"
685 #define SUBARCHITECTURE "ARMV7"
686 #define SUBDIRNAME      "arm"
687 #define ARCHCONFIG   "-DARMV7 " \
688        "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
689        "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
690        "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
691        "-DHAVE_VFPV3 -DHAVE_VFP"
692 #define LIBNAME   "armv7"
693 #define CORENAME  "ARMV7"
694 #else
695 #endif
696
697 #ifdef FORCE_ARMV6
698 #define FORCE
699 #define ARCHITECTURE    "ARM"
700 #define SUBARCHITECTURE "ARMV6"
701 #define SUBDIRNAME      "arm"
702 #define ARCHCONFIG   "-DARMV6 " \
703        "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
704        "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
705        "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
706        "-DHAVE_VFP"
707 #define LIBNAME   "armv6"
708 #define CORENAME  "ARMV6"
709 #else
710 #endif
711
712 #ifdef FORCE_ARMV8
713 #define FORCE
714 #define ARCHITECTURE    "ARM64"
715 #define SUBARCHITECTURE "ARMV8"
716 #define SUBDIRNAME      "arm64"
717 #define ARCHCONFIG   "-DARMV8 " \
718        "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
719        "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
720        "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
721        "-DHAVE_VFP -DHAVE_VFPV3 -DHAVE_VFPV4"
722 #define LIBNAME   "armv8"
723 #define CORENAME  "ARMV8"
724 #else
725 #endif
726
727
728 #ifndef FORCE
729
730 #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
731     defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__)
732 #ifndef POWER
733 #define POWER
734 #endif
735 #define OPENBLAS_SUPPORTED
736 #endif
737
738 #if defined(__i386__) || (__x86_64__)
739 #include "cpuid_x86.c"
740 #define OPENBLAS_SUPPORTED
741 #endif
742
743 #ifdef __ia64__
744 #include "cpuid_ia64.c"
745 #define OPENBLAS_SUPPORTED
746 #endif
747
748 #ifdef __alpha
749 #include "cpuid_alpha.c"
750 #define OPENBLAS_SUPPORTED
751 #endif
752
753 #ifdef POWER
754 #include "cpuid_power.c"
755 #define OPENBLAS_SUPPORTED
756 #endif
757
758 #ifdef sparc
759 #include "cpuid_sparc.c"
760 #define OPENBLAS_SUPPORTED
761 #endif
762
763 #ifdef __mips__
764 #include "cpuid_mips.c"
765 #define OPENBLAS_SUPPORTED
766 #endif
767
768 #ifdef __arm__
769 #include "cpuid_arm.c"
770 #define OPENBLAS_SUPPORTED
771 #endif
772
773
774 #ifndef OPENBLAS_SUPPORTED
775 #error "This arch/CPU is not supported by OpenBLAS."
776 #endif
777
778 #else
779
780 #endif
781
/*
 * Report how many processors the host running this program has.
 *
 * The query used depends on the platform:
 *   - linux:             sysconf(_SC_NPROCESSORS_ONLN)
 *   - Windows:           GetSystemInfo()
 *   - FreeBSD / Darwin:  sysctl({CTL_HW, HW_NCPU})
 *   - anything else:     no query is available.
 *
 * Returns a count that is always >= 1.  When no query exists for the
 * platform, or the query fails or reports a non-positive value, the same
 * default of 2 is returned, so callers never print a negative or garbage
 * number into the generated Makefile fragment.
 */
static int get_num_cores(void) {

  /* Answer given when the real processor count cannot be determined. */
  enum { FALLBACK_NUM_CORES = 2 };

#ifdef linux
  /* Number of processors currently online; sysconf() yields -1 on error. */
  long online = sysconf(_SC_NPROCESSORS_ONLN);

  return (online > 0) ? (int)online : FALLBACK_NUM_CORES;

#elif defined(OS_WINDOWS)
  SYSTEM_INFO sysinfo;

  GetSystemInfo(&sysinfo);

  if (sysinfo.dwNumberOfProcessors < 1) return FALLBACK_NUM_CORES;
  return (int)sysinfo.dwNumberOfProcessors;

#elif defined(__FreeBSD__) || defined(__APPLE__)
  int m[2];
  int count = 0;
  size_t len = sizeof(count);

  m[0] = CTL_HW;
  m[1] = HW_NCPU;

  /* sysctl() leaves count untouched on failure, so its result must be
     checked before the value is trusted. */
  if ((sysctl(m, 2, &count, &len, NULL, 0) != 0) || (count < 1)) {
    return FALLBACK_NUM_CORES;
  }

  return count;
#else
  return FALLBACK_NUM_CORES;
#endif
}
811
#ifdef FORCE
/*
 * Print one output line for every "-D" token found in a compiler-flag string.
 *
 *   flags        blank-separated flag string, e.g. "-DFOO -DBAR=3"
 *   prefix       text written before each macro name
 *   assign       text written in place of the '=' of a "-DNAME=VALUE" token
 *   bare_suffix  text written after the name of a "-DNAME" token that has no
 *                '=' (nothing is appended when the name is empty)
 *
 * Characters that are not part of a "-D" token are skipped.  Only a blank or
 * the end of the string terminates a token, so any other character (including
 * a newline) is copied to the output as part of the name or value.
 */
static void emit_forced_defines(const char *flags, const char *prefix,
                                const char *assign, const char *bare_suffix) {
  const char *p = flags;

  while (*p != '\0') {
    int name_len = 0;

    if ((p[0] != '-') || (p[1] != 'D')) {
      p ++;
      continue;
    }
    p += 2;

    fputs(prefix, stdout);

    /* Macro name: runs up to '=', a blank, or the end of the string. */
    while ((*p != '=') && (*p != ' ') && (*p != '\0')) {
      putchar(*p);
      p ++;
      name_len ++;
    }

    if (*p == '=') {
      /* Macro value: everything after the first '=' up to the next blank. */
      fputs(assign, stdout);
      p ++;
      while ((*p != ' ') && (*p != '\0')) {
        putchar(*p);
        p ++;
      }
    } else if (name_len > 0) {
      fputs(bare_suffix, stdout);
    }

    putchar('\n');
  }
}
#endif

/*
 * Entry point.  The first character of argv[1] selects what is written to
 * stdout:
 *
 *   '0'  Makefile fragment: CORE=, LIBCORE=, NUM_CORES=, on x86 the feature
 *        variables, and a "MAKE += -j N" line
 *   '1'  config.h contents: "#define" lines for the selected CPU
 *   '2'  "SMP=1" when more than one processor is reported
 *
 * With a FORCE_<target> switch the values come from the ARCHCONFIG, LIBNAME
 * and CORENAME macros; otherwise they come from the get_*() probe functions.
 * Always returns 0; without an argument, or with an unknown selector, nothing
 * is printed.
 */
int main(int argc, char *argv[]){

  /* argv[1] is only present when at least one argument was passed. */
  if (argc < 2) return 0;

  switch (argv[1][0]) {

  case '0' : /* for Makefile */

#ifdef FORCE
    printf("CORE=%s\n", CORENAME);
    printf("LIBCORE=%s\n", LIBNAME);
#else
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__)
    printf("CORE=%s\n", get_corename());
#endif
    printf("LIBCORE=");
    get_libname();
    printf("\n");
#endif

    printf("NUM_CORES=%d\n", get_num_cores());

#if defined(__arm__) && !defined(FORCE)
    get_features();
#endif

#if defined(__i386__) || defined(__x86_64__)
#ifndef FORCE
    get_sse();
#else
    /* "-DNAME=VALUE" becomes "NAME=VALUE", a bare "-DNAME" becomes "NAME=1". */
    emit_forced_defines(ARCHCONFIG, "", "=", "=1");
#endif
#endif

#if NO_PARALLEL_MAKE==1
    printf("MAKE += -j 1\n");
#else
#ifndef OS_WINDOWS
    printf("MAKE += -j %d\n", get_num_cores());
#endif
#endif

    break;

  case '1' : /* For config.h */
#ifdef FORCE
    /* ARCHCONFIG and CORENAME are string literals, so the full flag list is
       assembled by the compiler instead of being formatted into a buffer at
       run time.  The trailing "\n" is part of the last token on purpose: it
       keeps the output (a blank line after the CORE_ entry) byte-for-byte
       identical to what this program has always produced. */
    emit_forced_defines(ARCHCONFIG " -DCORE_" CORENAME "\n", "#define ", " ", "");
#else
    get_cpuconfig();
#endif
    break;

  case '2' : /* SMP */
    if (get_num_cores() > 1) printf("SMP=1\n");
    break;

  default :
    /* Unknown selector: print nothing. */
    break;
  }

  fflush(stdout);

  return 0;
}
935