/* Source: platform/upstream/openblas.git, file getarch.c (text of the first commit in the series). */
1 /*****************************************************************************
2 Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8
9    1. Redistributions of source code must retain the above copyright
10       notice, this list of conditions and the following disclaimer.
11
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in
14       the documentation and/or other materials provided with the
15       distribution.
16    3. Neither the name of the ISCAS nor the names of its contributors may
17       be used to endorse or promote products derived from this software
18       without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
29 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 **********************************************************************************/
32
33 /*********************************************************************/
34 /* Copyright 2009, 2010 The University of Texas at Austin.           */
35 /* All rights reserved.                                              */
36 /*                                                                   */
37 /* Redistribution and use in source and binary forms, with or        */
38 /* without modification, are permitted provided that the following   */
39 /* conditions are met:                                               */
40 /*                                                                   */
41 /*   1. Redistributions of source code must retain the above         */
42 /*      copyright notice, this list of conditions and the following  */
43 /*      disclaimer.                                                  */
44 /*                                                                   */
45 /*   2. Redistributions in binary form must reproduce the above      */
46 /*      copyright notice, this list of conditions and the following  */
47 /*      disclaimer in the documentation and/or other materials       */
48 /*      provided with the distribution.                              */
49 /*                                                                   */
50 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
51 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
52 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
53 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
54 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
55 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
56 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
57 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
58 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
59 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
60 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
61 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
62 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
63 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
64 /*                                                                   */
65 /* The views and conclusions contained in the software and           */
66 /* documentation are those of the authors and should not be          */
67 /* interpreted as representing official policies, either expressed   */
68 /* or implied, of The University of Texas at Austin.                 */
69 /*********************************************************************/
70
/*
 * OS_WINDOWS selects the Win32 code paths below (<windows.h>, GetSystemInfo).
 * __WIN32__/__WIN64__ and the __CYGWIN*__ macros are the GNU-toolchain
 * spellings; _WIN32/_WIN64 are tested as well so that compilers which only
 * predefine the underscore-prefixed names are recognised too.
 */
#if defined(__WIN32__) || defined(__WIN64__) || \
    defined(__CYGWIN32__) || defined(__CYGWIN64__) || \
    defined(_WIN32) || defined(_WIN64)
#define OS_WINDOWS
#endif
74
75 #include <stdio.h>
76 #include <string.h>
77 #ifdef OS_WINDOWS
78 #include <windows.h>
79 #endif
80 #if defined(__FreeBSD__) || defined(__APPLE__)
81 #include <sys/types.h>
82 #include <sys/sysctl.h>
83 #endif
84 #ifdef linux
85 #include <sys/sysinfo.h>
86 #include <unistd.h>
87 #endif
88
89 /* #define FORCE_P2             */
90 /* #define FORCE_KATMAI         */
91 /* #define FORCE_COPPERMINE     */
92 /* #define FORCE_NORTHWOOD      */
93 /* #define FORCE_PRESCOTT       */
94 /* #define FORCE_BANIAS         */
95 /* #define FORCE_YONAH          */
96 /* #define FORCE_CORE2          */
97 /* #define FORCE_PENRYN         */
98 /* #define FORCE_DUNNINGTON     */
99 /* #define FORCE_NEHALEM        */
/* #define FORCE_SANDYBRIDGE    */
/* #define FORCE_HASWELL        */
101 /* #define FORCE_ATOM           */
102 /* #define FORCE_ATHLON         */
103 /* #define FORCE_OPTERON        */
104 /* #define FORCE_OPTERON_SSE3   */
105 /* #define FORCE_BARCELONA      */
106 /* #define FORCE_SHANGHAI       */
107 /* #define FORCE_ISTANBUL       */
108 /* #define FORCE_BOBCAT         */
109 /* #define FORCE_BULLDOZER      */
110 /* #define FORCE_PILEDRIVER     */
111 /* #define FORCE_SSE_GENERIC    */
112 /* #define FORCE_VIAC3          */
113 /* #define FORCE_NANO           */
114 /* #define FORCE_POWER3         */
115 /* #define FORCE_POWER4         */
116 /* #define FORCE_POWER5         */
117 /* #define FORCE_POWER6         */
118 /* #define FORCE_PPCG4          */
119 /* #define FORCE_PPC970         */
120 /* #define FORCE_PPC970MP       */
121 /* #define FORCE_PPC440         */
122 /* #define FORCE_PPC440FP2      */
123 /* #define FORCE_CELL           */
124 /* #define FORCE_SICORTEX       */
125 /* #define FORCE_LOONGSON3A     */
126 /* #define FORCE_LOONGSON3B     */
127 /* #define FORCE_ITANIUM2       */
128 /* #define FORCE_SPARC          */
129 /* #define FORCE_SPARCV7        */
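/* #define FORCE_GENERIC        */
/* #define FORCE_ARMV5          */
/* #define FORCE_ARMV6          */
/* #define FORCE_ARMV7          */
/* #define FORCE_ARMV8          */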
131
/*
 * Forced target PENTIUM2: fixed names and cache/TLB parameters that main()
 * prints instead of probing the host CPU.
 */
#ifdef FORCE_P2
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE    "X86"
#define SUBARCHITECTURE "PENTIUM2"
/* L2_SIZE is 512 KiB, i.e. 524288 bytes (same value the KATMAI target uses). */
#define ARCHCONFIG   "-DPENTIUM2 " \
                     "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
                     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
                     "-DHAVE_CMOV -DHAVE_MMX"
#define LIBNAME   "p2"
#define CORENAME  "P5"
#endif
145
146 #ifdef FORCE_KATMAI
147 #define FORCE
148 #define FORCE_INTEL
149 #define ARCHITECTURE    "X86"
150 #define SUBARCHITECTURE "PENTIUM3"
151 #define ARCHCONFIG   "-DPENTIUM3 " \
152                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
153                      "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
154                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
155                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
156 #define LIBNAME   "katmai"
157 #define CORENAME  "KATMAI"
158 #endif
159
160 #ifdef FORCE_COPPERMINE
161 #define FORCE
162 #define FORCE_INTEL
163 #define ARCHITECTURE    "X86"
164 #define SUBARCHITECTURE "PENTIUM3"
165 #define ARCHCONFIG   "-DPENTIUM3 " \
166                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
167                      "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
168                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
169                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
170 #define LIBNAME   "coppermine"
171 #define CORENAME  "COPPERMINE"
172 #endif
173
174 #ifdef FORCE_NORTHWOOD
175 #define FORCE
176 #define FORCE_INTEL
177 #define ARCHITECTURE    "X86"
178 #define SUBARCHITECTURE "PENTIUM4"
179 #define ARCHCONFIG   "-DPENTIUM4 " \
180                      "-DL1_DATA_SIZE=8192 -DL1_DATA_LINESIZE=64 " \
181                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
182                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
183                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
184 #define LIBNAME   "northwood"
185 #define CORENAME  "NORTHWOOD"
186 #endif
187
188 #ifdef FORCE_PRESCOTT
189 #define FORCE
190 #define FORCE_INTEL
191 #define ARCHITECTURE    "X86"
192 #define SUBARCHITECTURE "PENTIUM4"
193 #define ARCHCONFIG   "-DPENTIUM4 " \
194                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
195                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
196                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
197                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3"
198 #define LIBNAME   "prescott"
199 #define CORENAME  "PRESCOTT"
200 #endif
201
202 #ifdef FORCE_BANIAS
203 #define FORCE
204 #define FORCE_INTEL
205 #define ARCHITECTURE    "X86"
206 #define SUBARCHITECTURE "BANIAS"
207 #define ARCHCONFIG   "-DPENTIUMM " \
208                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
209                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
210                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
211                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
212 #define LIBNAME   "banias"
213 #define CORENAME  "BANIAS"
214 #endif
215
216 #ifdef FORCE_YONAH
217 #define FORCE
218 #define FORCE_INTEL
219 #define ARCHITECTURE    "X86"
220 #define SUBARCHITECTURE "YONAH"
221 #define ARCHCONFIG   "-DPENTIUMM " \
222                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
223                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
224                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
225                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
226 #define LIBNAME   "yonah"
227 #define CORENAME  "YONAH"
228 #endif
229
/*
 * Forced target CORE2: fixed names and cache/TLB parameters that main()
 * prints instead of probing the host CPU.
 */
#ifdef FORCE_CORE2
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE    "X86"
#define SUBARCHITECTURE "CONROE"
#define ARCHCONFIG   "-DCORE2 " \
                     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
                     "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
                     "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
                     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
#define LIBNAME   "core2"
#define CORENAME  "CORE2"
#endif
243
244 #ifdef FORCE_PENRYN
245 #define FORCE
246 #define FORCE_INTEL
247 #define ARCHITECTURE    "X86"
248 #define SUBARCHITECTURE "PENRYN"
249 #define ARCHCONFIG   "-DPENRYN " \
250                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
251                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
252                      "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
253                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1"
254 #define LIBNAME   "penryn"
255 #define CORENAME  "PENRYN"
256 #endif
257
258 #ifdef FORCE_DUNNINGTON
259 #define FORCE
260 #define FORCE_INTEL
261 #define ARCHITECTURE    "X86"
262 #define SUBARCHITECTURE "DUNNINGTON"
263 #define ARCHCONFIG   "-DDUNNINGTON " \
264                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
265                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
266                      "-DL3_SIZE=16777216 -DL3_LINESIZE=64 " \
267                      "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
268                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1"
269 #define LIBNAME   "dunnington"
270 #define CORENAME  "DUNNINGTON"
271 #endif
272
273 #ifdef FORCE_NEHALEM
274 #define FORCE
275 #define FORCE_INTEL
276 #define ARCHITECTURE    "X86"
277 #define SUBARCHITECTURE "NEHALEM"
278 #define ARCHCONFIG   "-DNEHALEM " \
279                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
280                      "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
281                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
282                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2"
283 #define LIBNAME   "nehalem"
284 #define CORENAME  "NEHALEM"
285 #endif
286
287 #ifdef FORCE_SANDYBRIDGE
288 #define FORCE
289 #define FORCE_INTEL
290 #define ARCHITECTURE    "X86"
291 #define SUBARCHITECTURE "SANDYBRIDGE"
292 #define ARCHCONFIG   "-DSANDYBRIDGE " \
293                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
294                      "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
295                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
296                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX"
297 #define LIBNAME   "sandybridge"
298 #define CORENAME  "SANDYBRIDGE"
299 #endif
300
301 #ifdef FORCE_HASWELL
302 #define FORCE
303 #define FORCE_INTEL
304 #define ARCHITECTURE    "X86"
305 #define SUBARCHITECTURE "HASWELL"
306 #define ARCHCONFIG   "-DHASWELL " \
307                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
308                      "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
309                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
310                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
311                      "-DFMA3"
312 #define LIBNAME   "haswell"
313 #define CORENAME  "HASWELL"
314 #endif
315
316 #ifdef FORCE_ATOM
317 #define FORCE
318 #define FORCE_INTEL
319 #define ARCHITECTURE    "X86"
320 #define SUBARCHITECTURE "ATOM"
321 #define ARCHCONFIG   "-DATOM " \
322                      "-DL1_DATA_SIZE=24576 -DL1_DATA_LINESIZE=64 " \
323                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
324                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
325                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
326 #define LIBNAME   "atom"
327 #define CORENAME  "ATOM"
328 #endif
329
330 #ifdef FORCE_ATHLON
331 #define FORCE
332 #define FORCE_INTEL
333 #define ARCHITECTURE    "X86"
334 #define SUBARCHITECTURE "ATHLON"
335 #define ARCHCONFIG   "-DATHLON " \
336                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
337                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
338                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW  " \
339                      "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE "
340 #define LIBNAME   "athlon"
341 #define CORENAME  "ATHLON"
342 #endif
343
344 #ifdef FORCE_OPTERON
345 #define FORCE
346 #define FORCE_INTEL
347 #define ARCHITECTURE    "X86"
348 #define SUBARCHITECTURE "OPTERON"
349 #define ARCHCONFIG   "-DOPTERON " \
350                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
351                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
352                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
353                      "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
354 #define LIBNAME   "opteron"
355 #define CORENAME  "OPTERON"
356 #endif
357
358 #ifdef FORCE_OPTERON_SSE3
359 #define FORCE
360 #define FORCE_INTEL
361 #define ARCHITECTURE    "X86"
362 #define SUBARCHITECTURE "OPTERON"
363 #define ARCHCONFIG   "-DOPTERON " \
364                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
365                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
366                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
367                      "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3"
368 #define LIBNAME   "opteron"
369 #define CORENAME  "OPTERON"
370 #endif
371
372 #if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL)
373 #define FORCE
374 #define FORCE_INTEL
375 #define ARCHITECTURE    "X86"
376 #define SUBARCHITECTURE "BARCELONA"
377 #define ARCHCONFIG   "-DBARCELONA " \
378                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
379                      "-DL2_SIZE=524288 -DL2_LINESIZE=64  -DL3_SIZE=2097152 " \
380                      "-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 " \
381                      "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
382                      "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU"
383 #define LIBNAME   "barcelona"
384 #define CORENAME  "BARCELONA"
385 #endif
386
387 #if defined(FORCE_BOBCAT)
388 #define FORCE
389 #define FORCE_INTEL
390 #define ARCHITECTURE    "X86"
391 #define SUBARCHITECTURE "BOBCAT"
392 #define ARCHCONFIG   "-DBOBCAT " \
393                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
394                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
395                      "-DDTB_DEFAULT_ENTRIES=40 -DDTB_SIZE=4096 " \
396                      "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 " \
397                      "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_CFLUSH -DHAVE_CMOV"
398 #define LIBNAME   "bobcat"
399 #define CORENAME  "BOBCAT"
400 #endif
401
402 #if defined (FORCE_BULLDOZER)
403 #define FORCE
404 #define FORCE_INTEL
405 #define ARCHITECTURE    "X86"
406 #define SUBARCHITECTURE "BULLDOZER"
407 #define ARCHCONFIG   "-DBULLDOZER " \
408                      "-DL1_DATA_SIZE=49152 -DL1_DATA_LINESIZE=64 " \
409                      "-DL2_SIZE=1024000 -DL2_LINESIZE=64  -DL3_SIZE=16777216 " \
410                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 " \
411                      "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
412                      "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU " \
413                      "-DHAVE_AVX -DHAVE_FMA4"
414 #define LIBNAME   "bulldozer"
415 #define CORENAME  "BULLDOZER"
416 #endif
417
418 #if defined (FORCE_PILEDRIVER)
419 #define FORCE
420 #define FORCE_INTEL
421 #define ARCHITECTURE    "X86"
422 #define SUBARCHITECTURE "PILEDRIVER"
423 #define ARCHCONFIG   "-DPILEDRIVER " \
424                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
425                      "-DL2_SIZE=2097152 -DL2_LINESIZE=64  -DL3_SIZE=12582912 " \
426                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
427                      "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
428                      "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
429                      "-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3"
430 #define LIBNAME   "piledriver"
431 #define CORENAME  "PILEDRIVER"
432 #endif
433
434 #ifdef FORCE_SSE_GENERIC
435 #define FORCE
436 #define FORCE_INTEL
437 #define ARCHITECTURE    "X86"
438 #define SUBARCHITECTURE "GENERIC"
439 #define ARCHCONFIG   "-DGENERIC " \
440                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
441                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
442                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
443                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2"
444 #define LIBNAME   "generic"
445 #define CORENAME  "GENERIC"
446 #endif
447
448 #ifdef FORCE_VIAC3
449 #define FORCE
450 #define FORCE_INTEL
451 #define ARCHITECTURE    "X86"
452 #define SUBARCHITECTURE "VIAC3"
453 #define ARCHCONFIG   "-DVIAC3 " \
454                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
455                      "-DL2_SIZE=65536 -DL2_LINESIZE=32 " \
456                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 " \
457                      "-DHAVE_MMX -DHAVE_SSE "
458 #define LIBNAME   "viac3"
459 #define CORENAME  "VIAC3"
460 #endif
461
462 #ifdef FORCE_NANO
463 #define FORCE
464 #define FORCE_INTEL
465 #define ARCHITECTURE    "X86"
466 #define SUBARCHITECTURE "NANO"
467 #define ARCHCONFIG   "-DNANO " \
468                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
469                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
470                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
471                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
472 #define LIBNAME   "nano"
473 #define CORENAME  "NANO"
474 #endif
475
476 #ifdef FORCE_POWER3
477 #define FORCE
478 #define ARCHITECTURE    "POWER"
479 #define SUBARCHITECTURE "POWER3"
480 #define SUBDIRNAME      "power"
481 #define ARCHCONFIG   "-DPOWER3 " \
482                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
483                      "-DL2_SIZE=2097152 -DL2_LINESIZE=128 " \
484                      "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
485 #define LIBNAME   "power3"
486 #define CORENAME  "POWER3"
487 #endif
488
489 #ifdef FORCE_POWER4
490 #define FORCE
491 #define ARCHITECTURE    "POWER"
492 #define SUBARCHITECTURE "POWER4"
493 #define SUBDIRNAME      "power"
494 #define ARCHCONFIG   "-DPOWER4 " \
495                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
496                      "-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \
497                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
498 #define LIBNAME   "power4"
499 #define CORENAME  "POWER4"
500 #endif
501
502 #ifdef FORCE_POWER5
503 #define FORCE
504 #define ARCHITECTURE    "POWER"
505 #define SUBARCHITECTURE "POWER5"
506 #define SUBDIRNAME      "power"
507 #define ARCHCONFIG   "-DPOWER5 " \
508                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
509                      "-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \
510                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
511 #define LIBNAME   "power5"
512 #define CORENAME  "POWER5"
513 #endif
514
515 #ifdef FORCE_POWER6
516 #define FORCE
517 #define ARCHITECTURE    "POWER"
518 #define SUBARCHITECTURE "POWER6"
519 #define SUBDIRNAME      "power"
520 #define ARCHCONFIG   "-DPOWER6 " \
521                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
522                      "-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \
523                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
524 #define LIBNAME   "power6"
525 #define CORENAME  "POWER6"
526 #endif
527
528 #ifdef FORCE_PPCG4
529 #define FORCE
530 #define ARCHITECTURE    "POWER"
531 #define SUBARCHITECTURE "PPCG4"
532 #define SUBDIRNAME      "power"
533 #define ARCHCONFIG   "-DPPCG4 " \
534                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
535                      "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
536                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
537 #define LIBNAME   "ppcg4"
538 #define CORENAME  "PPCG4"
539 #endif
540
/*
 * Forced target PPC970: fixed names and cache/TLB parameters that main()
 * prints instead of probing the host CPU.
 */
#ifdef FORCE_PPC970
#define FORCE
#define ARCHITECTURE    "POWER"
#define SUBARCHITECTURE "PPC970"
#define SUBDIRNAME      "power"
/* L2_SIZE is 512 KiB, i.e. 524288 bytes. */
#define ARCHCONFIG   "-DPPC970 " \
                     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=128 " \
                     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "ppc970"
#define CORENAME  "PPC970"
#endif
553
554 #ifdef FORCE_PPC970MP
555 #define FORCE
556 #define ARCHITECTURE    "POWER"
557 #define SUBARCHITECTURE "PPC970"
558 #define SUBDIRNAME      "power"
559 #define ARCHCONFIG   "-DPPC970 " \
560                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
561                      "-DL2_SIZE=1024976 -DL2_LINESIZE=128 " \
562                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
563 #define LIBNAME   "ppc970mp"
564 #define CORENAME  "PPC970"
565 #endif
566
567 #ifdef FORCE_PPC440
568 #define FORCE
569 #define ARCHITECTURE    "POWER"
570 #define SUBARCHITECTURE "PPC440"
571 #define SUBDIRNAME      "power"
572 #define ARCHCONFIG   "-DPPC440 " \
573                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
574                      "-DL2_SIZE=16384 -DL2_LINESIZE=128 " \
575                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
576 #define LIBNAME   "ppc440"
577 #define CORENAME  "PPC440"
578 #endif
579
580 #ifdef FORCE_PPC440FP2
581 #define FORCE
582 #define ARCHITECTURE    "POWER"
583 #define SUBARCHITECTURE "PPC440FP2"
584 #define SUBDIRNAME      "power"
585 #define ARCHCONFIG   "-DPPC440FP2 " \
586                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
587                      "-DL2_SIZE=16384 -DL2_LINESIZE=128 " \
588                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
589 #define LIBNAME   "ppc440FP2"
590 #define CORENAME  "PPC440FP2"
591 #endif
592
/*
 * Forced target CELL: fixed names and cache/TLB parameters that main()
 * prints instead of probing the host CPU.
 */
#ifdef FORCE_CELL
#define FORCE
#define ARCHITECTURE    "POWER"
#define SUBARCHITECTURE "CELL"
#define SUBDIRNAME      "power"
/* L2_SIZE is 512 KiB, i.e. 524288 bytes. */
#define ARCHCONFIG   "-DCELL " \
                     "-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=128 " \
                     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "cell"
#define CORENAME  "CELL"
#endif
605
/*
 * Forced target SICORTEX (MIPS): fixed names and cache/TLB parameters that
 * main() prints instead of probing the host CPU.
 */
#ifdef FORCE_SICORTEX
#define FORCE
#define ARCHITECTURE    "MIPS"
#define SUBARCHITECTURE "SICORTEX"
#define SUBDIRNAME      "mips"
/* L2_SIZE is 512 KiB, i.e. 524288 bytes. */
#define ARCHCONFIG   "-DSICORTEX " \
                     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
                     "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "mips"
#define CORENAME  "sicortex"
#endif
618
619
/*
 * Forced target LOONGSON3A (MIPS64): fixed names and cache/TLB parameters
 * that main() prints instead of probing the host CPU.
 */
#ifdef FORCE_LOONGSON3A
#define FORCE
#define ARCHITECTURE    "MIPS"
#define SUBARCHITECTURE "LOONGSON3A"
#define SUBDIRNAME      "mips64"
/* L2_SIZE is 512 KiB, i.e. 524288 bytes. */
#define ARCHCONFIG   "-DLOONGSON3A " \
                     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
                     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
#define LIBNAME   "loongson3a"
#define CORENAME  "LOONGSON3A"
#endif
633
/*
 * Forced target LOONGSON3B (MIPS64): fixed names and cache/TLB parameters
 * that main() prints instead of probing the host CPU.
 */
#ifdef FORCE_LOONGSON3B
#define FORCE
#define ARCHITECTURE    "MIPS"
#define SUBARCHITECTURE "LOONGSON3B"
#define SUBDIRNAME      "mips64"
/* L2_SIZE is 512 KiB, i.e. 524288 bytes. */
#define ARCHCONFIG   "-DLOONGSON3B " \
                     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
                     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
#define LIBNAME   "loongson3b"
#define CORENAME  "LOONGSON3B"
#endif
647
648 #ifdef FORCE_ITANIUM2
649 #define FORCE
650 #define ARCHITECTURE    "IA64"
651 #define SUBARCHITECTURE "ITANIUM2"
652 #define SUBDIRNAME      "ia64"
653 #define ARCHCONFIG   "-DITANIUM2 " \
654                      "-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \
655                      "-DL2_SIZE=1572864 -DL2_LINESIZE=128 -DDTB_SIZE=16384 -DDTB_DEFAULT_ENTRIES=128 "
656 #define LIBNAME   "itanium2"
657 #define CORENAME  "itanium2"
658 #endif
659
660 #ifdef FORCE_SPARC
661 #define FORCE
662 #define ARCHITECTURE    "SPARC"
663 #define SUBARCHITECTURE "SPARC"
664 #define SUBDIRNAME      "sparc"
665 #define ARCHCONFIG   "-DSPARC -DV9 " \
666                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
667                      "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 "
668 #define LIBNAME   "sparc"
669 #define CORENAME  "sparc"
670 #endif
671
672 #ifdef FORCE_SPARCV7
673 #define FORCE
674 #define ARCHITECTURE    "SPARC"
675 #define SUBARCHITECTURE "SPARC"
676 #define SUBDIRNAME      "sparc"
677 #define ARCHCONFIG   "-DSPARC -DV7 " \
678                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
679                      "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 "
680 #define LIBNAME   "sparcv7"
681 #define CORENAME  "sparcv7"
682 #endif
683
/*
 * Forced target GENERIC: architecture-neutral names and cache/TLB
 * parameters that main() prints instead of probing the host CPU.
 */
#ifdef FORCE_GENERIC
#define FORCE
#define ARCHITECTURE    "GENERIC"
#define SUBARCHITECTURE "GENERIC"
#define SUBDIRNAME      "generic"
/* L2_SIZE is 512 KiB, i.e. 524288 bytes. */
#define ARCHCONFIG   "-DGENERIC " \
                     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=128 " \
                     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "generic"
#define CORENAME  "generic"
#endif
696
/*
 * Forced target ARMV7: fixed names and cache/TLB parameters that main()
 * prints instead of probing the host CPU.
 */
#ifdef FORCE_ARMV7
#define FORCE
#define ARCHITECTURE    "ARM"
#define SUBARCHITECTURE "ARMV7"
#define SUBDIRNAME      "arm"
/* L2_SIZE is 512 KiB, i.e. 524288 bytes. */
#define ARCHCONFIG   "-DARMV7 " \
                     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
                     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
                     "-DHAVE_VFPV3 -DHAVE_VFP"
#define LIBNAME   "armv7"
#define CORENAME  "ARMV7"
#endif
711
/*
 * Forced target ARMV6: fixed names and cache/TLB parameters that main()
 * prints instead of probing the host CPU.
 */
#ifdef FORCE_ARMV6
#define FORCE
#define ARCHITECTURE    "ARM"
#define SUBARCHITECTURE "ARMV6"
#define SUBDIRNAME      "arm"
/* L2_SIZE is 512 KiB, i.e. 524288 bytes. */
#define ARCHCONFIG   "-DARMV6 " \
                     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
                     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
                     "-DHAVE_VFP"
#define LIBNAME   "armv6"
#define CORENAME  "ARMV6"
#endif
726
/*
 * Forced target ARMV5: fixed names and cache/TLB parameters that main()
 * prints instead of probing the host CPU.
 */
#ifdef FORCE_ARMV5
#define FORCE
#define ARCHITECTURE    "ARM"
#define SUBARCHITECTURE "ARMV5"
#define SUBDIRNAME      "arm"
/* L2_SIZE is 512 KiB, i.e. 524288 bytes. */
#define ARCHCONFIG   "-DARMV5 " \
                     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
                     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
                     "-DHAVE_VFP"
#define LIBNAME   "armv5"
#define CORENAME  "ARMV5"
#endif
741
742
/*
 * Forced target ARMV8 (ARM64): fixed names and cache/TLB parameters that
 * main() prints instead of probing the host CPU. The core name reported
 * for this target is "XGENE1".
 */
#if defined(FORCE_ARMV8)
#  define FORCE
#  define ARCHITECTURE    "ARM64"
#  define SUBARCHITECTURE "ARMV8"
#  define SUBDIRNAME      "arm64"
#  define ARCHCONFIG \
          "-DARMV8 " \
          "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
          "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
          "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 "
#  define LIBNAME         "armv8"
#  define CORENAME        "XGENE1"
#endif
756
757
758 #ifndef FORCE
759
760 #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
761     defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__)
762 #ifndef POWER
763 #define POWER
764 #endif
765 #define OPENBLAS_SUPPORTED
766 #endif
767
768 #if defined(__i386__) || (__x86_64__)
769 #include "cpuid_x86.c"
770 #define OPENBLAS_SUPPORTED
771 #endif
772
773 #ifdef __ia64__
774 #include "cpuid_ia64.c"
775 #define OPENBLAS_SUPPORTED
776 #endif
777
778 #ifdef __alpha
779 #include "cpuid_alpha.c"
780 #define OPENBLAS_SUPPORTED
781 #endif
782
783 #ifdef POWER
784 #include "cpuid_power.c"
785 #define OPENBLAS_SUPPORTED
786 #endif
787
788 #ifdef sparc
789 #include "cpuid_sparc.c"
790 #define OPENBLAS_SUPPORTED
791 #endif
792
793 #ifdef __mips__
794 #include "cpuid_mips.c"
795 #define OPENBLAS_SUPPORTED
796 #endif
797
798 #ifdef __arm__
799 #include "cpuid_arm.c"
800 #define OPENBLAS_SUPPORTED
801 #endif
802
803 #ifdef __aarch64__
804 #include "cpuid_arm64.c"
805 #define OPENBLAS_SUPPORTED
806 #endif
807
808
809 #ifndef OPENBLAS_SUPPORTED
810 #error "This arch/CPU is not supported by OpenBLAS."
811 #endif
812
813 #else
814
815 #endif
816
/*
 * Returns the number of processors to advertise to the build.
 *
 *   linux             : sysconf(_SC_NPROCESSORS_ONLN), i.e. processors
 *                       that are currently online
 *   OS_WINDOWS        : SYSTEM_INFO.dwNumberOfProcessors
 *   FreeBSD / Apple   : sysctl { CTL_HW, HW_NCPU }
 *   anything else     : fixed fallback
 *
 * The result is always >= 1. When a query fails or reports a value below
 * one, the same fallback as for unknown platforms (2) is returned, so the
 * caller never prints a negative or indeterminate count.
 */
static int get_num_cores(void) {

  /* Value reported when the platform gives no usable answer. */
  enum { FALLBACK_NUM_CORES = 2 };

#ifdef linux

  long online = sysconf(_SC_NPROCESSORS_ONLN);

  /* sysconf() returns -1 on error. */
  if (online < 1) return FALLBACK_NUM_CORES;
  return (int)online;

#elif defined(OS_WINDOWS)

  SYSTEM_INFO sysinfo;

  GetSystemInfo(&sysinfo);
  if (sysinfo.dwNumberOfProcessors < 1) return FALLBACK_NUM_CORES;
  return (int)sysinfo.dwNumberOfProcessors;

#elif defined(__FreeBSD__) || defined(__APPLE__)

  int m[2];
  int count = 0;
  size_t len = sizeof(count);

  m[0] = CTL_HW;
  m[1] = HW_NCPU;

  /* On failure sysctl() leaves count untouched, so it must not be used. */
  if (sysctl(m, 2, &count, &len, NULL, 0) != 0 || count < 1) {
    return FALLBACK_NUM_CORES;
  }
  return count;

#else
  return FALLBACK_NUM_CORES;
#endif
}
846
#ifdef FORCE
/*
 * Scans config for "-DNAME" / "-DNAME=VALUE" tokens (tokens end at a space
 * or at the end of the string) and prints one line per token to stdout:
 *
 *     <line_prefix>NAME<value_separator>VALUE      for "-DNAME=VALUE"
 *     <line_prefix>NAME<flag_suffix>               for "-DNAME"
 *
 * Everything in config that is not part of a "-D" token is skipped.
 */
static void print_forced_defines(const char *config, const char *line_prefix,
                                 const char *value_separator, const char *flag_suffix)
{
  const char *p = config;

  while (*p != '\0') {
    const char *name;
    const char *value;

    if (p[0] != '-' || p[1] != 'D') {
      p++;
      continue;
    }
    p += 2;

    name = p;
    while (*p != ' ' && *p != '\0' && *p != '=') p++;

    fputs(line_prefix, stdout);
    fwrite(name, 1, (size_t)(p - name), stdout);

    if (*p == '=') {
      value = ++p;
      while (*p != ' ' && *p != '\0') p++;
      fputs(value_separator, stdout);
      fwrite(value, 1, (size_t)(p - value), stdout);
    } else if (p != name) {
      /* A bare flag: only decorate it when there actually is a name. */
      fputs(flag_suffix, stdout);
    }

    putchar('\n');
  }
}
#endif

/*
 * Entry point. The first character of argv[1] selects what is written to
 * stdout:
 *
 *   '0'  Makefile fragment: CORE, LIBCORE, NUM_CORES, the CPU feature
 *        variables and (unless disabled or on Windows) a "MAKE += -j N" line.
 *   '1'  C header fragment: one "#define" per configuration flag plus
 *        CHAR_CORENAME.
 *   '2'  "SMP=1" when more than one core is reported.
 *
 * With FORCE defined, the values come from the ARCHCONFIG / CORENAME /
 * LIBNAME macros selected above; otherwise they come from the cpuid_*.c
 * helpers included for the host architecture.
 *
 * Always returns 0; with no argument, or an unrecognised one, nothing is
 * printed.
 */
int main(int argc, char *argv[]){

  /* "< 2" rather than "== 1": argv[1] must exist before it is read. */
  if (argc < 2) return 0;

  switch (argv[1][0]) {

  case '0' : /* for Makefile */

#ifdef FORCE
    printf("CORE=%s\n", CORENAME);
    printf("LIBCORE=%s\n", LIBNAME);
#else
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
    printf("CORE=%s\n", get_corename());
#endif
    printf("LIBCORE=");
    get_libname();
    printf("\n");
#endif

    printf("NUM_CORES=%d\n", get_num_cores());

#if defined(__arm__) && !defined(FORCE)
    get_features();
#endif

#if defined(__i386__) || defined(__x86_64__)
#ifndef FORCE
    get_sse();
#else
    /* "-DNAME=VALUE" -> "NAME=VALUE", bare "-DNAME" -> "NAME=1". */
    print_forced_defines(ARCHCONFIG, "", "=", "=1");
#endif
#endif

#if defined(NO_PARALLEL_MAKE) && NO_PARALLEL_MAKE == 1
    printf("MAKE += -j 1\n");
#elif !defined(OS_WINDOWS)
    printf("MAKE += -j %d\n", get_num_cores());
#endif

    break;

  case '1' : /* For config.h */

#ifdef FORCE
    /*
     * "-DNAME=VALUE" -> "#define NAME VALUE", bare "-DNAME" -> "#define NAME".
     * The CORE_<name> flag is appended by literal concatenation, without a
     * trailing newline, so no line break can end up inside the macro name.
     */
    print_forced_defines(ARCHCONFIG " -DCORE_" CORENAME, "#define ", " ", "");
    printf("#define CHAR_CORENAME \"%s\"\n", CORENAME);
#else
    get_cpuconfig();
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
    printf("#define CHAR_CORENAME \"%s\"\n", get_corename());
#endif
#endif

    break;

  case '2' : /* SMP */
    if (get_num_cores() > 1) printf("SMP=1\n");
    break;

  default :
    /* Unknown selector: print nothing. */
    break;
  }

  fflush(stdout);

  return 0;
}
979