48d28278934a03d2462415338d99f07216389f3d
[platform/upstream/openblas.git] / getarch.c
1 /*****************************************************************************
2 Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8
9    1. Redistributions of source code must retain the above copyright
10       notice, this list of conditions and the following disclaimer.
11
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in
14       the documentation and/or other materials provided with the
15       distribution.
16    3. Neither the name of the ISCAS nor the names of its contributors may 
17       be used to endorse or promote products derived from this software 
18       without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
25 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
26 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
27 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
28 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 
29 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 **********************************************************************************/
32
33 /*********************************************************************/
34 /* Copyright 2009, 2010 The University of Texas at Austin.           */
35 /* All rights reserved.                                              */
36 /*                                                                   */
37 /* Redistribution and use in source and binary forms, with or        */
38 /* without modification, are permitted provided that the following   */
39 /* conditions are met:                                               */
40 /*                                                                   */
41 /*   1. Redistributions of source code must retain the above         */
42 /*      copyright notice, this list of conditions and the following  */
43 /*      disclaimer.                                                  */
44 /*                                                                   */
45 /*   2. Redistributions in binary form must reproduce the above      */
46 /*      copyright notice, this list of conditions and the following  */
47 /*      disclaimer in the documentation and/or other materials       */
48 /*      provided with the distribution.                              */
49 /*                                                                   */
50 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
51 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
52 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
53 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
54 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
55 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
56 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
57 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
58 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
59 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
60 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
61 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
62 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
63 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
64 /*                                                                   */
65 /* The views and conclusions contained in the software and           */
66 /* documentation are those of the authors and should not be          */
67 /* interpreted as representing official policies, either expressed   */
68 /* or implied, of The University of Texas at Austin.                 */
69 /*********************************************************************/
70
/*
 * OS_WINDOWS selects the <windows.h> / GetSystemInfo() code paths below.
 * __WIN32__/__WIN64__/__CYGWIN32__/__CYGWIN64__ are the historical GCC-style
 * spellings; _WIN32/_WIN64 are also tested so that toolchains which only
 * provide the underscore-prefixed macros are recognised as Windows too.
 */
#if defined(__WIN32__) || defined(__WIN64__) || \
    defined(__CYGWIN32__) || defined(__CYGWIN64__) || \
    defined(_WIN32) || defined(_WIN64)
#define OS_WINDOWS
#endif
74
#include <stdio.h>
#include <string.h>
#ifdef OS_WINDOWS
#include <windows.h>
#endif
#if defined(__FreeBSD__) || defined(__APPLE__)
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
#if defined(linux) || defined(__linux__)
#include <sys/sysinfo.h>
#endif
87
88 /* #define FORCE_P2             */
89 /* #define FORCE_KATMAI         */
90 /* #define FORCE_COPPERMINE     */
91 /* #define FORCE_NORTHWOOD      */
92 /* #define FORCE_PRESCOTT       */
93 /* #define FORCE_BANIAS         */
94 /* #define FORCE_YONAH          */
95 /* #define FORCE_CORE2          */
96 /* #define FORCE_PENRYN         */
97 /* #define FORCE_DUNNINGTON     */
98 /* #define FORCE_NEHALEM        */
99 /* #define FORCE_SANDYBRIDGE    */
100 /* #define FORCE_ATOM           */
101 /* #define FORCE_ATHLON         */
102 /* #define FORCE_OPTERON        */
103 /* #define FORCE_OPTERON_SSE3   */
104 /* #define FORCE_BARCELONA      */
105 /* #define FORCE_SHANGHAI       */
106 /* #define FORCE_ISTANBUL       */
107 /* #define FORCE_BOBCAT         */
108 /* #define FORCE_BULLDOZER      */
109 /* #define FORCE_SSE_GENERIC    */
110 /* #define FORCE_VIAC3          */
111 /* #define FORCE_NANO           */
112 /* #define FORCE_POWER3         */
113 /* #define FORCE_POWER4         */
114 /* #define FORCE_POWER5         */
115 /* #define FORCE_POWER6         */
116 /* #define FORCE_PPCG4          */
117 /* #define FORCE_PPC970         */
118 /* #define FORCE_PPC970MP       */
119 /* #define FORCE_PPC440         */
120 /* #define FORCE_PPC440FP2      */
121 /* #define FORCE_CELL           */
122 /* #define FORCE_SICORTEX       */
123 /* #define FORCE_LOONGSON3A     */
124 /* #define FORCE_LOONGSON3B     */
125 /* #define FORCE_ITANIUM2       */
126 /* #define FORCE_SPARC          */
127 /* #define FORCE_SPARCV7        */
128 /* #define FORCE_GENERIC        */
129
/*
 * Forced target: Pentium II.
 * 16 KiB L1 data cache, 512 KiB L2, 32-byte cache lines, CMOV + MMX.
 */
#ifdef FORCE_P2
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE    "X86"
#define SUBARCHITECTURE "PENTIUM2"
/* L2_SIZE is 512 KiB = 524288 bytes (previously mistyped as 512488). */
#define ARCHCONFIG   "-DPENTIUM2 " \
                     "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
                     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
                     "-DHAVE_CMOV -DHAVE_MMX"
#define LIBNAME   "p2"
#define CORENAME  "P5"
#endif
143
144 #ifdef FORCE_KATMAI
145 #define FORCE
146 #define FORCE_INTEL
147 #define ARCHITECTURE    "X86"
148 #define SUBARCHITECTURE "PENTIUM3"
149 #define ARCHCONFIG   "-DPENTIUM3 " \
150                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
151                      "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
152                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
153                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
154 #define LIBNAME   "katmai"
155 #define CORENAME  "KATMAI"
156 #endif
157
158 #ifdef FORCE_COPPERMINE
159 #define FORCE
160 #define FORCE_INTEL
161 #define ARCHITECTURE    "X86"
162 #define SUBARCHITECTURE "PENTIUM3"
163 #define ARCHCONFIG   "-DPENTIUM3 " \
164                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=32 " \
165                      "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
166                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
167                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE "
168 #define LIBNAME   "coppermine"
169 #define CORENAME  "COPPERMINE"
170 #endif
171
172 #ifdef FORCE_NORTHWOOD
173 #define FORCE
174 #define FORCE_INTEL
175 #define ARCHITECTURE    "X86"
176 #define SUBARCHITECTURE "PENTIUM4"
177 #define ARCHCONFIG   "-DPENTIUM4 " \
178                      "-DL1_DATA_SIZE=8192 -DL1_DATA_LINESIZE=64 " \
179                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
180                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
181                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
182 #define LIBNAME   "northwood"
183 #define CORENAME  "NORTHWOOD"
184 #endif
185
186 #ifdef FORCE_PRESCOTT
187 #define FORCE
188 #define FORCE_INTEL
189 #define ARCHITECTURE    "X86"
190 #define SUBARCHITECTURE "PENTIUM4"
191 #define ARCHCONFIG   "-DPENTIUM4 " \
192                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
193                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
194                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
195                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3"
196 #define LIBNAME   "prescott"
197 #define CORENAME  "PRESCOTT"
198 #endif
199
200 #ifdef FORCE_BANIAS
201 #define FORCE
202 #define FORCE_INTEL
203 #define ARCHITECTURE    "X86"
204 #define SUBARCHITECTURE "BANIAS"
205 #define ARCHCONFIG   "-DPENTIUMM " \
206                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
207                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
208                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
209                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
210 #define LIBNAME   "banias"
211 #define CORENAME  "BANIAS"
212 #endif
213
214 #ifdef FORCE_YONAH
215 #define FORCE
216 #define FORCE_INTEL
217 #define ARCHITECTURE    "X86"
218 #define SUBARCHITECTURE "YONAH"
219 #define ARCHCONFIG   "-DPENTIUMM " \
220                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
221                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
222                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
223                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
224 #define LIBNAME   "yonah"
225 #define CORENAME  "YONAH"
226 #endif
227
/*
 * Forced target: Intel Core 2.
 * 32 KiB L1 data cache, 1 MiB L2, 64-byte cache lines, up to SSSE3.
 */
#ifdef FORCE_CORE2
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE    "X86"
/* Spelling corrected from "CONRORE". */
#define SUBARCHITECTURE "CONROE"
#define ARCHCONFIG   "-DCORE2 " \
                     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
                     "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
                     "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
                     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
#define LIBNAME   "core2"
#define CORENAME  "CORE2"
#endif
241
242 #ifdef FORCE_PENRYN
243 #define FORCE
244 #define FORCE_INTEL
245 #define ARCHITECTURE    "X86"
246 #define SUBARCHITECTURE "PENRYN"
247 #define ARCHCONFIG   "-DPENRYN " \
248                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
249                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
250                      "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
251                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1"
252 #define LIBNAME   "penryn"
253 #define CORENAME  "PENRYN"
254 #endif
255
256 #ifdef FORCE_DUNNINGTON
257 #define FORCE
258 #define FORCE_INTEL
259 #define ARCHITECTURE    "X86"
260 #define SUBARCHITECTURE "DUNNINGTON"
261 #define ARCHCONFIG   "-DDUNNINGTON " \
262                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
263                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
264                      "-DL3_SIZE=16777216 -DL3_LINESIZE=64 " \
265                      "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 " \
266                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1"
267 #define LIBNAME   "dunnington"
268 #define CORENAME  "DUNNINGTON"
269 #endif
270
271 #ifdef FORCE_NEHALEM
272 #define FORCE
273 #define FORCE_INTEL
274 #define ARCHITECTURE    "X86"
275 #define SUBARCHITECTURE "NEHALEM"
276 #define ARCHCONFIG   "-DNEHALEM " \
277                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
278                      "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
279                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
280                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2"
281 #define LIBNAME   "nehalem"
282 #define CORENAME  "NEHALEM"
283 #endif
284
285 #ifdef FORCE_SANDYBRIDGE
286 #define FORCE
287 #define FORCE_INTEL
288 #define ARCHITECTURE    "X86"
289 #define SUBARCHITECTURE "SANDYBRIDGE"
290 #define ARCHCONFIG   "-DSANDYBRIDGE " \
291                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
292                      "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
293                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
294                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX"
295 #define LIBNAME   "sandybridge"
296 #define CORENAME  "SANDYBRIDGE"
297 #endif
298
299 #ifdef FORCE_ATOM
300 #define FORCE
301 #define FORCE_INTEL
302 #define ARCHITECTURE    "X86"
303 #define SUBARCHITECTURE "ATOM"
304 #define ARCHCONFIG   "-DATOM " \
305                      "-DL1_DATA_SIZE=24576 -DL1_DATA_LINESIZE=64 " \
306                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
307                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
308                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
309 #define LIBNAME   "atom"
310 #define CORENAME  "ATOM"
311 #endif
312
313 #ifdef FORCE_ATHLON
314 #define FORCE
315 #define FORCE_INTEL
316 #define ARCHITECTURE    "X86"
317 #define SUBARCHITECTURE "ATHLON"
318 #define ARCHCONFIG   "-DATHLON " \
319                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
320                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
321                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW  " \
322                      "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE "
323 #define LIBNAME   "athlon"
324 #define CORENAME  "ATHLON"
325 #endif
326
327 #ifdef FORCE_OPTERON
328 #define FORCE
329 #define FORCE_INTEL
330 #define ARCHITECTURE    "X86"
331 #define SUBARCHITECTURE "OPTERON"
332 #define ARCHCONFIG   "-DOPTERON " \
333                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
334                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
335                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
336                      "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 "
337 #define LIBNAME   "opteron"
338 #define CORENAME  "OPTERON"
339 #endif
340
341 #ifdef FORCE_OPTERON_SSE3
342 #define FORCE
343 #define FORCE_INTEL
344 #define ARCHITECTURE    "X86"
345 #define SUBARCHITECTURE "OPTERON"
346 #define ARCHCONFIG   "-DOPTERON " \
347                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
348                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
349                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
350                      "-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3"
351 #define LIBNAME   "opteron"
352 #define CORENAME  "OPTERON"
353 #endif
354
355 #if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL)
356 #define FORCE
357 #define FORCE_INTEL
358 #define ARCHITECTURE    "X86"
359 #define SUBARCHITECTURE "BARCELONA"
360 #define ARCHCONFIG   "-DBARCELONA " \
361                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
362                      "-DL2_SIZE=524288 -DL2_LINESIZE=64  -DL3_SIZE=2097152 " \
363                      "-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 " \
364                      "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
365                      "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU"
366 #define LIBNAME   "barcelona"
367 #define CORENAME  "BARCELONA"
368 #endif
369
370 #if defined(FORCE_BOBCAT) 
371 #define FORCE
372 #define FORCE_INTEL
373 #define ARCHITECTURE    "X86"
374 #define SUBARCHITECTURE "BOBCAT"
375 #define ARCHCONFIG   "-DBOBCAT " \
376                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
377                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
378                      "-DDTB_DEFAULT_ENTRIES=40 -DDTB_SIZE=4096 " \
379                      "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 " \
380                      "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_CFLUSH -DHAVE_CMOV"
381 #define LIBNAME   "bobcat"
382 #define CORENAME  "BOBCAT"
383 #endif
384
385 #if defined (FORCE_BULLDOZER)
386 #define FORCE
387 #define FORCE_INTEL
388 #define ARCHITECTURE    "X86"
389 #define SUBARCHITECTURE "BULLDOZER"
390 #define ARCHCONFIG   "-DBULLDOZER " \
391                      "-DL1_DATA_SIZE=49152 -DL1_DATA_LINESIZE=64 " \
392                      "-DL2_SIZE=1024000 -DL2_LINESIZE=64  -DL3_SIZE=16777216 " \
393                      "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 " \
394                      "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
395                      "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU " \
396                      "-DHAVE_AVX -DHAVE_FMA4"
397 #define LIBNAME   "bulldozer"
398 #define CORENAME  "BULLDOZER"
399 #endif
400
401 #ifdef FORCE_SSE_GENERIC
402 #define FORCE
403 #define FORCE_INTEL
404 #define ARCHITECTURE    "X86"
405 #define SUBARCHITECTURE "GENERIC"
406 #define ARCHCONFIG   "-DGENERIC " \
407                      "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
408                      "-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
409                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
410                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2"
411 #define LIBNAME   "generic"
412 #define CORENAME  "GENERIC"
413 #endif
414
415 #ifdef FORCE_VIAC3
416 #define FORCE
417 #define FORCE_INTEL
418 #define ARCHITECTURE    "X86"
419 #define SUBARCHITECTURE "VIAC3"
420 #define ARCHCONFIG   "-DVIAC3 " \
421                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
422                      "-DL2_SIZE=65536 -DL2_LINESIZE=32 " \
423                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 " \
424                      "-DHAVE_MMX -DHAVE_SSE "
425 #define LIBNAME   "viac3"
426 #define CORENAME  "VIAC3"
427 #endif
428
429 #ifdef FORCE_NANO
430 #define FORCE
431 #define FORCE_INTEL
432 #define ARCHITECTURE    "X86"
433 #define SUBARCHITECTURE "NANO"
434 #define ARCHCONFIG   "-DNANO " \
435                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
436                      "-DL2_SIZE=1048576 -DL2_LINESIZE=64 " \
437                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " \
438                      "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3"
439 #define LIBNAME   "nano"
440 #define CORENAME  "NANO"
441 #endif
442
443 #ifdef FORCE_POWER3
444 #define FORCE
445 #define ARCHITECTURE    "POWER"
446 #define SUBARCHITECTURE "POWER3"
447 #define SUBDIRNAME      "power"
448 #define ARCHCONFIG   "-DPOWER3 " \
449                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
450                      "-DL2_SIZE=2097152 -DL2_LINESIZE=128 " \
451                      "-DDTB_DEFAULT_ENTRIES=256 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
452 #define LIBNAME   "power3"
453 #define CORENAME  "POWER3"
454 #endif
455
456 #ifdef FORCE_POWER4
457 #define FORCE
458 #define ARCHITECTURE    "POWER"
459 #define SUBARCHITECTURE "POWER4"
460 #define SUBDIRNAME      "power"
461 #define ARCHCONFIG   "-DPOWER4 " \
462                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
463                      "-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \
464                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
465 #define LIBNAME   "power4"
466 #define CORENAME  "POWER4"
467 #endif
468
469 #ifdef FORCE_POWER5
470 #define FORCE
471 #define ARCHITECTURE    "POWER"
472 #define SUBARCHITECTURE "POWER5"
473 #define SUBDIRNAME      "power"
474 #define ARCHCONFIG   "-DPOWER5 " \
475                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
476                      "-DL2_SIZE=1509949 -DL2_LINESIZE=128 " \
477                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=6 "
478 #define LIBNAME   "power5"
479 #define CORENAME  "POWER5"
480 #endif
481
482 #ifdef FORCE_POWER6
483 #define FORCE
484 #define ARCHITECTURE    "POWER"
485 #define SUBARCHITECTURE "POWER6"
486 #define SUBDIRNAME      "power"
487 #define ARCHCONFIG   "-DPOWER6 " \
488                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
489                      "-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \
490                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
491 #define LIBNAME   "power6"
492 #define CORENAME  "POWER6"
493 #endif
494
495 #ifdef FORCE_PPCG4
496 #define FORCE
497 #define ARCHITECTURE    "POWER"
498 #define SUBARCHITECTURE "PPCG4"
499 #define SUBDIRNAME      "power"
500 #define ARCHCONFIG   "-DPPCG4 " \
501                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
502                      "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
503                      "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
504 #define LIBNAME   "ppcg4"
505 #define CORENAME  "PPCG4"
506 #endif
507
/*
 * Forced target: PowerPC 970.
 * 32 KiB L1 data cache, 512 KiB L2, 128-byte cache lines.
 */
#ifdef FORCE_PPC970
#define FORCE
#define ARCHITECTURE    "POWER"
#define SUBARCHITECTURE "PPC970"
#define SUBDIRNAME      "power"
/* L2_SIZE is 512 KiB = 524288 bytes (previously mistyped as 512488). */
#define ARCHCONFIG   "-DPPC970 " \
                     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=128 " \
                     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "ppc970"
#define CORENAME  "PPC970"
#endif
520
/*
 * Forced target: PowerPC 970MP.
 * Same core define and CORENAME as PPC970, with a 1 MiB L2 and its own
 * library name.
 */
#ifdef FORCE_PPC970MP
#define FORCE
#define ARCHITECTURE    "POWER"
#define SUBARCHITECTURE "PPC970"
#define SUBDIRNAME      "power"
/* L2_SIZE is 1 MiB = 1048576 bytes (previously 1024976, i.e. twice the
   mistyped 512488 used for the 512 KiB targets). */
#define ARCHCONFIG   "-DPPC970 " \
                     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
                     "-DL2_SIZE=1048576 -DL2_LINESIZE=128 " \
                     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "ppc970mp"
#define CORENAME  "PPC970"
#endif
533
534 #ifdef FORCE_PPC440
535 #define FORCE
536 #define ARCHITECTURE    "POWER"
537 #define SUBARCHITECTURE "PPC440"
538 #define SUBDIRNAME      "power"
539 #define ARCHCONFIG   "-DPPC440 " \
540                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
541                      "-DL2_SIZE=16384 -DL2_LINESIZE=128 " \
542                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
543 #define LIBNAME   "ppc440"
544 #define CORENAME  "PPC440"
545 #endif
546
547 #ifdef FORCE_PPC440FP2
548 #define FORCE
549 #define ARCHITECTURE    "POWER"
550 #define SUBARCHITECTURE "PPC440FP2"
551 #define SUBDIRNAME      "power"
552 #define ARCHCONFIG   "-DPPC440FP2 " \
553                      "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
554                      "-DL2_SIZE=16384 -DL2_LINESIZE=128 " \
555                      "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
556 #define LIBNAME   "ppc440FP2"
557 #define CORENAME  "PPC440FP2"
558 #endif
559
/*
 * Forced target: Cell.
 * 256 KiB L1 data size, 512 KiB L2, 128-byte cache lines (values as
 * configured here).
 */
#ifdef FORCE_CELL
#define FORCE
#define ARCHITECTURE    "POWER"
#define SUBARCHITECTURE "CELL"
#define SUBDIRNAME      "power"
/* L2_SIZE is 512 KiB = 524288 bytes (previously mistyped as 512488). */
#define ARCHCONFIG   "-DCELL " \
                     "-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=128 " \
                     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "cell"
#define CORENAME  "CELL"
#endif
572
/*
 * Forced target: SiCortex (MIPS).
 * 32 KiB L1 data cache, 512 KiB L2, 32-byte cache lines (values as
 * configured here).
 */
#ifdef FORCE_SICORTEX
#define FORCE
#define ARCHITECTURE    "MIPS"
#define SUBARCHITECTURE "SICORTEX"
#define SUBDIRNAME      "mips"
/* L2_SIZE is 512 KiB = 524288 bytes (previously mistyped as 512488). */
#define ARCHCONFIG   "-DSICORTEX " \
                     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
                     "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "mips"
#define CORENAME  "sicortex"
#endif
585
586
/*
 * Forced target: Loongson 3A (MIPS64).
 * 64 KiB L1 data cache, 512 KiB L2, 32-byte cache lines (values as
 * configured here).
 */
#ifdef FORCE_LOONGSON3A
#define FORCE
#define ARCHITECTURE    "MIPS"
#define SUBARCHITECTURE "LOONGSON3A"
#define SUBDIRNAME      "mips64"
/* L2_SIZE is 512 KiB = 524288 bytes (previously mistyped as 512488). */
#define ARCHCONFIG   "-DLOONGSON3A " \
                     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
                     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
#define LIBNAME   "loongson3a"
#define CORENAME  "LOONGSON3A"
#endif
600
/*
 * Forced target: Loongson 3B (MIPS64).
 * 64 KiB L1 data cache, 512 KiB L2, 32-byte cache lines (values as
 * configured here).
 */
#ifdef FORCE_LOONGSON3B
#define FORCE
#define ARCHITECTURE    "MIPS"
#define SUBARCHITECTURE "LOONGSON3B"
#define SUBDIRNAME      "mips64"
/* L2_SIZE is 512 KiB = 524288 bytes (previously mistyped as 512488). */
#define ARCHCONFIG   "-DLOONGSON3B " \
                     "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
                     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
#define LIBNAME   "loongson3b"
#define CORENAME  "LOONGSON3B"
#endif
614
615 #ifdef FORCE_ITANIUM2
616 #define FORCE
617 #define ARCHITECTURE    "IA64"
618 #define SUBARCHITECTURE "ITANIUM2"
619 #define SUBDIRNAME      "ia64"
620 #define ARCHCONFIG   "-DITANIUM2 " \
621                      "-DL1_DATA_SIZE=262144 -DL1_DATA_LINESIZE=128 " \
622                      "-DL2_SIZE=1572864 -DL2_LINESIZE=128 -DDTB_SIZE=16384 -DDTB_DEFAULT_ENTRIES=128 "
623 #define LIBNAME   "itanium2"
624 #define CORENAME  "itanium2"
625 #endif
626
627 #ifdef FORCE_SPARC
628 #define FORCE
629 #define ARCHITECTURE    "SPARC"
630 #define SUBARCHITECTURE "SPARC"
631 #define SUBDIRNAME      "sparc"
632 #define ARCHCONFIG   "-DSPARC -DV9 " \
633                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
634                      "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 "
635 #define LIBNAME   "sparc"
636 #define CORENAME  "sparc"
637 #endif
638
639 #ifdef FORCE_SPARCV7
640 #define FORCE
641 #define ARCHITECTURE    "SPARC"
642 #define SUBARCHITECTURE "SPARC"
643 #define SUBDIRNAME      "sparc"
644 #define ARCHCONFIG   "-DSPARC -DV7 " \
645                      "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
646                      "-DL2_SIZE=1572864 -DL2_LINESIZE=64 -DDTB_SIZE=8192 -DDTB_DEFAULT_ENTRIES=64 "
647 #define LIBNAME   "sparcv7"
648 #define CORENAME  "sparcv7"
649 #endif
650
/*
 * Forced target: architecture-neutral generic build.
 * 32 KiB L1 data cache, 512 KiB L2, 128-byte cache lines (values as
 * configured here).
 */
#ifdef FORCE_GENERIC
#define FORCE
#define ARCHITECTURE    "GENERIC"
#define SUBARCHITECTURE "GENERIC"
#define SUBDIRNAME      "generic"
/* L2_SIZE is 512 KiB = 524288 bytes (previously mistyped as 512488). */
#define ARCHCONFIG   "-DGENERIC " \
                     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
                     "-DL2_SIZE=524288 -DL2_LINESIZE=128 " \
                     "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "generic"
#define CORENAME  "generic"
#endif
663
664 #ifndef FORCE
665
666 #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \
667     defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__)
668 #ifndef POWER
669 #define POWER
670 #endif
671 #define OPENBLAS_SUPPORTED
672 #endif
673
674 #if defined(__i386__) || (__x86_64__)
675 #include "cpuid_x86.c"
676 #define OPENBLAS_SUPPORTED
677 #endif
678
679 #ifdef __ia64__
680 #include "cpuid_ia64.c"
681 #define OPENBLAS_SUPPORTED
682 #endif
683
684 #ifdef __alpha
685 #include "cpuid_alpha.c"
686 #define OPENBLAS_SUPPORTED
687 #endif
688
689 #ifdef POWER
690 #include "cpuid_power.c"
691 #define OPENBLAS_SUPPORTED
692 #endif
693
694 #ifdef sparc
695 #include "cpuid_sparc.c"
696 #define OPENBLAS_SUPPORTED
697 #endif
698
699 #ifdef __mips__
700 #include "cpuid_mips.c"
701 #define OPENBLAS_SUPPORTED
702 #endif
703
704 #ifndef OPENBLAS_SUPPORTED
705 #error "This arch/CPU is not supported by OpenBLAS."
706 #endif
707
708 #else
709
710 #endif
711
/*
 * Return the number of processors reported by the operating system.
 *
 *   Linux            : get_nprocs()
 *   Windows          : SYSTEM_INFO.dwNumberOfProcessors from GetSystemInfo()
 *   FreeBSD / macOS  : sysctl({CTL_HW, HW_NCPU})
 *   anything else    : 2 (the historical default of this tool)
 *
 * The result is always >= 1 on the sysctl path: if the query fails or
 * reports a non-positive count, the same default of 2 is returned instead
 * of an uninitialised value.
 */
static int get_num_cores(void) {

  /* `linux` is not predefined in strict ISO mode (-std=c99/-std=c11), so
     the reserved-namespace spelling __linux__ is tested as well. */
#if defined(linux) || defined(__linux__)
  return get_nprocs();

#elif defined(OS_WINDOWS)
  SYSTEM_INFO sysinfo;

  GetSystemInfo(&sysinfo);
  /* dwNumberOfProcessors is a DWORD; make the narrowing explicit. */
  return (int)sysinfo.dwNumberOfProcessors;

#elif defined(__FreeBSD__) || defined(__APPLE__)
  int mib[2] = { CTL_HW, HW_NCPU };
  int count = 0;
  size_t len = sizeof(count);

  if (sysctl(mib, 2, &count, &len, NULL, 0) != 0 || count < 1) {
    return 2;
  }
  return count;

#else
  return 2;
#endif
}
740
#ifdef FORCE
/*
 * Scan a compiler-flag string and print one line per "-DNAME[=VALUE]"
 * token found in it.
 *
 *   flags          : space-separated flag string (e.g. ARCHCONFIG)
 *   prefix         : text printed before NAME
 *   assign         : text printed between NAME and VALUE when "=VALUE" exists
 *   implicit_value : text printed after NAME when the token has no "=VALUE"
 *                    (only if NAME is non-empty)
 *
 * Characters that are not part of a "-D" token are skipped.
 */
static void print_defines(const char *flags, const char *prefix,
                          const char *assign, const char *implicit_value)
{
  const char *p = flags;

  while (*p != '\0') {
    const char *name;

    if (p[0] != '-' || p[1] != 'D') {
      p++;
      continue;
    }
    p += 2;

    fputs(prefix, stdout);

    name = p;
    while (*p != '\0' && *p != ' ' && *p != '=') p++;
    fwrite(name, 1, (size_t)(p - name), stdout);

    if (*p == '=') {
      const char *value = ++p;

      while (*p != '\0' && *p != ' ') p++;
      fputs(assign, stdout);
      fwrite(value, 1, (size_t)(p - value), stdout);
    } else if (p != name) {
      fputs(implicit_value, stdout);
    }

    putchar('\n');
  }
}
#endif

/*
 * Print build configuration for the detected (or FORCE_*-selected) CPU.
 * The first character of argv[1] selects the output:
 *
 *   '0'  Makefile fragment: CORE=, LIBCORE=, NUM_CORES=, on x86 the
 *        feature/cache settings, and a "MAKE += -j N" line (not on Windows).
 *   '1'  C header fragment: one "#define" per configuration flag.
 *   '2'  Prints "SMP=1" when more than one core is reported.
 *
 * Any other selector prints nothing.  Always returns 0.
 */
int main(int argc, char *argv[]){

  /* No selector given (also covers argc == 0, where argv[1] must not be
     read). */
  if (argc < 2) return 0;

  switch (argv[1][0]) {

  case '0' : { /* for Makefile */
    const int num_cores = get_num_cores();

#ifdef FORCE
    printf("CORE=%s\n", CORENAME);
    printf("LIBCORE=%s\n", LIBNAME);
#else
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__)
    printf("CORE=%s\n", get_corename());
#endif
    printf("LIBCORE=");
    get_libname();
    printf("\n");
#endif

    printf("NUM_CORES=%d\n", num_cores);

#if defined(__i386__) || defined(__x86_64__)
#ifndef FORCE
    get_sse();
#else
    /* "-DNAME=VALUE" -> "NAME=VALUE", bare "-DNAME" -> "NAME=1". */
    print_defines(ARCHCONFIG, "", "=", "=1");
#endif
#endif

#if NO_PARALLEL_MAKE==1
    printf("MAKE += -j 1\n");
#else
#ifndef OS_WINDOWS
    printf("MAKE += -j %d\n", num_cores);
#endif
#endif

    break;
  }

  case '1' : /* For config.h */
#ifdef FORCE
    /* ARCHCONFIG and CORENAME are string literals in every FORCE_* block,
       so they can be joined at compile time without a scratch buffer.
       "-DNAME=VALUE" -> "#define NAME VALUE", "-DNAME" -> "#define NAME". */
    print_defines(ARCHCONFIG " -DCORE_" CORENAME, "#define ", " ", "");
    /* The output has always ended with one blank line; keep it so the
       generated header stays byte-identical. */
    putchar('\n');
#else
    get_cpuconfig();
#endif
    break;

  case '2' : /* SMP */
    if (get_num_cores() > 1) printf("SMP=1\n");
    break;

  default :
    break;
  }

  fflush(stdout);

  return 0;
}
859