Optimize cdot function for POWER10
[platform/upstream/openblas.git] / cpuid_arm64.c
1 /**************************************************************************
2   Copyright (c) 2013, The OpenBLAS Project
3   All rights reserved.
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions are
6   met:
7   1. Redistributions of source code must retain the above copyright
8   notice, this list of conditions and the following disclaimer.
9   2. Redistributions in binary form must reproduce the above copyright
10   notice, this list of conditions and the following disclaimer in
11   the documentation and/or other materials provided with the
12   distribution.
13   3. Neither the name of the OpenBLAS project nor the names of
14   its contributors may be used to endorse or promote products
15   derived from this software without specific prior written permission.
16   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19   ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25   USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26   *****************************************************************************/
27
28 #include <string.h>
#ifdef OS_DARWIN
#include <sys/sysctl.h>
/*
 * Scratch result slot and its in/out size, shared by every sysctlbyname()
 * query made in this file.
 * NOTE(review): these are declared under OS_DARWIN, while the code that uses
 * them is guarded by DARWIN - confirm the build defines both macros.
 */
int32_t value;
size_t length = sizeof value;
#endif
34
/*
 * Core identifiers returned by detect().  Each value is also used as the
 * index into the cpuname[] and cpuname_lower[] tables, so the numbering and
 * the order of those tables must stay in step.
 */
#define CPU_UNKNOWN        0
#define CPU_ARMV8          1   /* generic ARMv8 fallback */
#define CPU_CORTEXA53      2   /* Arm */
#define CPU_CORTEXA57      3   /* Arm */
#define CPU_CORTEXA72      4   /* Arm */
#define CPU_CORTEXA73      5   /* Arm */
#define CPU_FALKOR         6   /* Qualcomm */
#define CPU_THUNDERX       7   /* Cavium */
#define CPU_THUNDERX2T99   8   /* Cavium */
#define CPU_TSV110         9   /* HiSilicon */
#define CPU_EMAG8180      10   /* Ampere */
#define CPU_NEOVERSEN1    11   /* Arm */
#define CPU_THUNDERX3T110 12   /* Cavium */
#define CPU_VORTEX        13   /* Apple */
55
/*
 * Upper-case core names, indexed by the CPU_* identifier that detect()
 * returns.  The position of every entry must equal the value of its macro.
 */
static char *cpuname[] = {
  /*  0 -  1 */ "UNKNOWN", "ARMV8",
  /*  2 -  5 */ "CORTEXA53", "CORTEXA57", "CORTEXA72", "CORTEXA73",
  /*  6      */ "FALKOR",
  /*  7 -  8 */ "THUNDERX", "THUNDERX2T99",
  /*  9      */ "TSV110",
  /* 10      */ "EMAG8180",
  /* 11      */ "NEOVERSEN1",
  /* 12      */ "THUNDERX3T110",
  /* 13      */ "VORTEX"
};
72
/*
 * Lower-case core names, indexed by the CPU_* identifier that detect()
 * returns.  Entries mirror cpuname[] position for position.
 */
static char *cpuname_lower[] = {
  /*  0 -  1 */ "unknown", "armv8",
  /*  2 -  5 */ "cortexa53", "cortexa57", "cortexa72", "cortexa73",
  /*  6      */ "falkor",
  /*  7 -  8 */ "thunderx", "thunderx2t99",
  /*  9      */ "tsv110",
  /* 10      */ "emag8180",
  /* 11      */ "neoversen1",
  /* 12      */ "thunderx3t110",
  /* 13      */ "vortex"
};
89
/*
 * Report whether the CPU advertises a given feature flag.
 *
 * On Linux the first "Features" line of /proc/cpuinfo is split on
 * whitespace and every token is compared with `search`.  On other
 * platforms no lookup is performed.
 *
 * search: feature name to look for (exact, case-sensitive match).
 * Returns 1 when the feature is listed, 0 otherwise - including when
 * `search` is NULL, /proc/cpuinfo cannot be opened, or it has no usable
 * "Features" line.
 */
int get_feature(char *search)
{
        if (search == NULL)
                return 0;

#ifdef __linux
        FILE *infile;
        char buffer[2048], *p, *t;

        p = NULL;

        infile = fopen("/proc/cpuinfo", "r");
        if (infile == NULL)
                return 0;

        while (fgets(buffer, sizeof(buffer), infile))
        {
                if (!strncmp("Features", buffer, 8))
                {
                        /* Step past the ':' only when the line really has one. */
                        p = strchr(buffer, ':');
                        if (p != NULL)
                                p++;
                        break;
                }
        }

        fclose(infile);

        if (p == NULL)
                return 0;

        /*
         * Examine every token, the first one included, and treat the
         * trailing newline as a separator so the last feature on the
         * line can match as well.
         */
        for (t = strtok(p, " \t\n"); t != NULL; t = strtok(NULL, " \t\n"))
        {
                if (!strcmp(t, search))
                        return 1;
        }
#endif
        return 0;
}
124
125
126 int detect(void)
127 {
128
129 #ifdef __linux
130
131         FILE *infile;
132         char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL;
133         p = (char *) NULL ;
134
135         infile = fopen("/proc/cpuinfo", "r");
136         while (fgets(buffer, sizeof(buffer), infile)) {
137                 if ((cpu_part != NULL) && (cpu_implementer != NULL)) {
138                         break;
139                 }
140
141                 if ((cpu_part == NULL) && !strncmp("CPU part", buffer, 8)) {
142                         cpu_part = strchr(buffer, ':') + 2;
143                         cpu_part = strdup(cpu_part);
144                 } else if ((cpu_implementer == NULL) && !strncmp("CPU implementer", buffer, 15)) {
145                         cpu_implementer = strchr(buffer, ':') + 2;
146                         cpu_implementer = strdup(cpu_implementer);
147                 }
148         }
149
150         fclose(infile);
151         if(cpu_part != NULL && cpu_implementer != NULL) {
152     // Arm
153     if (strstr(cpu_implementer, "0x41")) {
154       if (strstr(cpu_part, "0xd03"))
155         return CPU_CORTEXA53;
156       else if (strstr(cpu_part, "0xd07"))
157         return CPU_CORTEXA57;
158       else if (strstr(cpu_part, "0xd08"))
159         return CPU_CORTEXA72;
160       else if (strstr(cpu_part, "0xd09"))
161         return CPU_CORTEXA73;
162       else if (strstr(cpu_part, "0xd0c"))
163         return CPU_NEOVERSEN1;
164     }
165     // Qualcomm
166     else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
167       return CPU_FALKOR;
168     // Cavium
169     else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0a1"))
170                         return CPU_THUNDERX;
171     else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0af"))
172                         return CPU_THUNDERX2T99;
173     else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0b8"))
174                         return CPU_THUNDERX3T110;
175     // HiSilicon
176     else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01"))
177                         return CPU_TSV110;
178     // Ampere
179     else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000"))
180                         return CPU_EMAG8180;
181         }
182
183         p = (char *) NULL ;
184         infile = fopen("/proc/cpuinfo", "r");
185         while (fgets(buffer, sizeof(buffer), infile))
186         {
187
188                 if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)) ||
189                     (!strncmp("CPU architecture", buffer, 16)))
190                 {
191                         p = strchr(buffer, ':') + 2;
192                         break;
193                 }
194         }
195
196         fclose(infile);
197
198         if(p != NULL)
199         {
200
201                 if ((strstr(p, "AArch64")) || (strstr(p, "8")))
202                 {
203                         return CPU_ARMV8;
204
205                 }
206
207
208         }
209 #else
210 #ifdef DARWIN
211         sysctlbyname("hw.cpufamily",&value,&length,NULL,0);
212         if (value ==131287967) return CPU_VORTEX;
213 #endif
214         return CPU_ARMV8;       
215 #endif
216
217         return CPU_UNKNOWN;
218 }
219
220 char *get_corename(void)
221 {
222         return cpuname[detect()];
223 }
224
/* Write the architecture name "ARM64" to stdout, without a newline. */
void get_architecture(void)
{
        fputs("ARM64", stdout);
}
229
230 void get_subarchitecture(void)
231 {
232         int d = detect();
233         printf("%s", cpuname[d]);
234 }
235
/* Write the sub-directory name "arm64" to stdout, without a newline. */
void get_subdirname(void)
{
        fputs("arm64", stdout);
}
240
/*
 * Print a "#define NUM_CORES <n>" line to stdout.
 *
 * Linux: <n> is the number of /proc/cpuinfo lines that start with
 * "processor".  If the file cannot be opened, nothing is printed.
 * DARWIN builds: <n> is the value reported by the hw.physicalcpu_max sysctl.
 * Builds with neither macro print nothing.
 */
void get_cpucount(void)
{
#ifdef __linux
        int n = 0;
        FILE *infile;
        char buffer[2048];

        infile = fopen("/proc/cpuinfo", "r");
        if (infile == NULL)
                return;

        while (fgets(buffer, sizeof(buffer), infile))
        {
                if (!strncmp("processor", buffer, 9))
                        n++;
        }

        fclose(infile);

        printf("#define NUM_CORES %d\n",n);
#endif
#ifdef DARWIN
        sysctlbyname("hw.physicalcpu_max",&value,&length,NULL,0);
        printf("#define NUM_CORES %d\n",value);
#endif
}
268
269
270
271 void get_cpuconfig(void)
272 {
273
274   // All arches should define ARMv8
275   printf("#define ARMV8\n");
276   printf("#define HAVE_NEON\n"); // This shouldn't be necessary
277   printf("#define HAVE_VFPV4\n"); // This shouldn't be necessary
278
279         int d = detect();
280         switch (d)
281         {
282
283     case CPU_CORTEXA53:
284       printf("#define %s\n", cpuname[d]);
285       // Fall-through
286                 case CPU_ARMV8:
287       // Minimum parameters for ARMv8 (based on A53)
288         printf("#define L1_DATA_SIZE 32768\n");
289         printf("#define L1_DATA_LINESIZE 64\n");
290         printf("#define L2_SIZE 262144\n");
291         printf("#define L2_LINESIZE 64\n");
292         printf("#define DTB_DEFAULT_ENTRIES 64\n");
293         printf("#define DTB_SIZE 4096\n");
294         printf("#define L2_ASSOCIATIVE 4\n");
295                         break;
296
297                 case CPU_CORTEXA57:
298                 case CPU_CORTEXA72:
299                 case CPU_CORTEXA73:
300       // Common minimum settings for these Arm cores
301       // Can change a lot, but we need to be conservative
302       // TODO: detect info from /sys if possible
303       printf("#define %s\n", cpuname[d]);
304                         printf("#define L1_CODE_SIZE 49152\n");
305                         printf("#define L1_CODE_LINESIZE 64\n");
306                         printf("#define L1_CODE_ASSOCIATIVE 3\n");
307                         printf("#define L1_DATA_SIZE 32768\n");
308                         printf("#define L1_DATA_LINESIZE 64\n");
309                         printf("#define L1_DATA_ASSOCIATIVE 2\n");
310       printf("#define L2_SIZE 524288\n");
311                         printf("#define L2_LINESIZE 64\n");
312                         printf("#define L2_ASSOCIATIVE 16\n");
313                         printf("#define DTB_DEFAULT_ENTRIES 64\n");
314                         printf("#define DTB_SIZE 4096\n");
315                         break;
316                 case CPU_NEOVERSEN1:
317                         printf("#define %s\n", cpuname[d]);
318                         printf("#define L1_CODE_SIZE 65536\n");
319                         printf("#define L1_CODE_LINESIZE 64\n");
320                         printf("#define L1_CODE_ASSOCIATIVE 4\n");
321                         printf("#define L1_DATA_SIZE 65536\n");
322                         printf("#define L1_DATA_LINESIZE 64\n");
323                         printf("#define L1_DATA_ASSOCIATIVE 4\n");
324                         printf("#define L2_SIZE 1048576\n");
325                         printf("#define L2_LINESIZE 64\n");
326                         printf("#define L2_ASSOCIATIVE 16\n");
327                         printf("#define DTB_DEFAULT_ENTRIES 64\n");
328                         printf("#define DTB_SIZE 4096\n");
329                         break;
330
331     case CPU_FALKOR:
332       printf("#define FALKOR\n");
333       printf("#define L1_CODE_SIZE 65536\n");
334       printf("#define L1_CODE_LINESIZE 64\n");
335       printf("#define L1_DATA_SIZE 32768\n");
336       printf("#define L1_DATA_LINESIZE 128\n");
337       printf("#define L2_SIZE 524288\n");
338       printf("#define L2_LINESIZE 64\n");
339       printf("#define DTB_DEFAULT_ENTRIES 64\n");
340       printf("#define DTB_SIZE 4096\n");
341       printf("#define L2_ASSOCIATIVE 16\n");
342       break;
343
344                 case CPU_THUNDERX:
345                         printf("#define THUNDERX\n");
346                         printf("#define L1_DATA_SIZE 32768\n");
347                         printf("#define L1_DATA_LINESIZE 128\n");
348                         printf("#define L2_SIZE 16777216\n");
349                         printf("#define L2_LINESIZE 128\n");
350                         printf("#define DTB_DEFAULT_ENTRIES 64\n");
351                         printf("#define DTB_SIZE 4096\n");
352                         printf("#define L2_ASSOCIATIVE 16\n");
353                         break;
354
355                 case CPU_THUNDERX2T99:
356                         printf("#define THUNDERX2T99                  \n");
357                         printf("#define L1_CODE_SIZE         32768    \n");
358                         printf("#define L1_CODE_LINESIZE     64       \n");
359                         printf("#define L1_CODE_ASSOCIATIVE  8        \n");
360                         printf("#define L1_DATA_SIZE         32768    \n");
361                         printf("#define L1_DATA_LINESIZE     64       \n");
362                         printf("#define L1_DATA_ASSOCIATIVE  8        \n");
363                         printf("#define L2_SIZE              262144   \n");
364                         printf("#define L2_LINESIZE          64       \n");
365                         printf("#define L2_ASSOCIATIVE       8        \n");
366                         printf("#define L3_SIZE              33554432 \n");
367                         printf("#define L3_LINESIZE          64       \n");
368                         printf("#define L3_ASSOCIATIVE       32       \n");
369                         printf("#define DTB_DEFAULT_ENTRIES  64       \n");
370                         printf("#define DTB_SIZE             4096     \n");
371                         break;
372                         
373                 case CPU_TSV110:
374                         printf("#define TSV110                        \n");
375                         printf("#define L1_CODE_SIZE         65536    \n");
376                         printf("#define L1_CODE_LINESIZE     64       \n");
377                         printf("#define L1_CODE_ASSOCIATIVE  4        \n");
378                         printf("#define L1_DATA_SIZE         65536    \n");
379                         printf("#define L1_DATA_LINESIZE     64       \n");
380                         printf("#define L1_DATA_ASSOCIATIVE  4        \n");
381                         printf("#define L2_SIZE              524228   \n");
382                         printf("#define L2_LINESIZE          64       \n");
383                         printf("#define L2_ASSOCIATIVE       8        \n");
384                         printf("#define DTB_DEFAULT_ENTRIES  64       \n");
385                         printf("#define DTB_SIZE             4096     \n");
386                         break;  
387
388                 case CPU_EMAG8180:
389       // Minimum parameters for ARMv8 (based on A53)
390         printf("#define EMAG8180\n");
391         printf("#define L1_CODE_SIZE 32768\n");
392         printf("#define L1_DATA_SIZE 32768\n");
393         printf("#define L1_DATA_LINESIZE 64\n");
394         printf("#define L2_SIZE 262144\n");
395         printf("#define L2_LINESIZE 64\n");
396         printf("#define DTB_DEFAULT_ENTRIES 64\n");
397         printf("#define DTB_SIZE 4096\n");
398                         break;
399
400                 case CPU_THUNDERX3T110:
401                         printf("#define THUNDERX3T110                 \n");
402                         printf("#define L1_CODE_SIZE         65536    \n");
403                         printf("#define L1_CODE_LINESIZE     64       \n");
404                         printf("#define L1_CODE_ASSOCIATIVE  8        \n");
405                         printf("#define L1_DATA_SIZE         32768    \n");
406                         printf("#define L1_DATA_LINESIZE     64       \n");
407                         printf("#define L1_DATA_ASSOCIATIVE  8        \n");
408                         printf("#define L2_SIZE              524288   \n");
409                         printf("#define L2_LINESIZE          64       \n");
410                         printf("#define L2_ASSOCIATIVE       8        \n");
411                         printf("#define L3_SIZE              94371840 \n");
412                         printf("#define L3_LINESIZE          64       \n");
413                         printf("#define L3_ASSOCIATIVE       32       \n");
414                         printf("#define DTB_DEFAULT_ENTRIES  64       \n");
415                         printf("#define DTB_SIZE             4096     \n");
416                         break;
417 #ifdef DARWIN
418                 case CPU_VORTEX:
419                         printf("#define VORTEX                        \n");
420                         sysctlbyname("hw.l1icachesize",&value,&length,NULL,0);
421                         printf("#define L1_CODE_SIZE         %d       \n",value);
422                         sysctlbyname("hw.cachelinesize",&value,&length,NULL,0);
423                         printf("#define L1_CODE_LINESIZE     %d       \n",value);
424                         sysctlbyname("hw.l1dcachesize",&value,&length,NULL,0);
425                         printf("#define L1_DATA_SIZE         %d       \n",value);
426                         sysctlbyname("hw.l2dcachesize",&value,&length,NULL,0);
427                         printf("#define L2_SIZE      %d       \n",value);
428                         break;
429 #endif                  
430         }
431         get_cpucount();
432 }
433
434
435 void get_libname(void)
436 {
437         int d = detect();
438         printf("%s", cpuname_lower[d]);
439 }
440
/*
 * Walk the feature flags listed on the "Features" line of /proc/cpuinfo.
 *
 * Linux only.  The flags are tokenised but nothing is printed or stored
 * for any of them yet, so the function currently has no visible effect.
 * It returns quietly when the file cannot be opened or holds no usable
 * "Features" line.
 */
void get_features(void)
{

#ifdef __linux
        FILE *infile;
        char buffer[2048], *p, *t;

        p = NULL;

        infile = fopen("/proc/cpuinfo", "r");
        if (infile == NULL)
                return;

        while (fgets(buffer, sizeof(buffer), infile))
        {
                if (!strncmp("Features", buffer, 8))
                {
                        /* Step past the ':' only when the line really has one. */
                        p = strchr(buffer, ':');
                        if (p != NULL)
                                p++;
                        break;
                }
        }

        fclose(infile);

        if (p == NULL)
                return;

        for (t = strtok(p, " \t\n"); t != NULL; t = strtok(NULL, " \t\n"))
        {
                /* Placeholder: no feature is reported at present. */
        }

#endif
        return;
}