POWER10: dgemv builtin rename
[platform/upstream/openblas.git] / cpuid_arm64.c
1 /**************************************************************************
2   Copyright (c) 2013, The OpenBLAS Project
3   All rights reserved.
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions are
6   met:
7   1. Redistributions of source code must retain the above copyright
8   notice, this list of conditions and the following disclaimer.
9   2. Redistributions in binary form must reproduce the above copyright
10   notice, this list of conditions and the following disclaimer in
11   the documentation and/or other materials provided with the
12   distribution.
13   3. Neither the name of the OpenBLAS project nor the names of
14   its contributors may be used to endorse or promote products
15   derived from this software without specific prior written permission.
16   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19   ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25   USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26   *****************************************************************************/
27
28 #include <string.h>
29 #ifdef __APPLE__
30 #include <sys/sysctl.h>
/* Scratch storage shared by the sysctlbyname() queries further down in this file. */
int32_t value;                      /* receives 32-bit sysctl results */
size_t length = sizeof value;       /* byte size handed to sysctlbyname() for `value` */
int64_t value64;                    /* receives 64-bit sysctl results */
size_t length64 = sizeof value64;   /* byte size handed to sysctlbyname() for `value64` */
35 #endif
36
/*
 * Core identifiers returned by detect().  The numeric value of each macro is
 * also the index of that core's name in cpuname[] and cpuname_lower[], so the
 * list is kept in numeric order here to mirror those tables.
 */
#define CPU_UNKNOWN        0
#define CPU_ARMV8          1
#define CPU_CORTEXA53      2   /* Arm */
#define CPU_CORTEXA57      3   /* Arm */
#define CPU_CORTEXA72      4   /* Arm */
#define CPU_CORTEXA73      5   /* Arm */
#define CPU_FALKOR         6   /* Qualcomm */
#define CPU_THUNDERX       7   /* Cavium */
#define CPU_THUNDERX2T99   8   /* Cavium */
#define CPU_TSV110         9   /* HiSilicon */
#define CPU_EMAG8180      10   /* Ampere */
#define CPU_NEOVERSEN1    11   /* Arm */
#define CPU_THUNDERX3T110 12   /* Cavium */
#define CPU_VORTEX        13   /* Apple */
#define CPU_CORTEXA55     14   /* Arm */
#define CPU_A64FX         15   /* Fujitsu */
#define CPU_NEOVERSEV1    16   /* Arm */
#define CPU_NEOVERSEN2    17   /* Arm */
#define CPU_CORTEXX1      18   /* Arm */
#define CPU_CORTEXX2      19   /* Arm */
#define CPU_CORTEXA510    20   /* Arm */
#define CPU_CORTEXA710    21   /* Arm */
#define CPU_FT2000        22   /* Phytium */
68
/*
 * Upper-case core names, indexed by the CPU_* identifier value.  The explicit
 * designators spell out which identifier value each entry belongs to.
 */
static char *cpuname[] = {
  [0]  = "UNKNOWN",
  [1]  = "ARMV8",
  [2]  = "CORTEXA53",
  [3]  = "CORTEXA57",
  [4]  = "CORTEXA72",
  [5]  = "CORTEXA73",
  [6]  = "FALKOR",
  [7]  = "THUNDERX",
  [8]  = "THUNDERX2T99",
  [9]  = "TSV110",
  [10] = "EMAG8180",
  [11] = "NEOVERSEN1",
  [12] = "THUNDERX3T110",
  [13] = "VORTEX",
  [14] = "CORTEXA55",
  [15] = "A64FX",
  [16] = "NEOVERSEV1",
  [17] = "NEOVERSEN2",
  [18] = "CORTEXX1",
  [19] = "CORTEXX2",
  [20] = "CORTEXA510",
  [21] = "CORTEXA710",
  [22] = "FT2000"
};
94
/*
 * Lower-case core names, indexed by the CPU_* identifier value.  The explicit
 * designators spell out which identifier value each entry belongs to.
 */
static char *cpuname_lower[] = {
  [0]  = "unknown",
  [1]  = "armv8",
  [2]  = "cortexa53",
  [3]  = "cortexa57",
  [4]  = "cortexa72",
  [5]  = "cortexa73",
  [6]  = "falkor",
  [7]  = "thunderx",
  [8]  = "thunderx2t99",
  [9]  = "tsv110",
  [10] = "emag8180",
  [11] = "neoversen1",
  [12] = "thunderx3t110",
  [13] = "vortex",
  [14] = "cortexa55",
  [15] = "a64fx",
  [16] = "neoversev1",
  [17] = "neoversen2",
  [18] = "cortexx1",
  [19] = "cortexx2",
  [20] = "cortexa510",
  [21] = "cortexa710",
  [22] = "ft2000"
};
120
/*
 * Reports whether `search` appears as a whole word on the first "Features"
 * line of /proc/cpuinfo.
 *
 * search: feature name to look for (e.g. "asimd"); NULL never matches.
 *
 * Returns 1 when the feature is listed, 0 otherwise.  0 is also returned when
 * /proc/cpuinfo cannot be opened, has no "Features" line, or the build is not
 * for Linux.
 */
int get_feature(char *search)
{
#ifdef __linux
        /* Blank, tab and newline all separate tokens, so the last feature on
           the line is not left with the '\n' that fgets() keeps. */
        static const char delims[] = " \t\n";
        char line[2048];
        char *features = NULL;
        char *tok;
        FILE *infile;

        if (search == NULL)
                return 0;

        infile = fopen("/proc/cpuinfo", "r");
        if (infile == NULL)
                return 0;

        while (fgets(line, sizeof(line), infile)) {
                if (strncmp("Features", line, 8) == 0) {
                        features = strchr(line, ':');
                        break;
                }
        }
        fclose(infile);

        if (features == NULL)
                return 0;

        /* Start right after the ':'; strtok() skips the leading blank itself.
           Every token is compared, including the first one. */
        for (tok = strtok(features + 1, delims); tok != NULL; tok = strtok(NULL, delims)) {
                if (strcmp(tok, search) == 0)
                        return 1;
        }
#else
        (void) search;
#endif
        return 0;
}
155
156
157 int detect(void)
158 {
159
160 #ifdef __linux
161
162         FILE *infile;
163         char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL;
164         p = (char *) NULL ;
165
166         infile = fopen("/proc/cpuinfo", "r");
167         while (fgets(buffer, sizeof(buffer), infile)) {
168                 if ((cpu_part != NULL) && (cpu_implementer != NULL)) {
169                         break;
170                 }
171
172                 if ((cpu_part == NULL) && !strncmp("CPU part", buffer, 8)) {
173                         cpu_part = strchr(buffer, ':') + 2;
174                         cpu_part = strdup(cpu_part);
175                 } else if ((cpu_implementer == NULL) && !strncmp("CPU implementer", buffer, 15)) {
176                         cpu_implementer = strchr(buffer, ':') + 2;
177                         cpu_implementer = strdup(cpu_implementer);
178                 }
179         }
180
181         fclose(infile);
182         if(cpu_part != NULL && cpu_implementer != NULL) {
183     // Arm
184     if (strstr(cpu_implementer, "0x41")) {
185       if (strstr(cpu_part, "0xd03"))
186         return CPU_CORTEXA53;
187       else if (strstr(cpu_part, "0xd07"))
188         return CPU_CORTEXA57;
189       else if (strstr(cpu_part, "0xd08"))
190         return CPU_CORTEXA72;
191       else if (strstr(cpu_part, "0xd09"))
192         return CPU_CORTEXA73;
193       else if (strstr(cpu_part, "0xd0c"))
194         return CPU_NEOVERSEN1;
195       else if (strstr(cpu_part, "0xd40"))
196         return CPU_NEOVERSEV1;
197       else if (strstr(cpu_part, "0xd49"))
198         return CPU_NEOVERSEN2;
199       else if (strstr(cpu_part, "0xd05"))
200         return CPU_CORTEXA55;
201       else if (strstr(cpu_part, "0xd46"))
202         return CPU_CORTEXA510;
203       else if (strstr(cpu_part, "0xd47"))
204         return CPU_CORTEXA710;
205       else if (strstr(cpu_part, "0xd44"))
206         return CPU_CORTEXX1;
207       else if (strstr(cpu_part, "0xd4c"))
208         return CPU_CORTEXX2;
209     }
210     // Qualcomm
211     else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
212       return CPU_FALKOR;
213     // Cavium
214     else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0a1"))
215                         return CPU_THUNDERX;
216     else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0af"))
217                         return CPU_THUNDERX2T99;
218     else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0b8"))
219                         return CPU_THUNDERX3T110;
220     // HiSilicon
221     else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01"))
222                         return CPU_TSV110;
223     // Ampere
224     else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000"))
225                         return CPU_EMAG8180;
226     // Fujitsu
227     else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
228                         return CPU_A64FX;
229     // Apple
230     else if (strstr(cpu_implementer, "0x61") && strstr(cpu_part, "0x022"))
231                         return CPU_VORTEX;
232    // Phytium
233    else if (strstr(cpu_implementer, "0x70") && (strstr(cpu_part, "0x660") || strstr(cpu_part, "0x661") 
234                         || strstr(cpu_part, "0x662") || strstr(cpu_part, "0x663")))
235                         return CPU_FT2000;
236         }
237
238         p = (char *) NULL ;
239         infile = fopen("/proc/cpuinfo", "r");
240         while (fgets(buffer, sizeof(buffer), infile))
241         {
242
243                 if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)) ||
244                     (!strncmp("CPU architecture", buffer, 16)))
245                 {
246                         p = strchr(buffer, ':') + 2;
247                         break;
248                 }
249         }
250
251         fclose(infile);
252
253         if(p != NULL)
254         {
255
256                 if ((strstr(p, "AArch64")) || (strstr(p, "8")))
257                 {
258                         return CPU_ARMV8;
259
260                 }
261
262
263         }
264 #else
265 #ifdef __APPLE__
266         sysctlbyname("hw.cpufamily",&value,&length,NULL,0);
267         if (value ==131287967|| value == 458787763 ) return CPU_VORTEX;
268 #endif
269         return CPU_ARMV8;       
270 #endif
271
272         return CPU_UNKNOWN;
273 }
274
275 char *get_corename(void)
276 {
277         return cpuname[detect()];
278 }
279
/* Writes the architecture name "ARM64" to stdout, without a trailing newline. */
void get_architecture(void)
{
        fputs("ARM64", stdout);
}
284
285 void get_subarchitecture(void)
286 {
287         int d = detect();
288         printf("%s", cpuname[d]);
289 }
290
/* Writes the sub-directory name "arm64" to stdout, without a trailing newline. */
void get_subdirname(void)
{
        fputs("arm64", stdout);
}
295
/*
 * Prints a "#define NUM_CORES <n>" line to stdout.
 *
 * Linux: <n> is the number of lines in /proc/cpuinfo that start with
 * "processor".  Nothing is printed when the file cannot be opened.
 * Apple: <n> is the "hw.physicalcpu_max" sysctl value.  Nothing is printed
 * when the query fails.
 * Other systems: prints nothing.
 */
void get_cpucount(void)
{
#ifdef __linux
        char line[2048];
        int n = 0;
        FILE *infile = fopen("/proc/cpuinfo", "r");

        if (infile != NULL) {
                while (fgets(line, sizeof(line), infile)) {
                        if (strncmp("processor", line, 9) == 0)
                                n++;
                }
                fclose(infile);

                printf("#define NUM_CORES %d\n", n);
        }
#endif
#ifdef __APPLE__
        /* sysctlbyname() overwrites the length argument, so reset it first. */
        length = sizeof(value);
        if (sysctlbyname("hw.physicalcpu_max", &value, &length, NULL, 0) == 0)
                printf("#define NUM_CORES %d\n", value);
#endif
}
323
324
325
326 void get_cpuconfig(void)
327 {
328
329   // All arches should define ARMv8
330   printf("#define ARMV8\n");
331   printf("#define HAVE_NEON\n"); // This shouldn't be necessary
332   printf("#define HAVE_VFPV4\n"); // This shouldn't be necessary
333
334         int d = detect();
335         switch (d)
336         {
337
338             case CPU_CORTEXA53:
339             case CPU_CORTEXA55:
340                 printf("#define %s\n", cpuname[d]);
341               // Fall-through
342             case CPU_ARMV8:
343               // Minimum parameters for ARMv8 (based on A53)
344                 printf("#define L1_DATA_SIZE 32768\n");
345                 printf("#define L1_DATA_LINESIZE 64\n");
346                 printf("#define L2_SIZE 262144\n");
347                 printf("#define L2_LINESIZE 64\n");
348                 printf("#define DTB_DEFAULT_ENTRIES 64\n");
349                 printf("#define DTB_SIZE 4096\n");
350                 printf("#define L2_ASSOCIATIVE 4\n");
351                         break;
352
353             case CPU_CORTEXA57:
354             case CPU_CORTEXA72:
355             case CPU_CORTEXA73:
356       // Common minimum settings for these Arm cores
357       // Can change a lot, but we need to be conservative
358       // TODO: detect info from /sys if possible
359                 printf("#define %s\n", cpuname[d]);
360                 printf("#define L1_CODE_SIZE 49152\n");
361                 printf("#define L1_CODE_LINESIZE 64\n");
362                 printf("#define L1_CODE_ASSOCIATIVE 3\n");
363                 printf("#define L1_DATA_SIZE 32768\n");
364                 printf("#define L1_DATA_LINESIZE 64\n");
365                 printf("#define L1_DATA_ASSOCIATIVE 2\n");
366                 printf("#define L2_SIZE 524288\n");
367                 printf("#define L2_LINESIZE 64\n");
368                 printf("#define L2_ASSOCIATIVE 16\n");
369                 printf("#define DTB_DEFAULT_ENTRIES 64\n");
370                 printf("#define DTB_SIZE 4096\n");
371                 break;
372             case CPU_NEOVERSEN1:
373                 printf("#define %s\n", cpuname[d]);
374                 printf("#define L1_CODE_SIZE 65536\n");
375                 printf("#define L1_CODE_LINESIZE 64\n");
376                 printf("#define L1_CODE_ASSOCIATIVE 4\n");
377                 printf("#define L1_DATA_SIZE 65536\n");
378                 printf("#define L1_DATA_LINESIZE 64\n");
379                 printf("#define L1_DATA_ASSOCIATIVE 4\n");
380                 printf("#define L2_SIZE 1048576\n");
381                 printf("#define L2_LINESIZE 64\n");
382                 printf("#define L2_ASSOCIATIVE 8\n");
383                 printf("#define DTB_DEFAULT_ENTRIES 48\n");
384                 printf("#define DTB_SIZE 4096\n");
385                 break;
386
387             case CPU_NEOVERSEV1:
388                 printf("#define %s\n", cpuname[d]);
389                 printf("#define L1_CODE_SIZE 65536\n");
390                 printf("#define L1_CODE_LINESIZE 64\n");
391                 printf("#define L1_CODE_ASSOCIATIVE 4\n");
392                 printf("#define L1_DATA_SIZE 65536\n");
393                 printf("#define L1_DATA_LINESIZE 64\n");
394                 printf("#define L1_DATA_ASSOCIATIVE 4\n");
395                 printf("#define L2_SIZE 1048576\n");
396                 printf("#define L2_LINESIZE 64\n");
397                 printf("#define L2_ASSOCIATIVE 8\n");
398                 printf("#define DTB_DEFAULT_ENTRIES 48\n");
399                 printf("#define DTB_SIZE 4096\n");
400                 break;
401
402             case CPU_NEOVERSEN2:
403                 printf("#define %s\n", cpuname[d]);
404                 printf("#define L1_CODE_SIZE 65536\n");
405                 printf("#define L1_CODE_LINESIZE 64\n");
406                 printf("#define L1_CODE_ASSOCIATIVE 4\n");
407                 printf("#define L1_DATA_SIZE 65536\n");
408                 printf("#define L1_DATA_LINESIZE 64\n");
409                 printf("#define L1_DATA_ASSOCIATIVE 4\n");
410                 printf("#define L2_SIZE 1048576\n");
411                 printf("#define L2_LINESIZE 64\n");
412                 printf("#define L2_ASSOCIATIVE 8\n");
413                 printf("#define DTB_DEFAULT_ENTRIES 48\n");
414                 printf("#define DTB_SIZE 4096\n");
415                 break;
416             case CPU_CORTEXA510:
417             case CPU_CORTEXA710:
418             case CPU_CORTEXX1:
419             case CPU_CORTEXX2:
420                 printf("#define ARMV9\n");
421                 printf("#define %s\n", cpuname[d]);
422                 printf("#define L1_CODE_SIZE 65536\n");
423                 printf("#define L1_CODE_LINESIZE 64\n");
424                 printf("#define L1_CODE_ASSOCIATIVE 4\n");
425                 printf("#define L1_DATA_SIZE 65536\n");
426                 printf("#define L1_DATA_LINESIZE 64\n");
427                 printf("#define L1_DATA_ASSOCIATIVE 4\n");
428                 printf("#define L2_SIZE 1048576\n");
429                 printf("#define L2_LINESIZE 64\n");
430                 printf("#define L2_ASSOCIATIVE 8\n");
431                 printf("#define DTB_DEFAULT_ENTRIES 64\n");
432                 printf("#define DTB_SIZE 4096\n");
433                 break;
434             case CPU_FALKOR:
435                 printf("#define FALKOR\n");
436                 printf("#define L1_CODE_SIZE 65536\n");
437                 printf("#define L1_CODE_LINESIZE 64\n");
438                 printf("#define L1_DATA_SIZE 32768\n");
439                 printf("#define L1_DATA_LINESIZE 128\n");
440                 printf("#define L2_SIZE 524288\n");
441                 printf("#define L2_LINESIZE 64\n");
442                 printf("#define DTB_DEFAULT_ENTRIES 64\n");
443                 printf("#define DTB_SIZE 4096\n");
444                 printf("#define L2_ASSOCIATIVE 16\n");
445                 break;
446
447             case CPU_THUNDERX:
448                 printf("#define THUNDERX\n");
449                 printf("#define L1_DATA_SIZE 32768\n");
450                 printf("#define L1_DATA_LINESIZE 128\n");
451                 printf("#define L2_SIZE 16777216\n");
452                 printf("#define L2_LINESIZE 128\n");
453                 printf("#define DTB_DEFAULT_ENTRIES 64\n");
454                 printf("#define DTB_SIZE 4096\n");
455                 printf("#define L2_ASSOCIATIVE 16\n");
456                 break;
457
458             case CPU_THUNDERX2T99:
459                 printf("#define THUNDERX2T99                  \n");
460                 printf("#define L1_CODE_SIZE         32768    \n");
461                 printf("#define L1_CODE_LINESIZE     64       \n");
462                 printf("#define L1_CODE_ASSOCIATIVE  8        \n");
463                 printf("#define L1_DATA_SIZE         32768    \n");
464                 printf("#define L1_DATA_LINESIZE     64       \n");
465                 printf("#define L1_DATA_ASSOCIATIVE  8        \n");
466                 printf("#define L2_SIZE              262144   \n");
467                 printf("#define L2_LINESIZE          64       \n");
468                 printf("#define L2_ASSOCIATIVE       8        \n");
469                 printf("#define L3_SIZE              33554432 \n");
470                 printf("#define L3_LINESIZE          64       \n");
471                 printf("#define L3_ASSOCIATIVE       32       \n");
472                 printf("#define DTB_DEFAULT_ENTRIES  64       \n");
473                 printf("#define DTB_SIZE             4096     \n");
474                 break;
475                         
476             case CPU_TSV110:
477                 printf("#define TSV110                        \n");
478                 printf("#define L1_CODE_SIZE         65536    \n");
479                 printf("#define L1_CODE_LINESIZE     64       \n");
480                 printf("#define L1_CODE_ASSOCIATIVE  4        \n");
481                 printf("#define L1_DATA_SIZE         65536    \n");
482                 printf("#define L1_DATA_LINESIZE     64       \n");
483                 printf("#define L1_DATA_ASSOCIATIVE  4        \n");
484                 printf("#define L2_SIZE              524228   \n");
485                 printf("#define L2_LINESIZE          64       \n");
486                 printf("#define L2_ASSOCIATIVE       8        \n");
487                 printf("#define DTB_DEFAULT_ENTRIES  64       \n");
488                 printf("#define DTB_SIZE             4096     \n");
489                 break;  
490
491             case CPU_EMAG8180:
492                  // Minimum parameters for ARMv8 (based on A53)
493                 printf("#define EMAG8180\n");
494                 printf("#define L1_CODE_SIZE 32768\n");
495                 printf("#define L1_DATA_SIZE 32768\n");
496                 printf("#define L1_DATA_LINESIZE 64\n");
497                 printf("#define L2_SIZE 262144\n");
498                 printf("#define L2_LINESIZE 64\n");
499                 printf("#define DTB_DEFAULT_ENTRIES 64\n");
500                 printf("#define DTB_SIZE 4096\n");
501                 break;
502
503             case CPU_THUNDERX3T110:
504                 printf("#define THUNDERX3T110                 \n");
505                 printf("#define L1_CODE_SIZE         65536    \n");
506                 printf("#define L1_CODE_LINESIZE     64       \n");
507                 printf("#define L1_CODE_ASSOCIATIVE  8        \n");
508                 printf("#define L1_DATA_SIZE         32768    \n");
509                 printf("#define L1_DATA_LINESIZE     64       \n");
510                 printf("#define L1_DATA_ASSOCIATIVE  8        \n");
511                 printf("#define L2_SIZE              524288   \n");
512                 printf("#define L2_LINESIZE          64       \n");
513                 printf("#define L2_ASSOCIATIVE       8        \n");
514                 printf("#define L3_SIZE              94371840 \n");
515                 printf("#define L3_LINESIZE          64       \n");
516                 printf("#define L3_ASSOCIATIVE       32       \n");
517                 printf("#define DTB_DEFAULT_ENTRIES  64       \n");
518                 printf("#define DTB_SIZE             4096     \n");
519                 break;
520             case CPU_VORTEX:
521                 printf("#define VORTEX                        \n");
522 #ifdef __APPLE__
523                 sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0);
524                 printf("#define L1_CODE_SIZE         %lld       \n",value64);
525                 sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0);
526                 printf("#define L1_CODE_LINESIZE     %lld       \n",value64);
527                 sysctlbyname("hw.l1dcachesize",&value64,&length64,NULL,0);
528                 printf("#define L1_DATA_SIZE         %lld       \n",value64);
529                 sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0);
530                 printf("#define L2_SIZE      %lld       \n",value64);
531 #endif  
532                 printf("#define DTB_DEFAULT_ENTRIES  64       \n");
533                 printf("#define DTB_SIZE             4096     \n");
534                 break;
535             case CPU_A64FX:
536                 printf("#define A64FX\n");
537                 printf("#define L1_CODE_SIZE 65535\n");
538                 printf("#define L1_DATA_SIZE 65535\n");
539                 printf("#define L1_DATA_LINESIZE 256\n");
540                 printf("#define L2_SIZE 8388608\n");
541                 printf("#define L2_LINESIZE 256\n");
542                 printf("#define DTB_DEFAULT_ENTRIES 64\n");
543                 printf("#define DTB_SIZE 4096\n");
544                 break;
545             case CPU_FT2000:
546                 printf("#define FT2000\n");
547                 printf("#define L1_CODE_SIZE 32768\n");
548                 printf("#define L1_DATA_SIZE 32768\n");
549                 printf("#define L1_DATA_LINESIZE 64\n");
550                 printf("#define L2_SIZE 33554432\n");
551                 printf("#define L2_LINESIZE 64\n");
552                 printf("#define DTB_DEFAULT_ENTRIES 64\n");
553                 printf("#define DTB_SIZE 4096\n");
554                 break;
555         }
556         get_cpucount();
557 }
558
559
560 void get_libname(void)
561 {
562         int d = detect();
563         printf("%s", cpuname_lower[d]);
564 }
565
/*
 * Walks the entries of the first "Features" line of /proc/cpuinfo.
 *
 * No feature is currently turned into output: the loop body is empty, so the
 * function has no visible effect.  It returns quietly when /proc/cpuinfo
 * cannot be opened, when there is no "Features" line, or on non-Linux builds.
 */
void get_features(void)
{
#ifdef __linux
        static const char delims[] = " \t\n";
        char line[2048];
        char *features = NULL;
        char *tok;
        FILE *infile = fopen("/proc/cpuinfo", "r");

        if (infile == NULL)
                return;

        while (fgets(line, sizeof(line), infile)) {
                if (strncmp("Features", line, 8) == 0) {
                        features = strchr(line, ':');
                        break;
                }
        }
        fclose(infile);

        if (features == NULL)
                return;

        /* Start right after the ':' so that every feature, including the first
           one, is visited; the newline delimiter keeps '\n' off the last one. */
        for (tok = strtok(features + 1, delims); tok != NULL; tok = strtok(NULL, delims)) {
                /* Intentionally empty: no per-feature macro is emitted for arm64 here. */
        }
#endif
}