1 /* ACLE support for AArch64 SVE
2 Copyright (C) 2018-2019 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define IN_TARGET_CODE 1
24 #include "coretypes.h"
30 #include "insn-codes.h"
33 #include "diagnostic.h"
35 #include "basic-block.h"
37 #include "fold-const.h"
39 #include "gimple-iterator.h"
43 #include "tree-vector-builder.h"
44 #include "stor-layout.h"
47 #include "gimple-fold.h"
48 #include "langhooks.h"
49 #include "stringpool.h"
50 #include "aarch64-sve-builtins.h"
51 #include "aarch64-sve-builtins-base.h"
52 #include "aarch64-sve-builtins-shapes.h"
/* NOTE(review): this file is a line-numbered listing of GCC's
   aarch64-sve-builtins.c in which the embedded line numbers are
   discontinuous — many original lines are elided, so the definitions
   below are fragments.  Code is left byte-identical; comments only.  */
54 namespace aarch64_sve {
56 /* Static information about each single-predicate or single-vector
58 struct vector_type_info
60 /* The name of the type as declared by arm_sve.h. */
61 const char *acle_name;
63 /* The name of the type specified in AAPCS64. The type is always
64 available under this name, even when arm_sve.h isn't included. */
/* NOTE(review): the member declaration this comment documents
   (presumably "const char *abi_name;", original line 65) appears to be
   elided from this listing — confirm against upstream.  */
67 /* The C++ mangling of ABI_NAME. */
68 const char *mangled_name;
71 /* Describes a function decl. */
72 class GTY(()) registered_function
75 /* The ACLE function that the decl represents. */
76 function_instance instance GTY ((skip));
78 /* The decl itself. */
/* NOTE(review): the decl member (presumably "tree decl;") and the
   "bool overloaded_p;" member are elided here; later code assigns
   rfn.overloaded_p and reads rfn.decl, so both must exist upstream.  */
81 /* The architecture extensions that the function requires, as a set of
82 AARCH64_FL_* flags. */
83 uint64_t required_extensions;
85 /* True if the decl represents an overloaded function that needs to be
86 resolved by function_resolver. */
90 /* Hash traits for registered_function. */
91 struct registered_function_hasher : nofree_ptr_hash <registered_function>
93 typedef function_instance compare_type;
95 static hashval_t hash (value_type);
96 static bool equal (value_type, const compare_type &);
/* Table of per-vector-type names, expanded row-by-row from the .def
   file via the DEF_SVE_TYPE entries below.  */
99 /* Information about each single-predicate or single-vector type. */
100 static CONSTEXPR const vector_type_info vector_types[] = {
101 #define DEF_SVE_TYPE(ACLE_NAME, NCHARS, ABI_NAME, SCALAR_TYPE) \
102 { #ACLE_NAME, #ABI_NAME, #NCHARS #ABI_NAME },
103 #include "aarch64-sve-builtins.def"
106 /* The function name suffix associated with each predication type. */
/* NOTE(review): the initializer rows of pred_suffixes (original lines
   108-114) are elided from this listing.  */
107 static const char *const pred_suffixes[NUM_PREDS + 1] = {
116 /* Static information about each mode_suffix_index. */
117 CONSTEXPR const mode_suffix_info mode_suffixes[] = {
/* Map the .def file's "none" placeholder to the sentinel value while
   expanding the table, then remove the mapping again below.  */
118 #define VECTOR_TYPE_none NUM_VECTOR_TYPES
119 #define DEF_SVE_MODE(NAME, BASE, DISPLACEMENT, UNITS) \
120 { "_" #NAME, VECTOR_TYPE_##BASE, VECTOR_TYPE_##DISPLACEMENT, UNITS_##UNITS },
121 #include "aarch64-sve-builtins.def"
122 #undef VECTOR_TYPE_none
/* Trailing sentinel entry with an empty suffix string.  */
123 { "", NUM_VECTOR_TYPES, NUM_VECTOR_TYPES, UNITS_none }
126 /* Static information about each type_suffix_index. */
127 CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
/* NOTE(review): several rows of this macro body (e.g. original lines
   129, 131-132, 138-139) are elided; the visible fields derive the
   byte size and the signed/unsigned/float/bool predicates from the
   CLASS and BITS arguments.  */
128 #define DEF_SVE_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE) \
130 VECTOR_TYPE_##ACLE_TYPE, \
133 BITS / BITS_PER_UNIT, \
134 TYPE_##CLASS == TYPE_signed || TYPE_##CLASS == TYPE_unsigned, \
135 TYPE_##CLASS == TYPE_unsigned, \
136 TYPE_##CLASS == TYPE_float, \
137 TYPE_##CLASS == TYPE_bool, \
140 #include "aarch64-sve-builtins.def"
141 { "", NUM_VECTOR_TYPES, TYPE_bool, 0, 0, false, false, false, false,
145 /* Define a TYPES_<combination> macro for each combination of type
146 suffixes that an ACLE function can have, where <combination> is the
147 name used in DEF_SVE_FUNCTION entries.
149 Use S (T) for single type suffix T and D (T1, T2) for a pair of type
150 suffixes T1 and T2. Use commas to separate the suffixes.
152 Although the order shouldn't matter, the convention is to sort the
153 suffixes lexicographically after dividing suffixes into a type
154 class ("b", "f", etc.) and a numerical bit count. */
156 /* _b8 _b16 _b32 _b64. */
157 #define TYPES_all_pred(S, D) \
158 S (b8), S (b16), S (b32), S (b64)
160 /* _f16 _f32 _f64. */
161 #define TYPES_all_float(S, D) \
162 S (f16), S (f32), S (f64)
164 /* _s8 _s16 _s32 _s64. */
165 #define TYPES_all_signed(S, D) \
166 S (s8), S (s16), S (s32), S (s64)
169 _s8 _s16 _s32 _s64. */
170 #define TYPES_all_float_and_signed(S, D) \
171 TYPES_all_float (S, D), TYPES_all_signed (S, D)
173 /* _u8 _u16 _u32 _u64. */
174 #define TYPES_all_unsigned(S, D) \
175 S (u8), S (u16), S (u32), S (u64)
177 /* _s8 _s16 _s32 _s64
178 _u8 _u16 _u32 _u64. */
179 #define TYPES_all_integer(S, D) \
180 TYPES_all_signed (S, D), TYPES_all_unsigned (S, D)
184 _u8 _u16 _u32 _u64. */
185 #define TYPES_all_data(S, D) \
186 TYPES_all_float (S, D), TYPES_all_integer (S, D)
/* NOTE(review): the bodies of TYPES_b, TYPES_h_integer, TYPES_hs_float,
   TYPES_s_integer and TYPES_d_integer (and some preceding comments)
   are elided from this listing — only the #define headers remain.  */
189 #define TYPES_b(S, D) \
193 #define TYPES_bhs_signed(S, D) \
194 S (s8), S (s16), S (s32)
197 #define TYPES_bhs_unsigned(S, D) \
198 S (u8), S (u16), S (u32)
202 #define TYPES_bhs_integer(S, D) \
203 TYPES_bhs_signed (S, D), TYPES_bhs_unsigned (S, D)
207 #define TYPES_h_integer(S, D) \
211 #define TYPES_hs_float(S, D) \
216 #define TYPES_hsd_integer(S, D) \
217 S (s16), S (s32), S (s64), S (u16), S (u32), S (u64)
220 #define TYPES_s_integer(S, D) \
225 #define TYPES_sd_integer(S, D) \
226 S (s32), S (s64), S (u32), S (u64)
231 #define TYPES_sd_data(S, D) \
232 S (f32), S (f64), TYPES_sd_integer (S, D)
237 #define TYPES_all_float_and_sd_integer(S, D) \
238 TYPES_all_float (S, D), TYPES_sd_integer (S, D)
242 #define TYPES_d_integer(S, D) \
245 /* All the type combinations allowed by svcvt. */
246 #define TYPES_cvt(S, D) \
247 D (f16, f32), D (f16, f64), \
248 D (f16, s16), D (f16, s32), D (f16, s64), \
249 D (f16, u16), D (f16, u32), D (f16, u64), \
251 D (f32, f16), D (f32, f64), \
252 D (f32, s32), D (f32, s64), \
253 D (f32, u32), D (f32, u64), \
255 D (f64, f16), D (f64, f32), \
256 D (f64, s32), D (f64, s64), \
257 D (f64, u32), D (f64, u64), \
260 D (s32, f16), D (s32, f32), D (s32, f64), \
261 D (s64, f16), D (s64, f32), D (s64, f64), \
264 D (u32, f16), D (u32, f32), D (u32, f64), \
265 D (u64, f16), D (u64, f32), D (u64, f64)
267 /* { _s32 _s64 } x { _b8 _b16 _b32 _b64 }
269 #define TYPES_inc_dec_n1(D, A) \
270 D (A, b8), D (A, b16), D (A, b32), D (A, b64)
271 #define TYPES_inc_dec_n(S, D) \
272 TYPES_inc_dec_n1 (D, s32), \
273 TYPES_inc_dec_n1 (D, s64), \
274 TYPES_inc_dec_n1 (D, u32), \
275 TYPES_inc_dec_n1 (D, u64)
277 /* { _f16 _f32 _f64 } { _f16 _f32 _f64 }
278 { _s8 _s16 _s32 _s64 } x { _s8 _s16 _s32 _s64 }
279 { _u8 _u16 _u32 _u64 } { _u8 _u16 _u32 _u64 }. */
280 #define TYPES_reinterpret1(D, A) \
281 D (A, f16), D (A, f32), D (A, f64), \
282 D (A, s8), D (A, s16), D (A, s32), D (A, s64), \
283 D (A, u8), D (A, u16), D (A, u32), D (A, u64)
284 #define TYPES_reinterpret(S, D) \
285 TYPES_reinterpret1 (D, f16), \
286 TYPES_reinterpret1 (D, f32), \
287 TYPES_reinterpret1 (D, f64), \
288 TYPES_reinterpret1 (D, s8), \
289 TYPES_reinterpret1 (D, s16), \
290 TYPES_reinterpret1 (D, s32), \
291 TYPES_reinterpret1 (D, s64), \
292 TYPES_reinterpret1 (D, u8), \
293 TYPES_reinterpret1 (D, u16), \
294 TYPES_reinterpret1 (D, u32), \
295 TYPES_reinterpret1 (D, u64)
297 /* { _b8 _b16 _b32 _b64 } x { _s32 _s64 }
299 #define TYPES_while1(D, bn) \
300 D (bn, s32), D (bn, s64), D (bn, u32), D (bn, u64)
301 #define TYPES_while(S, D) \
302 TYPES_while1 (D, b8), \
303 TYPES_while1 (D, b16), \
304 TYPES_while1 (D, b32), \
305 TYPES_while1 (D, b64)
307 /* Describe a pair of type suffixes in which only the first is used. */
308 #define DEF_VECTOR_TYPE(X) { TYPE_SUFFIX_ ## X, NUM_TYPE_SUFFIXES }
310 /* Describe a pair of type suffixes in which both are used. */
311 #define DEF_DOUBLE_TYPE(X, Y) { TYPE_SUFFIX_ ## X, TYPE_SUFFIX_ ## Y }
313 /* Create an array that can be used in aarch64-sve-builtins.def to
314 select the type suffixes in TYPES_<NAME>. */
315 #define DEF_SVE_TYPES_ARRAY(NAME) \
316 static const type_suffix_pair types_##NAME[] = { \
317 TYPES_##NAME (DEF_VECTOR_TYPE, DEF_DOUBLE_TYPE), \
318 { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES } \
321 /* For functions that don't take any type suffixes. */
322 static const type_suffix_pair types_none[] = {
323 { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES },
324 { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES }
/* Instantiate one types_<NAME> array per TYPES_<NAME> macro above.  */
327 /* Create an array for each TYPES_<combination> macro above. */
328 DEF_SVE_TYPES_ARRAY (all_pred);
329 DEF_SVE_TYPES_ARRAY (all_float);
330 DEF_SVE_TYPES_ARRAY (all_signed);
331 DEF_SVE_TYPES_ARRAY (all_float_and_signed);
332 DEF_SVE_TYPES_ARRAY (all_unsigned);
333 DEF_SVE_TYPES_ARRAY (all_integer);
334 DEF_SVE_TYPES_ARRAY (all_data);
335 DEF_SVE_TYPES_ARRAY (b);
336 DEF_SVE_TYPES_ARRAY (bhs_signed);
337 DEF_SVE_TYPES_ARRAY (bhs_unsigned);
338 DEF_SVE_TYPES_ARRAY (bhs_integer);
339 DEF_SVE_TYPES_ARRAY (h_integer);
340 DEF_SVE_TYPES_ARRAY (hs_float);
341 DEF_SVE_TYPES_ARRAY (hsd_integer);
342 DEF_SVE_TYPES_ARRAY (s_integer);
343 DEF_SVE_TYPES_ARRAY (sd_integer);
344 DEF_SVE_TYPES_ARRAY (sd_data);
345 DEF_SVE_TYPES_ARRAY (all_float_and_sd_integer);
346 DEF_SVE_TYPES_ARRAY (d_integer);
347 DEF_SVE_TYPES_ARRAY (cvt);
348 DEF_SVE_TYPES_ARRAY (inc_dec_n);
349 DEF_SVE_TYPES_ARRAY (reinterpret);
350 DEF_SVE_TYPES_ARRAY (while);
/* NUM_PREDS-terminated predication lists referenced by
   DEF_SVE_FUNCTION entries via the preds_<NAME> naming scheme.  */
352 /* Used by functions that have no governing predicate. */
353 static const predication_index preds_none[] = { PRED_none, NUM_PREDS };
355 /* Used by functions that have a governing predicate but do not have an
357 static const predication_index preds_implicit[] = { PRED_implicit, NUM_PREDS };
359 /* Used by functions that allow merging, zeroing and "don't care"
361 static const predication_index preds_mxz[] = {
362 PRED_m, PRED_x, PRED_z, NUM_PREDS
365 /* Used by functions that have the mxz predicated forms above, and in addition
366 have an unpredicated form. */
367 static const predication_index preds_mxz_or_none[] = {
368 PRED_m, PRED_x, PRED_z, PRED_none, NUM_PREDS
371 /* Used by functions that allow merging and zeroing predication but have
373 static const predication_index preds_mz[] = { PRED_m, PRED_z, NUM_PREDS };
375 /* Used by functions that have an unpredicated form and a _z predicated
377 static const predication_index preds_z_or_none[] = {
378 PRED_z, PRED_none, NUM_PREDS
381 /* Used by (mostly predicate) functions that only support "_z" predication. */
382 static const predication_index preds_z[] = { PRED_z, NUM_PREDS };
384 /* A list of all SVE ACLE functions. */
385 static CONSTEXPR const function_group_info function_groups[] = {
386 #define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \
387 { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, preds_##PREDS, \
388 REQUIRED_EXTENSIONS | AARCH64_FL_SVE },
389 #include "aarch64-sve-builtins.def"
/* GC-rooted (GTY) global state shared by the registration and
   resolution code below.  */
392 /* The scalar type associated with each vector type. */
393 GTY(()) tree scalar_types[NUM_VECTOR_TYPES];
395 /* The single-predicate and single-vector types, with their built-in
396 "__SV..._t" name. Allow an index of NUM_VECTOR_TYPES, which always
397 yields a null tree. */
398 static GTY(()) tree abi_vector_types[NUM_VECTOR_TYPES + 1];
400 /* Same, but with the arm_sve.h "sv..._t" name. */
401 GTY(()) tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
403 /* The svpattern enum type. */
404 GTY(()) tree acle_svpattern;
406 /* The svprfop enum type. */
407 GTY(()) tree acle_svprfop;
409 /* The list of all registered function decls, indexed by code. */
410 static GTY(()) vec<registered_function *, va_gc> *registered_functions;
412 /* All registered function decls, hashed on the function_instance
413 that they implement. This is used for looking up implementations of
414 overloaded functions. */
415 static hash_table<registered_function_hasher> *function_table;
417 /* True if we've already complained about attempts to use functions
418 when the required extension is disabled. */
419 static bool reported_missing_extension_p;
421 /* If TYPE is an ACLE vector type, return the associated vector_type,
422 otherwise return NUM_VECTOR_TYPES. */
423 static vector_type_index
/* Linear scan of abi_vector_types after stripping type qualifiers.
   NOTE(review): function braces (original lines 425/433) are elided
   from this listing.  */
424 find_vector_type (const_tree type)
426 /* A linear search should be OK here, since the code isn't hot and
427 the number of types is only small. */
428 type = TYPE_MAIN_VARIANT (type);
429 for (unsigned int i = 0; i < NUM_VECTOR_TYPES; ++i)
430 if (type == abi_vector_types[i])
431 return vector_type_index (i);
432 return NUM_VECTOR_TYPES;
435 /* If TYPE is a valid SVE element type, return the corresponding type
436 suffix, otherwise return NUM_TYPE_SUFFIXES. */
437 static type_suffix_index
438 find_type_suffix_for_scalar_type (const_tree type)
440 /* A linear search should be OK here, since the code isn't hot and
441 the number of types is only small. */
442 type = TYPE_MAIN_VARIANT (type);
/* Skip boolean suffixes: predicates have no associated scalar type.  */
443 for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i)
444 if (!type_suffixes[suffix_i].bool_p)
446 vector_type_index vector_i = type_suffixes[suffix_i].vector_type;
447 if (type == TYPE_MAIN_VARIANT (scalar_types[vector_i]))
448 return type_suffix_index (suffix_i);
450 return NUM_TYPE_SUFFIXES;
/* Diagnostic helpers.  All take the call's LOCATION and FNDECL and emit
   error_at/inform; ARGNO is zero-based internally but reported
   one-based to the user (argno + 1).  NOTE(review): return-type lines,
   braces and some early-return statements are elided throughout this
   section of the listing.  */
453 /* Report an error against LOCATION that the user has tried to use
454 function FNDECL when extension EXTENSION is disabled. */
456 report_missing_extension (location_t location, tree fndecl,
457 const char *extension)
459 /* Avoid reporting a slew of messages for a single oversight. */
460 if (reported_missing_extension_p)
463 error_at (location, "ACLE function %qD requires ISA extension %qs",
465 inform (location, "you can enable %qs using the command-line"
466 " option %<-march%>, or by using the %<target%>"
467 " attribute or pragma", extension);
468 reported_missing_extension_p = true;
471 /* Check whether all the AARCH64_FL_* values in REQUIRED_EXTENSIONS are
472 enabled, given that those extensions are required for function FNDECL.
473 Report an error against LOCATION if not. */
475 check_required_extensions (location_t location, tree fndecl,
476 uint64_t required_extensions)
478 uint64_t missing_extensions = required_extensions & ~aarch64_isa_flags;
479 if (missing_extensions == 0)
/* Local flag/name table expanded from the option-extensions .def so the
   diagnostic can name the first missing extension.  */
482 static const struct { uint64_t flag; const char *name; } extensions[] = {
483 #define AARCH64_OPT_EXTENSION(EXT_NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \
484 SYNTHETIC, FEATURE_STRING) \
485 { FLAG_CANONICAL, EXT_NAME },
486 #include "aarch64-option-extensions.def"
489 for (unsigned int i = 0; i < ARRAY_SIZE (extensions); ++i)
490 if (missing_extensions & extensions[i].flag)
492 report_missing_extension (location, fndecl, extensions[i].name);
498 /* Report that LOCATION has a call to FNDECL in which argument ARGNO
499 was not an integer constant expression. ARGNO counts from zero. */
501 report_non_ice (location_t location, tree fndecl, unsigned int argno)
503 error_at (location, "argument %d of %qE must be an integer constant"
504 " expression", argno + 1, fndecl);
507 /* Report that LOCATION has a call to FNDECL in which argument ARGNO has
508 the value ACTUAL, whereas the function requires a value in the range
509 [MIN, MAX]. ARGNO counts from zero. */
511 report_out_of_range (location_t location, tree fndecl, unsigned int argno,
512 HOST_WIDE_INT actual, HOST_WIDE_INT min,
515 error_at (location, "passing %wd to argument %d of %qE, which expects"
516 " a value in the range [%wd, %wd]", actual, argno + 1, fndecl,
520 /* Report that LOCATION has a call to FNDECL in which argument ARGNO has
521 the value ACTUAL, whereas the function requires either VALUE0 or
522 VALUE1. ARGNO counts from zero. */
524 report_neither_nor (location_t location, tree fndecl, unsigned int argno,
525 HOST_WIDE_INT actual, HOST_WIDE_INT value0,
526 HOST_WIDE_INT value1)
528 error_at (location, "passing %wd to argument %d of %qE, which expects"
529 " either %wd or %wd", actual, argno + 1, fndecl, value0, value1);
532 /* Report that LOCATION has a call to FNDECL in which argument ARGNO has
533 the value ACTUAL, whereas the function requires one of VALUE0..3.
534 ARGNO counts from zero. */
536 report_not_one_of (location_t location, tree fndecl, unsigned int argno,
537 HOST_WIDE_INT actual, HOST_WIDE_INT value0,
538 HOST_WIDE_INT value1, HOST_WIDE_INT value2,
539 HOST_WIDE_INT value3)
541 error_at (location, "passing %wd to argument %d of %qE, which expects"
542 " %wd, %wd, %wd or %wd", actual, argno + 1, fndecl, value0, value1,
546 /* Report that LOCATION has a call to FNDECL in which argument ARGNO has
547 the value ACTUAL, whereas the function requires a valid value of
548 enum type ENUMTYPE. ARGNO counts from zero. */
550 report_not_enum (location_t location, tree fndecl, unsigned int argno,
551 HOST_WIDE_INT actual, tree enumtype)
553 error_at (location, "passing %wd to argument %d of %qE, which expects"
554 " a valid %qT value", actual, argno + 1, fndecl, enumtype);
557 /* Return a hash code for a function_instance. */
559 function_instance::hash () const
562 /* BASE uniquely determines BASE_NAME, so we don't need to hash both. */
/* NOTE(review): the declaration of the hasher object "h" (and the
   add_ptr calls for base/shape, plus the final return) are elided
   from this listing.  */
565 h.add_int (mode_suffix_id);
566 h.add_int (type_suffix_ids[0]);
567 h.add_int (type_suffix_ids[1]);
572 /* Return a set of CP_* flags that describe what the function could do,
573 taking the command-line flags into account. */
575 function_instance::call_properties () const
577 unsigned int flags = base->call_properties (*this);
579 /* -fno-trapping-math means that we can assume any FP exceptions
580 are not user-visible. */
581 if (!flag_trapping_math)
582 flags &= ~CP_RAISE_FP_EXCEPTIONS;
587 /* Return true if calls to the function could read some form of
590 function_instance::reads_global_state_p () const
592 unsigned int flags = call_properties ();
594 /* Preserve any dependence on rounding mode, flush to zero mode, etc.
595 There is currently no way of turning this off; in particular,
596 -fno-rounding-math (which is the default) means that we should make
597 the usual assumptions about rounding mode, which for intrinsics means
598 acting as the instructions do. */
599 if (flags & CP_READ_FPCR)
602 /* Handle direct reads of global state. */
603 return flags & (CP_READ_MEMORY | CP_READ_FFR);
606 /* Return true if calls to the function could modify some form of
609 function_instance::modifies_global_state_p () const
611 unsigned int flags = call_properties ();
613 /* Preserve any exception state written back to the FPCR,
614 unless -fno-trapping-math says this is unnecessary. */
615 if (flags & CP_RAISE_FP_EXCEPTIONS)
618 /* Treat prefetches as modifying global state, since that's the
619 only means we have of keeping them in their correct position. */
620 if (flags & CP_PREFETCH_MEMORY)
623 /* Handle direct modifications of global state. */
624 return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR);
627 /* Return true if calls to the function could raise a signal. */
629 function_instance::could_trap_p () const
631 unsigned int flags = call_properties ();
633 /* Handle functions that could raise SIGFPE. */
634 if (flags & CP_RAISE_FP_EXCEPTIONS)
637 /* Handle functions that could raise SIGBUS or SIGSEGV. */
638 if (flags & (CP_READ_MEMORY | CP_WRITE_MEMORY))
/* hash_table callbacks: delegate to function_instance's own hash and
   equality so lookups key on the instance a decl implements.  */
645 registered_function_hasher::hash (value_type value)
647 return value->instance.hash ();
651 registered_function_hasher::equal (value_type value, const compare_type &key)
653 return value->instance == key;
/* RAII-style switcher: the constructor saves aarch64_isa_flags and
   have_regs_of_mode, enables what's needed to register SVE builtins,
   and the destructor restores both.  */
656 sve_switcher::sve_switcher ()
657 : m_old_isa_flags (aarch64_isa_flags)
659 /* Changing the ISA flags and have_regs_of_mode should be enough here.
660 We shouldn't need to pay the compile-time cost of a full target
662 aarch64_isa_flags = (AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_F16
665 memcpy (m_old_have_regs_of_mode, have_regs_of_mode,
666 sizeof (have_regs_of_mode));
667 for (int i = 0; i < NUM_MACHINE_MODES; ++i)
668 if (aarch64_sve_mode_p ((machine_mode) i))
669 have_regs_of_mode[i] = true;
672 sve_switcher::~sve_switcher ()
674 memcpy (have_regs_of_mode, m_old_have_regs_of_mode,
675 sizeof (have_regs_of_mode));
676 aarch64_isa_flags = m_old_isa_flags;
/* function_builder owns an obstack used to assemble function names;
   the destructor releases everything allocated on it.  */
679 function_builder::function_builder ()
681 m_overload_type = build_function_type (void_type_node, void_list_node);
682 m_direct_overloads = lang_GNU_CXX ();
683 gcc_obstack_init (&m_string_obstack);
686 function_builder::~function_builder ()
688 obstack_free (&m_string_obstack, NULL);
691 /* Add NAME to the end of the function name being built. */
693 function_builder::append_name (const char *name)
695 obstack_grow (&m_string_obstack, name, strlen (name));
698 /* Zero-terminate and complete the function name being built. */
700 function_builder::finish_name ()
702 obstack_1grow (&m_string_obstack, 0);
703 return (char *) obstack_finish (&m_string_obstack);
706 /* Return the overloaded or full function name for INSTANCE; OVERLOADED_P
707 selects which. Allocate the string on m_string_obstack; the caller
708 must use obstack_free to free it after use. */
710 function_builder::get_name (const function_instance &instance,
/* Name = base name + displacement-unit suffix + mode suffix + any
   explicit type suffixes + predication suffix.  NOTE(review): the
   switch's case labels and break statements are elided here.  */
713 append_name (instance.base_name);
715 switch (instance.displacement_units ())
721 append_name ("_offset");
725 append_name ("_index");
729 append_name ("_vnum");
733 append_name (instance.mode_suffix ().string);
734 for (unsigned int i = 0; i < 2; ++i)
735 if (!overloaded_p || instance.shape->explicit_type_suffix_p (i))
736 append_name (instance.type_suffix (i).string);
737 append_name (pred_suffixes[instance.pred]);
738 return finish_name ();
741 /* Add attribute NAME to ATTRS. */
743 add_attribute (const char *name, tree attrs)
745 return tree_cons (get_identifier (name), NULL_TREE, attrs);
748 /* Return the appropriate function attributes for INSTANCE. */
750 function_builder::get_attributes (const function_instance &instance)
752 tree attrs = NULL_TREE;
/* "pure" if it only reads global state, "const" if it touches none;
   neither if it modifies global state.  */
754 if (!instance.modifies_global_state_p ())
756 if (instance.reads_global_state_p ())
757 attrs = add_attribute ("pure", attrs);
759 attrs = add_attribute ("const", attrs);
762 if (!flag_non_call_exceptions || !instance.could_trap_p ())
763 attrs = add_attribute ("nothrow", attrs);
765 return add_attribute ("leaf", attrs);
768 /* Add a function called NAME with type FNTYPE and attributes ATTRS.
769 INSTANCE describes what the function does and OVERLOADED_P indicates
770 whether it is overloaded. REQUIRED_EXTENSIONS are the set of
771 architecture extensions that the function requires. */
772 registered_function &
773 function_builder::add_function (const function_instance &instance,
774 const char *name, tree fntype, tree attrs,
775 uint64_t required_extensions,
/* Builtin codes interleave SVE and general builtins via
   AARCH64_BUILTIN_SHIFT, so the vector index is recoverable from the
   code.  NOTE(review): the rfn.decl assignment and the final
   "return rfn;" are elided from this listing.  */
778 unsigned int code = vec_safe_length (registered_functions);
779 code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_SVE;
780 tree decl = simulate_builtin_function_decl (input_location, name, fntype,
783 registered_function &rfn = *ggc_alloc <registered_function> ();
784 rfn.instance = instance;
786 rfn.required_extensions = required_extensions;
787 rfn.overloaded_p = overloaded_p;
788 vec_safe_push (registered_functions, &rfn);
793 /* Add a built-in function for INSTANCE, with the argument types given
794 by ARGUMENT_TYPES and the return type given by RETURN_TYPE.
795 REQUIRED_EXTENSIONS are the set of architecture extensions that the
796 function requires. FORCE_DIRECT_OVERLOADS is true if there is a
797 one-to-one mapping between "short" and "full" names, and if standard
798 overload resolution therefore isn't necessary. */
800 function_builder::add_unique_function (const function_instance &instance,
802 vec<tree> &argument_types,
803 uint64_t required_extensions,
804 bool force_direct_overloads)
806 /* Add the function under its full (unique) name. */
807 char *name = get_name (instance, false);
808 tree fntype = build_function_type_array (return_type,
809 argument_types.length (),
810 argument_types.address ());
811 tree attrs = get_attributes (instance);
812 registered_function &rfn = add_function (instance, name, fntype, attrs,
813 required_extensions, false);
815 /* Enter the function into the hash table. */
816 hashval_t hash = instance.hash ();
817 registered_function **rfn_slot
818 = function_table->find_slot_with_hash (instance, hash, INSERT);
/* A duplicate registration would indicate two instances with the same
   full name; the slot must be empty.  NOTE(review): the "*rfn_slot =
   &rfn;" store is elided from this listing.  */
819 gcc_assert (!*rfn_slot);
822 /* Also add the function under its overloaded alias, if we want
823 a separate decl for each instance of an overloaded function. */
824 if (m_direct_overloads || force_direct_overloads)
826 char *overload_name = get_name (instance, true);
827 if (strcmp (name, overload_name) != 0)
829 /* Attribute lists shouldn't be shared. */
830 tree attrs = get_attributes (instance);
831 add_function (instance, overload_name, fntype, attrs,
832 required_extensions, false);
836 obstack_free (&m_string_obstack, name);
839 /* Add one function decl for INSTANCE, to be used with manual overload
840 resolution. REQUIRED_EXTENSIONS are the set of architecture extensions
841 that the function requires.
843 For simplicity, deal with duplicate attempts to add the same
846 function_builder::add_overloaded_function (const function_instance &instance,
847 uint64_t required_extensions)
849 char *name = get_name (instance, true);
/* On a duplicate, just assert it is a genuine re-registration of the
   same instance/extensions rather than a name clash.  */
850 if (registered_function **map_value = m_overload_names.get (name))
851 gcc_assert ((*map_value)->instance == instance
852 && (*map_value)->required_extensions == required_extensions);
855 registered_function &rfn
856 = add_function (instance, name, m_overload_type, NULL_TREE,
857 required_extensions, true);
/* Key the map on the IDENTIFIER's storage, which outlives the obstack
   copy of NAME freed below.  */
858 const char *permanent_name = IDENTIFIER_POINTER (DECL_NAME (rfn.decl));
859 m_overload_names.put (permanent_name, &rfn);
861 obstack_free (&m_string_obstack, name);
864 /* If we are using manual overload resolution, add one function decl
865 for each overloaded function in GROUP. Take the function base name
866 from GROUP and the mode from MODE. */
868 function_builder::add_overloaded_functions (const function_group_info &group,
869 mode_suffix_index mode)
871 if (m_direct_overloads)
874 unsigned int explicit_type0 = (*group.shape)->explicit_type_suffix_p (0);
875 unsigned int explicit_type1 = (*group.shape)->explicit_type_suffix_p (1);
876 for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
878 if (!explicit_type0 && !explicit_type1)
880 /* Deal with the common case in which there is one overloaded
881 function for all type combinations. */
882 function_instance instance (group.base_name, *group.base,
883 *group.shape, mode, types_none[0],
885 add_overloaded_function (instance, group.required_extensions);
888 for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES;
891 /* Stub out the types that are determined by overload
893 type_suffix_pair types = {
894 explicit_type0 ? group.types[ti][0] : NUM_TYPE_SUFFIXES,
895 explicit_type1 ? group.types[ti][1] : NUM_TYPE_SUFFIXES
897 function_instance instance (group.base_name, *group.base,
898 *group.shape, mode, types,
900 add_overloaded_function (instance, group.required_extensions);
905 /* Register all the functions in GROUP. */
907 function_builder::register_function_group (const function_group_info &group)
909 (*group.shape)->build (*this, group);
/* Trivial constructors that record the call site, the instance being
   resolved and (for the resolver) the actual argument list.  */
912 function_call_info::function_call_info (location_t location_in,
913 const function_instance &instance_in,
915 : function_instance (instance_in), location (location_in), fndecl (fndecl_in)
919 function_resolver::function_resolver (location_t location,
920 const function_instance &instance,
921 tree fndecl, vec<tree, va_gc> &arglist)
922 : function_call_info (location, instance, fndecl), m_arglist (arglist)
926 /* Return the vector type associated with type suffix TYPE. */
928 function_resolver::get_vector_type (type_suffix_index type)
930 return acle_vector_types[0][type_suffixes[type].vector_type];
933 /* Return the <stdint.h> name associated with TYPE. Using the <stdint.h>
934 name should be more user-friendly than the underlying canonical type,
935 since it makes the signedness and bitwidth explicit. */
937 function_resolver::get_scalar_type_name (type_suffix_index type)
/* "+ 2" skips the "sv" prefix of the ACLE name, e.g. "svint8_t" ->
   "int8_t".  */
939 return vector_types[type_suffixes[type].vector_type].acle_name + 2;
942 /* Return the type of argument I, or error_mark_node if it isn't
945 function_resolver::get_argument_type (unsigned int i)
947 tree arg = m_arglist[i];
948 return arg == error_mark_node ? arg : TREE_TYPE (arg);
951 /* Return true if argument I is some form of scalar value. */
953 function_resolver::scalar_argument_p (unsigned int i)
955 tree type = get_argument_type (i);
956 return (INTEGRAL_TYPE_P (type)
957 /* Allow pointer types, leaving the frontend to warn where
959 || POINTER_TYPE_P (type)
960 || SCALAR_FLOAT_TYPE_P (type));
963 /* Report that the function has no form that takes type suffix TYPE.
964 Return error_mark_node. */
966 function_resolver::report_no_such_form (type_suffix_index type)
968 error_at (location, "%qE has no form that takes %qT arguments",
969 fndecl, get_vector_type (type));
970 return error_mark_node;
973 /* Silently check whether there is an instance of the function with the
974 mode suffix given by MODE and the type suffixes given by TYPE0 and TYPE1.
975 Return its function decl if so, otherwise return null. */
977 function_resolver::lookup_form (mode_suffix_index mode,
978 type_suffix_index type0,
979 type_suffix_index type1)
981 type_suffix_pair types = { type0, type1 };
982 function_instance instance (base_name, base, shape, mode, types, pred);
983 registered_function *rfn
984 = function_table->find_with_hash (instance, instance.hash ());
985 return rfn ? rfn->decl : NULL_TREE;
988 /* Resolve the function to one with the mode suffix given by MODE and the
989 type suffixes given by TYPE0 and TYPE1. Return its function decl on
990 success, otherwise report an error and return error_mark_node. */
992 function_resolver::resolve_to (mode_suffix_index mode,
993 type_suffix_index type0,
994 type_suffix_index type1)
/* On lookup failure, blame whichever suffix the caller inferred rather
   than took from the call.  NOTE(review): the "if (!res)" guard and
   the success-path "return res;" are elided from this listing.  */
996 tree res = lookup_form (mode, type0, type1);
999 if (type1 == NUM_TYPE_SUFFIXES)
1000 return report_no_such_form (type0);
1001 if (type0 == type_suffix_ids[0])
1002 return report_no_such_form (type1);
1003 /* To be filled in when we have other cases. */
1009 /* Require argument ARGNO to be a 32-bit or 64-bit scalar integer type.
1010 Return the associated type suffix on success, otherwise report an
1011 error and return NUM_TYPE_SUFFIXES. */
1013 function_resolver::infer_integer_scalar_type (unsigned int argno)
1015 tree actual = get_argument_type (argno);
1016 if (actual == error_mark_node)
1017 return NUM_TYPE_SUFFIXES;
1019 /* Allow enums and booleans to decay to integers, for compatibility
1020 with C++ overloading rules. */
1021 if (INTEGRAL_TYPE_P (actual))
1023 bool uns_p = TYPE_UNSIGNED (actual);
1024 /* Honor the usual integer promotions, so that resolution works
1025 in the same way as for C++. */
/* Sub-32-bit types promote to (signed) int regardless of their own
   signedness, hence the unconditional _s32 here.  */
1026 if (TYPE_PRECISION (actual) < 32)
1027 return TYPE_SUFFIX_s32;
1028 if (TYPE_PRECISION (actual) == 32)
1029 return uns_p ? TYPE_SUFFIX_u32 : TYPE_SUFFIX_s32;
1030 if (TYPE_PRECISION (actual) == 64)
1031 return uns_p ? TYPE_SUFFIX_u64 : TYPE_SUFFIX_s64;
1034 error_at (location, "passing %qT to argument %d of %qE, which expects"
1035 " a 32-bit or 64-bit integer type", actual, argno + 1, fndecl);
1036 return NUM_TYPE_SUFFIXES;
1039 /* Require argument ARGNO to be a pointer to a scalar type that has a
1040 corresponding type suffix. Return that type suffix on success,
1041 otherwise report an error and return NUM_TYPE_SUFFIXES.
1042 GATHER_SCATTER_P is true if the function is a gather/scatter
1043 operation, and so requires a pointer to 32-bit or 64-bit data. */
1045 function_resolver::infer_pointer_type (unsigned int argno,
1046 bool gather_scatter_p)
1048 tree actual = get_argument_type (argno);
1049 if (actual == error_mark_node)
1050 return NUM_TYPE_SUFFIXES;
1052 if (TREE_CODE (actual) != POINTER_TYPE)
1054 error_at (location, "passing %qT to argument %d of %qE, which"
1055 " expects a pointer type", actual, argno + 1, fndecl)_
1056 if (VECTOR_TYPE_P (actual) && gather_scatter_p)
1057 inform (location, "an explicit type suffix is needed"
1058 " when using a vector of base addresses");
1059 return NUM_TYPE_SUFFIXES;
1062 tree target = TREE_TYPE (actual);
1063 type_suffix_index type = find_type_suffix_for_scalar_type (target);
1064 if (type == NUM_TYPE_SUFFIXES)
1066 error_at (location, "passing %qT to argument %d of %qE, but %qT is not"
1067 " a valid SVE element type", actual, argno + 1, fndecl,
1069 return NUM_TYPE_SUFFIXES;
/* Gathers/scatters index elements by offset vectors, so only 32-bit
   and 64-bit element sizes are addressable.  NOTE(review): the final
   "return type;" (original line 1080) is elided from this listing.  */
1071 unsigned int bits = type_suffixes[type].element_bits;
1072 if (gather_scatter_p && bits != 32 && bits != 64)
1074 error_at (location, "passing %qT to argument %d of %qE, which"
1075 " expects a pointer to 32-bit or 64-bit elements",
1076 actual, argno + 1, fndecl);
1077 return NUM_TYPE_SUFFIXES;
1083 /* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS
1084 vectors; NUM_VECTORS is 1 for the former. Return the associated type
1085 suffix on success, using TYPE_SUFFIX_b for predicates. Report an error
1086 and return NUM_TYPE_SUFFIXES on failure. */
1088 function_resolver::infer_vector_or_tuple_type (unsigned int argno,
1089 unsigned int num_vectors)
1091 tree actual = get_argument_type (argno);
1092 if (actual == error_mark_node)
1093 return NUM_TYPE_SUFFIXES;
1095 /* A linear search should be OK here, since the code isn't hot and
1096 the number of types is only small. */
/* size_i indexes the tuple size minus one: acle_vector_types[0] holds the
   single-vector types, acle_vector_types[1] the two-vector tuples, etc.  */
1097 for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i)
1098 for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i)
1100 vector_type_index type_i = type_suffixes[suffix_i].vector_type;
1101 tree type = acle_vector_types[size_i][type_i];
1102 if (type && TYPE_MAIN_VARIANT (actual) == TYPE_MAIN_VARIANT (type))
1104 if (size_i + 1 == num_vectors)
1105 return type_suffix_index (suffix_i);
/* The argument is a recognized SVE type, but with the wrong number of
   vectors; pick the error message that best describes the mismatch.  */
1107 if (num_vectors == 1)
1108 error_at (location, "passing %qT to argument %d of %qE, which"
1109 " expects a single SVE vector rather than a tuple",
1110 actual, argno + 1, fndecl)
1111 else if (size_i == 0 && type_i != VECTOR_TYPE_svbool_t)
1112 error_at (location, "passing single vector %qT to argument %d"
1113 " of %qE, which expects a tuple of %d vectors",
1114 actual, argno + 1, fndecl, num_vectors);
1116 error_at (location, "passing %qT to argument %d of %qE, which"
1117 " expects a tuple of %d vectors", actual, argno + 1,
1118 fndecl, num_vectors);
1119 return NUM_TYPE_SUFFIXES;
/* Not an SVE vector or tuple type at all.  */
1123 if (num_vectors == 1)
1124 error_at (location, "passing %qT to argument %d of %qE, which"
1125 " expects an SVE vector type", actual, argno + 1, fndecl);
1127 error_at (location, "passing %qT to argument %d of %qE, which"
1128 " expects an SVE tuple type", actual, argno + 1, fndecl);
1129 return NUM_TYPE_SUFFIXES;
1132 /* Require argument ARGNO to have some form of vector type. Return the
1133 associated type suffix on success, using TYPE_SUFFIX_b for predicates.
1134 Report an error and return NUM_TYPE_SUFFIXES on failure. */
1136 function_resolver::infer_vector_type (unsigned int argno)
/* A single vector is just a tuple of one vector.  */
1138 return infer_vector_or_tuple_type (argno, 1);
1141 /* Like infer_vector_type, but also require the type to be integral. */
1143 function_resolver::infer_integer_vector_type (unsigned int argno)
1145 type_suffix_index type = infer_vector_type (argno);
1146 if (type == NUM_TYPE_SUFFIXES)
/* infer_vector_type has already reported an error; just propagate it.  */
1149 if (!type_suffixes[type].integer_p)
1151 error_at (location, "passing %qT to argument %d of %qE, which"
1152 " expects a vector of integers", get_argument_type (argno),
1154 return NUM_TYPE_SUFFIXES;
1160 /* Like infer_vector_type, but also require the type to be an unsigned
1163 function_resolver::infer_unsigned_vector_type (unsigned int argno)
1165 type_suffix_index type = infer_vector_type (argno);
1166 if (type == NUM_TYPE_SUFFIXES)
/* infer_vector_type has already reported an error; just propagate it.  */
1169 if (!type_suffixes[type].unsigned_p)
1171 error_at (location, "passing %qT to argument %d of %qE, which"
1172 " expects a vector of unsigned integers",
1173 get_argument_type (argno), argno + 1, fndecl);
1174 return NUM_TYPE_SUFFIXES;
1180 /* Like infer_vector_type, but also require the element size to be
1183 function_resolver::infer_sd_vector_type (unsigned int argno)
1185 type_suffix_index type = infer_vector_type (argno);
1186 if (type == NUM_TYPE_SUFFIXES)
/* infer_vector_type has already reported an error; just propagate it.  */
/* "sd" refers to 32-bit ("s", single-word) or 64-bit ("d", double-word)
   elements -- the only sizes accepted here.  */
1189 unsigned int bits = type_suffixes[type].element_bits;
1190 if (bits != 32 && bits != 64)
1192 error_at (location, "passing %qT to argument %d of %qE, which"
1193 " expects a vector of 32-bit or 64-bit elements",
1194 get_argument_type (argno), argno + 1, fndecl);
1195 return NUM_TYPE_SUFFIXES;
1201 /* If the function operates on tuples of vectors, require argument ARGNO to be
1202 a tuple with the appropriate number of vectors, otherwise require it to be
1203 a single vector. Return the associated type suffix on success, using
1204 TYPE_SUFFIX_b for predicates. Report an error and return NUM_TYPE_SUFFIXES
1207 function_resolver::infer_tuple_type (unsigned int argno)
/* vectors_per_tuple () is 1 for non-tuple functions, so this also
   covers the single-vector case.  */
1209 return infer_vector_or_tuple_type (argno, vectors_per_tuple ());
1212 /* Require argument ARGNO to be a vector or scalar argument. Return true
1213 if it is, otherwise report an appropriate error. */
1215 function_resolver::require_vector_or_scalar_type (unsigned int argno)
1217 tree actual = get_argument_type (argno);
1218 if (actual == error_mark_node)
/* The frontend has already reported an error for this argument.  */
1221 if (!scalar_argument_p (argno) && !VECTOR_TYPE_P (actual))
1223 error_at (location, "passing %qT to argument %d of %qE, which"
1224 " expects a vector or scalar type", actual, argno + 1, fndecl);
1231 /* Require argument ARGNO to have vector type TYPE, in cases where this
1232 requirement holds for all uses of the function. Return true if the
1233 argument has the right form, otherwise report an appropriate error. */
1235 function_resolver::require_vector_type (unsigned int argno,
1236 vector_type_index type)
/* Index 0 of acle_vector_types holds the single-vector (non-tuple) types.  */
1238 tree expected = acle_vector_types[0][type];
1239 tree actual = get_argument_type (argno);
/* Compare main variants so that, e.g., cv-qualified variants of the
   expected type are still accepted.  */
1240 if (actual != error_mark_node
1241 && TYPE_MAIN_VARIANT (expected) != TYPE_MAIN_VARIANT (actual))
1243 error_at (location, "passing %qT to argument %d of %qE, which"
1244 " expects %qT", actual, argno + 1, fndecl, expected);
1250 /* Like require_vector_type, but TYPE is inferred from previous arguments
1251 rather than being a fixed part of the function signature. This changes
1252 the nature of the error messages. */
1254 function_resolver::require_matching_vector_type (unsigned int argno,
1255 type_suffix_index type)
1257 type_suffix_index new_type = infer_vector_type (argno);
1258 if (new_type == NUM_TYPE_SUFFIXES)
/* infer_vector_type has already reported an error; just propagate it.  */
1261 if (type != new_type)
/* The argument is a valid SVE vector, but doesn't match the type
   established by the earlier arguments.  */
1263 error_at (location, "passing %qT to argument %d of %qE, but"
1264 " previous arguments had type %qT",
1265 get_vector_type (new_type), argno + 1, fndecl,
1266 get_vector_type (type));
1272 /* Require argument ARGNO to be a vector type with the following properties:
1274 - the type class must be the same as FIRST_TYPE's if EXPECTED_TCLASS
1275 is SAME_TYPE_CLASS, otherwise it must be EXPECTED_TCLASS itself.
1277 - the element size must be:
1279 - the same as FIRST_TYPE's if EXPECTED_BITS == SAME_SIZE
1280 - half of FIRST_TYPE's if EXPECTED_BITS == HALF_SIZE
1281 - a quarter of FIRST_TYPE's if EXPECTED_BITS == QUARTER_SIZE
1282 - EXPECTED_BITS itself otherwise
1284 Return true if the argument has the required type, otherwise report
1285 an appropriate error.
1287 FIRST_ARGNO is the first argument that is known to have type FIRST_TYPE.
1288 Usually it comes before ARGNO, but sometimes it is more natural to resolve
1289 arguments out of order.
1291 If the required properties depend on FIRST_TYPE then both FIRST_ARGNO and
1292 ARGNO contribute to the resolution process. If the required properties
1293 are fixed, only FIRST_ARGNO contributes to the resolution process.
1295 This function is a bit of a Swiss army knife. The complication comes
1296 from trying to give good error messages when FIRST_ARGNO and ARGNO are
1297 inconsistent, since either of them might be wrong. */
1298 bool function_resolver::
1299 require_derived_vector_type (unsigned int argno,
1300 unsigned int first_argno,
1301 type_suffix_index first_type,
1302 type_class_index expected_tclass,
1303 unsigned int expected_bits)
1305 /* If the type needs to match FIRST_ARGNO exactly, use the preferred
1306 error message for that case. The VECTOR_TYPE_P test excludes tuple
1307 types, which we handle below instead. */
1308 bool both_vectors_p = VECTOR_TYPE_P (get_argument_type (first_argno));
1310 && expected_tclass == SAME_TYPE_CLASS
1311 && expected_bits == SAME_SIZE)
1313 /* There's no need to resolve this case out of order. */
1314 gcc_assert (argno > first_argno);
1315 return require_matching_vector_type (argno, first_type);
1318 /* Use FIRST_TYPE to get the expected type class and element size. */
/* NOTE(review): the ORIG_* copies below preserve the caller's symbolic
   requirements (SAME_TYPE_CLASS, SAME_SIZE, etc.) so that later error
   messages can distinguish fixed requirements from derived ones.  */
1319 type_class_index orig_expected_tclass = expected_tclass;
1320 if (expected_tclass == NUM_TYPE_CLASSES)
1321 expected_tclass = type_suffixes[first_type].tclass;
1323 unsigned int orig_expected_bits = expected_bits;
1324 if (expected_bits == SAME_SIZE)
1325 expected_bits = type_suffixes[first_type].element_bits;
1326 else if (expected_bits == HALF_SIZE)
1327 expected_bits = type_suffixes[first_type].element_bits / 2;
1328 else if (expected_bits == QUARTER_SIZE)
1329 expected_bits = type_suffixes[first_type].element_bits / 4;
1331 /* If the expected type doesn't depend on FIRST_TYPE at all,
1332 just check for the fixed choice of vector type. */
1333 if (expected_tclass == orig_expected_tclass
1334 && expected_bits == orig_expected_bits)
1336 const type_suffix_info &expected_suffix
1337 = type_suffixes[find_type_suffix (expected_tclass, expected_bits)];
1338 return require_vector_type (argno, expected_suffix.vector_type);
1341 /* Require the argument to be some form of SVE vector type,
1342 without being specific about the type of vector we want. */
1343 type_suffix_index actual_type = infer_vector_type (argno);
1344 if (actual_type == NUM_TYPE_SUFFIXES)
1347 /* Exit now if we got the right type. */
1348 bool tclass_ok_p = (type_suffixes[actual_type].tclass == expected_tclass);
1349 bool size_ok_p = (type_suffixes[actual_type].element_bits == expected_bits);
1350 if (tclass_ok_p && size_ok_p)
1353 /* First look for cases in which the actual type contravenes a fixed
1354 size requirement, without having to refer to FIRST_TYPE. */
1355 if (!size_ok_p && expected_bits == orig_expected_bits)
1357 error_at (location, "passing %qT to argument %d of %qE, which"
1358 " expects a vector of %d-bit elements",
1359 get_vector_type (actual_type), argno + 1, fndecl,
1364 /* Likewise for a fixed type class requirement. This is only ever
1365 needed for signed and unsigned types, so don't create unnecessary
1366 translation work for other type classes. */
1367 if (!tclass_ok_p && orig_expected_tclass == TYPE_signed)
1369 error_at (location, "passing %qT to argument %d of %qE, which"
1370 " expects a vector of signed integers",
1371 get_vector_type (actual_type), argno + 1, fndecl);
1374 if (!tclass_ok_p && orig_expected_tclass == TYPE_unsigned)
1376 error_at (location, "passing %qT to argument %d of %qE, which"
1377 " expects a vector of unsigned integers",
1378 get_vector_type (actual_type), argno + 1, fndecl);
1382 /* Make sure that FIRST_TYPE itself is sensible before using it
1383 as a basis for an error message. */
1384 if (resolve_to (mode_suffix_id, first_type) == error_mark_node)
1387 /* If the arguments have consistent type classes, but a link between
1388 the sizes has been broken, try to describe the error in those terms. */
1389 if (both_vectors_p && tclass_ok_p && orig_expected_bits == SAME_SIZE)
/* Report the two arguments in source order, swapping if FIRST_ARGNO
   was resolved out of order.  */
1391 if (argno < first_argno)
1393 std::swap (argno, first_argno);
1394 std::swap (actual_type, first_type);
1396 error_at (location, "arguments %d and %d of %qE must have the"
1397 " same element size, but the values passed here have type"
1398 " %qT and %qT respectively", first_argno + 1, argno + 1,
1399 fndecl, get_vector_type (first_type),
1400 get_vector_type (actual_type));
1404 /* Likewise in reverse: look for cases in which the sizes are consistent
1405 but a link between the type classes has been broken. */
1408 && orig_expected_tclass == SAME_TYPE_CLASS
1409 && type_suffixes[first_type].integer_p
1410 && type_suffixes[actual_type].integer_p)
1412 if (argno < first_argno)
1414 std::swap (argno, first_argno);
1415 std::swap (actual_type, first_type);
1417 error_at (location, "arguments %d and %d of %qE must have the"
1418 " same signedness, but the values passed here have type"
1419 " %qT and %qT respectively", first_argno + 1, argno + 1,
1420 fndecl, get_vector_type (first_type),
1421 get_vector_type (actual_type));
1425 /* The two arguments are wildly inconsistent. */
1426 type_suffix_index expected_type
1427 = find_type_suffix (expected_tclass, expected_bits);
1428 error_at (location, "passing %qT instead of the expected %qT to argument"
1429 " %d of %qE, after passing %qT to argument %d",
1430 get_vector_type (actual_type), get_vector_type (expected_type),
1431 argno + 1, fndecl, get_argument_type (first_argno),
1436 /* Require argument ARGNO to be a (possibly variable) scalar, using EXPECTED
1437 as the name of its expected type. Return true if the argument has the
1438 right form, otherwise report an appropriate error. */
1440 function_resolver::require_scalar_type (unsigned int argno,
1441 const char *expected)
1443 if (!scalar_argument_p (argno))
/* EXPECTED is only used for the diagnostic; the actual value is subject
   to the usual implicit conversions, so no exact type check is done.  */
1445 error_at (location, "passing %qT to argument %d of %qE, which"
1446 " expects %qs", get_argument_type (argno), argno + 1,
1453 /* Require argument ARGNO to be some form of pointer, without being specific
1454 about its target type. Return true if the argument has the right form,
1455 otherwise report an appropriate error. */
1457 function_resolver::require_pointer_type (unsigned int argno)
/* NOTE(review): scalar_argument_p evidently accepts pointers as scalars
   here -- the error message asks for a "scalar pointer"; confirm against
   its definition.  */
1459 if (!scalar_argument_p (argno))
1461 error_at (location, "passing %qT to argument %d of %qE, which"
1462 " expects a scalar pointer", get_argument_type (argno),
1469 /* Argument FIRST_ARGNO is a scalar with type EXPECTED_TYPE, and argument
1470 ARGNO should be consistent with it. Return true if it is, otherwise
1471 report an appropriate error. */
1472 bool function_resolver::
1473 require_matching_integer_scalar_type (unsigned int argno,
1474 unsigned int first_argno,
1475 type_suffix_index expected_type)
1477 type_suffix_index actual_type = infer_integer_scalar_type (argno);
1478 if (actual_type == NUM_TYPE_SUFFIXES)
/* infer_integer_scalar_type has already reported an error.  */
1481 if (actual_type == expected_type)
/* Both scalars agree on a type suffix; the call is unambiguous.  */
1484 error_at (location, "call to %qE is ambiguous; argument %d has type"
1485 " %qs but argument %d has type %qs", fndecl,
1486 first_argno + 1, get_scalar_type_name (expected_type),
1487 argno + 1, get_scalar_type_name (actual_type));
1491 /* Require argument ARGNO to be a (possibly variable) scalar, expecting it
1492 to have the following properties:
1494 - the type class must be the same as for type suffix 0 if EXPECTED_TCLASS
1495 is SAME_TYPE_CLASS, otherwise it must be EXPECTED_TCLASS itself.
1497 - the element size must be the same as for type suffix 0 if EXPECTED_BITS
1498 is SAME_TYPE_SIZE, otherwise it must be EXPECTED_BITS itself.
1500 Return true if the argument is valid, otherwise report an appropriate error.
1502 Note that we don't check whether the scalar type actually has the required
1503 properties, since that's subject to implicit promotions and conversions.
1504 Instead we just use the expected properties to tune the error message. */
1505 bool function_resolver::
1506 require_derived_scalar_type (unsigned int argno,
1507 type_class_index expected_tclass,
1508 unsigned int expected_bits)
1510 gcc_assert (expected_tclass == SAME_TYPE_CLASS
1511 || expected_tclass == TYPE_signed
1512 || expected_tclass == TYPE_unsigned);
1514 /* If the expected type doesn't depend on the type suffix at all,
1515 just check for the fixed choice of scalar type. */
1516 if (expected_tclass != SAME_TYPE_CLASS && expected_bits != SAME_SIZE)
1518 type_suffix_index expected_type
1519 = find_type_suffix (expected_tclass, expected_bits);
1520 return require_scalar_type (argno, get_scalar_type_name (expected_type));
/* Any scalar is acceptable; the frontend handles conversions.  */
1523 if (scalar_argument_p (argno))
1526 if (expected_tclass == SAME_TYPE_CLASS)
1527 /* It doesn't really matter whether the element is expected to be
1528 the same size as type suffix 0. */
1529 error_at (location, "passing %qT to argument %d of %qE, which"
1530 " expects a scalar element", get_argument_type (argno),
1533 /* It doesn't seem useful to distinguish between signed and unsigned
1535 error_at (location, "passing %qT to argument %d of %qE, which"
1536 " expects a scalar integer", get_argument_type (argno),
1541 /* Require argument ARGNO to be suitable for an integer constant expression.
1542 Return true if it is, otherwise report an appropriate error.
1544 function_checker checks whether the argument is actually constant and
1545 has a suitable range. The reason for distinguishing immediate arguments
1546 here is because it provides more consistent error messages than
1547 require_scalar_type would. */
1549 function_resolver::require_integer_immediate (unsigned int argno)
/* Only check that the argument is a scalar; constancy and range are
   checked later by function_checker.  */
1551 if (!scalar_argument_p (argno))
1553 report_non_ice (location, fndecl, argno);
1559 /* Require argument ARGNO to be a vector base in a gather-style address.
1560 Return its type on success, otherwise return NUM_VECTOR_TYPES. */
1562 function_resolver::infer_vector_base_type (unsigned int argno)
1564 type_suffix_index type = infer_vector_type (argno);
1565 if (type == NUM_TYPE_SUFFIXES)
1566 return NUM_VECTOR_TYPES;
/* Base addresses must be unsigned 32-bit or 64-bit vectors
   (svuint32_t or svuint64_t).  */
1568 if (type == TYPE_SUFFIX_u32 || type == TYPE_SUFFIX_u64)
1569 return type_suffixes[type].vector_type;
1571 error_at (location, "passing %qT to argument %d of %qE, which"
1572 " expects %qs or %qs", get_argument_type (argno),
1573 argno + 1, fndecl, "svuint32_t", "svuint64_t");
1574 return NUM_VECTOR_TYPES;
1577 /* Require argument ARGNO to be a vector displacement in a gather-style
1578 address. Return its type on success, otherwise return NUM_VECTOR_TYPES. */
1580 function_resolver::infer_vector_displacement_type (unsigned int argno)
1582 type_suffix_index type = infer_integer_vector_type (argno);
1583 if (type == NUM_TYPE_SUFFIXES)
1584 return NUM_VECTOR_TYPES;
/* Displacements may be signed or unsigned, but must have 32-bit or
   64-bit elements.  */
1586 if (type_suffixes[type].integer_p
1587 && (type_suffixes[type].element_bits == 32
1588 || type_suffixes[type].element_bits == 64))
1589 return type_suffixes[type].vector_type;
1591 error_at (location, "passing %qT to argument %d of %qE, which"
1592 " expects a vector of 32-bit or 64-bit integers",
1593 get_argument_type (argno), argno + 1, fndecl);
1594 return NUM_VECTOR_TYPES;
1597 /* Require argument ARGNO to be a vector displacement in a gather-style
1598 address. There are three possible uses:
1600 - for loading into elements of type TYPE (when LOAD_P is true)
1601 - for storing from elements of type TYPE (when LOAD_P is false)
1602 - for prefetching data (when TYPE is NUM_TYPE_SUFFIXES)
1604 The overloaded function's mode suffix determines the units of the
1605 displacement (bytes for "_offset", elements for "_index").
1607 Return the associated mode on success, otherwise report an error
1608 and return MODE_none. */
1610 function_resolver::resolve_sv_displacement (unsigned int argno,
1611 type_suffix_index type,
1614 if (type == NUM_TYPE_SUFFIXES)
1616 /* For prefetches, the base is a void pointer and the displacement
1617 can be any valid offset or index type. */
1618 vector_type_index displacement_vector_type
1619 = infer_vector_displacement_type (argno);
1620 if (displacement_vector_type == NUM_VECTOR_TYPES)
/* infer_vector_displacement_type has already reported an error.  */
1623 mode_suffix_index mode = find_mode_suffix (NUM_VECTOR_TYPES,
1624 displacement_vector_type,
1625 displacement_units ());
/* Every valid displacement type has a corresponding mode suffix
   for prefetches, so the lookup cannot fail.  */
1626 gcc_assert (mode != MODE_none);
1630 /* Check for some form of vector type, without naming any in particular
1631 as being expected. */
1632 type_suffix_index displacement_type = infer_vector_type (argno);
1633 if (displacement_type == NUM_TYPE_SUFFIXES)
1636 /* If the displacement type is consistent with the data vector type,
1637 try to find the associated mode suffix. This will fall through
1638 for non-integral displacement types. */
1639 unsigned int required_bits = type_suffixes[type].element_bits;
1640 if (type_suffixes[displacement_type].element_bits == required_bits)
1642 vector_type_index displacement_vector_type
1643 = type_suffixes[displacement_type].vector_type;
1644 mode_suffix_index mode = find_mode_suffix (NUM_VECTOR_TYPES,
1645 displacement_vector_type,
1646 displacement_units ());
1647 if (mode != MODE_none)
/* Reaching here means the displacement is not usable; pick an error
   message that reflects whether TYPE came from the user or the name.  */
1651 if (type_suffix_ids[0] == NUM_TYPE_SUFFIXES)
1653 /* TYPE has been inferred rather than specified by the user,
1654 so mention it in the error messages. */
1656 error_at (location, "passing %qT to argument %d of %qE, which when"
1657 " loading %qT expects a vector of %d-bit integers",
1658 get_argument_type (argno), argno + 1, fndecl,
1659 get_vector_type (type), required_bits);
1661 error_at (location, "passing %qT to argument %d of %qE, which when"
1662 " storing %qT expects a vector of %d-bit integers",
1663 get_argument_type (argno), argno + 1, fndecl,
1664 get_vector_type (type), required_bits);
1667 /* TYPE is part of the function name. */
1668 error_at (location, "passing %qT to argument %d of %qE, which"
1669 " expects a vector of %d-bit integers",
1670 get_argument_type (argno), argno + 1, fndecl, required_bits);
1674 /* Require the arguments starting at ARGNO to form a gather-style address.
1675 There are three possible uses:
1677 - for loading into elements of type TYPE (when LOAD_P is true)
1678 - for storing from elements of type TYPE (when LOAD_P is false)
1679 - for prefetching data (when TYPE is NUM_TYPE_SUFFIXES)
1681 The three possible addresses are:
1683 - a vector base with no displacement
1684 - a vector base and a scalar displacement
1685 - a scalar (pointer) base and a vector displacement
1687 The overloaded function's mode suffix determines whether there is
1688 a displacement, and if so, what units it uses:
1690 - MODE_none: no displacement
1691 - MODE_offset: the displacement is measured in bytes
1692 - MODE_index: the displacement is measured in elements
1694 Return the mode of the non-overloaded function on success, otherwise
1695 report an error and return MODE_none. */
1697 function_resolver::resolve_gather_address (unsigned int argno,
1698 type_suffix_index type,
1701 tree actual = get_argument_type (argno);
1702 if (actual == error_mark_node)
/* The frontend has already reported an error for this argument.  */
1705 if (displacement_units () != UNITS_none)
1707 /* Some form of displacement is needed. First handle a scalar
1708 pointer base and a vector displacement. */
1709 if (scalar_argument_p (argno))
1710 /* Don't check the pointer type here, since there's only one valid
1711 choice. Leave that to the frontend. */
1712 return resolve_sv_displacement (argno + 1, type, load_p);
1714 if (!VECTOR_TYPE_P (actual))
1716 error_at (location, "passing %qT to argument %d of %qE,"
1717 " which expects a vector or pointer base address",
1718 actual, argno + 1, fndecl);
1723 /* Check for the correct choice of vector base type. */
1724 vector_type_index base_vector_type;
1725 if (type == NUM_TYPE_SUFFIXES)
1727 /* Since prefetches have no type suffix, there is a free choice
1728 between 32-bit and 64-bit base addresses. */
1729 base_vector_type = infer_vector_base_type (argno);
1730 if (base_vector_type == NUM_VECTOR_TYPES)
/* infer_vector_base_type has already reported an error.  */
1735 /* Check for some form of vector type, without saying which type
1737 type_suffix_index base_type = infer_vector_type (argno);
1738 if (base_type == NUM_TYPE_SUFFIXES)
1741 /* Check whether the type is the right one. */
1742 unsigned int required_bits = type_suffixes[type].element_bits;
1743 gcc_assert (required_bits == 32 || required_bits == 64);
1744 type_suffix_index required_type = (required_bits == 32
1747 if (required_type != base_type)
1749 error_at (location, "passing %qT to argument %d of %qE,"
1750 " which expects %qT", actual, argno + 1, fndecl,
1751 get_vector_type (required_type));
1754 base_vector_type = type_suffixes[base_type].vector_type;
1757 /* Check the scalar displacement, if any. */
1758 if (displacement_units () != UNITS_none
1759 && !require_scalar_type (argno + 1, "int64_t"))
1762 /* Find the appropriate mode suffix. The checks above should have
1763 weeded out all erroneous cases. */
/* Linear search over the mode-suffix table for a vector-base mode with
   a scalar (non-vector) displacement in the right units.  */
1764 for (unsigned int mode_i = 0; mode_i < ARRAY_SIZE (mode_suffixes); ++mode_i)
1766 const mode_suffix_info &mode = mode_suffixes[mode_i];
1767 if (mode.base_vector_type == base_vector_type
1768 && mode.displacement_vector_type == NUM_VECTOR_TYPES
1769 && mode.displacement_units == displacement_units ())
1770 return mode_suffix_index (mode_i);
1776 /* Require arguments ARGNO and ARGNO + 1 to form an ADR-style address,
1777 i.e. one with a vector of base addresses and a vector of displacements.
1778 The overloaded function's mode suffix determines the units of the
1779 displacement (bytes for "_offset", elements for "_index").
1781 Return the associated mode suffix on success, otherwise report
1782 an error and return MODE_none. */
1784 function_resolver::resolve_adr_address (unsigned int argno)
1786 vector_type_index base_type = infer_vector_base_type (argno);
1787 if (base_type == NUM_VECTOR_TYPES)
/* infer_vector_base_type has already reported an error.  */
1790 vector_type_index displacement_type
1791 = infer_vector_displacement_type (argno + 1);
1792 if (displacement_type == NUM_VECTOR_TYPES)
/* infer_vector_displacement_type has already reported an error.  */
1795 mode_suffix_index mode = find_mode_suffix (base_type, displacement_type,
1796 displacement_units ());
/* Both types are individually valid, but their combination might not
   exist; e.g. the base and displacement element sizes may differ.  */
1797 if (mode == MODE_none)
1799 if (mode_suffix_id == MODE_offset)
1800 error_at (location, "cannot combine a base of type %qT with"
1801 " an offset of type %qT",
1802 get_argument_type (argno), get_argument_type (argno + 1));
1804 error_at (location, "cannot combine a base of type %qT with"
1805 " an index of type %qT",
1806 get_argument_type (argno), get_argument_type (argno + 1));
1811 /* Require the function to have exactly EXPECTED arguments. Return true
1812 if it does, otherwise report an appropriate error. */
1814 function_resolver::check_num_arguments (unsigned int expected)
1816 if (m_arglist.length () < expected)
1817 error_at (location, "too few arguments to function %qE", fndecl);
1818 else if (m_arglist.length () > expected)
1819 error_at (location, "too many arguments to function %qE", fndecl);
/* True exactly when neither diagnostic above fired.  */
1820 return m_arglist.length () == expected;
1823 /* If the function is predicated, check that the first argument is a
1824 suitable governing predicate. Also check that there are NOPS further
1825 arguments after any governing predicate, but don't check what they are.
1827 Return true on success, otherwise report a suitable error.
1828 When returning true:
1830 - set I to the number of the first unchecked argument.
1831 - set NARGS to the total number of arguments. */
1833 function_resolver::check_gp_argument (unsigned int nops,
1834 unsigned int &i, unsigned int &nargs)
1837 if (pred != PRED_none)
1839 /* Unary merge operations should use resolve_unary instead. */
1840 gcc_assert (nops != 1 || pred != PRED_m);
/* The governing predicate is always argument 0 and must be svbool_t.  */
1842 if (!check_num_arguments (nargs)
1843 || !require_vector_type (i, VECTOR_TYPE_svbool_t))
/* Unpredicated: only the argument count needs checking here.  */
1850 if (!check_num_arguments (nargs))
1857 /* Finish resolving a function whose final argument can be a vector
1858 or a scalar, with the function having an implicit "_n" suffix
1859 in the latter case. This "_n" form might only exist for certain
1862 ARGNO is the index of the final argument. The inferred type
1863 suffix is FIRST_TYPE, which was obtained from argument FIRST_ARGNO.
1864 EXPECTED_TCLASS and EXPECTED_BITS describe the expected properties
1865 of the final vector or scalar argument, in the same way as for
1866 require_derived_vector_type.
1868 Return the function decl of the resolved function on success,
1869 otherwise report a suitable error and return error_mark_node. */
1870 tree function_resolver::
1871 finish_opt_n_resolution (unsigned int argno, unsigned int first_argno,
1872 type_suffix_index first_type,
1873 type_class_index expected_tclass,
1874 unsigned int expected_bits)
/* NULL_TREE if no _n form exists for FIRST_TYPE -- TODO confirm
   lookup_form's failure value.  */
1876 tree scalar_form = lookup_form (MODE_n, first_type);
1878 /* Allow the final argument to be scalar, if an _n form exists. */
1879 if (scalar_argument_p (argno))
1884 /* Check the vector form normally. If that succeeds, raise an
1885 error about having no corresponding _n form. */
1886 tree res = resolve_to (mode_suffix_id, first_type);
1887 if (res != error_mark_node)
1888 error_at (location, "passing %qT to argument %d of %qE, but its"
1889 " %qT form does not accept scalars",
1890 get_argument_type (argno), argno + 1, fndecl,
1891 get_vector_type (first_type));
1892 return error_mark_node;
1895 /* If an _n form does exist, provide a more accurate message than
1896 require_derived_vector_type would for arguments that are neither
1897 vectors nor scalars. */
1898 if (scalar_form && !require_vector_or_scalar_type (argno))
1899 return error_mark_node;
1901 /* Check for the correct vector type. */
1902 if (!require_derived_vector_type (argno, first_argno, first_type,
1903 expected_tclass, expected_bits))
1904 return error_mark_node;
/* All checks passed: resolve to the vector (non-_n) form.  */
1906 return resolve_to (mode_suffix_id, first_type);
1909 /* Resolve a (possibly predicated) unary function. If the function uses
1910 merge predication, there is an extra vector argument before the
1911 governing predicate that specifies the values of inactive elements.
1912 This argument has the following properties:
1914 - the type class must be the same as for active elements if MERGE_TCLASS
1915 is SAME_TYPE_CLASS, otherwise it must be MERGE_TCLASS itself.
1917 - the element size must be the same as for active elements if MERGE_BITS
1918 is SAME_TYPE_SIZE, otherwise it must be MERGE_BITS itself.
1920 Return the function decl of the resolved function on success,
1921 otherwise report a suitable error and return error_mark_node. */
1923 function_resolver::resolve_unary (type_class_index merge_tclass,
1924 unsigned int merge_bits)
1926 type_suffix_index type;
/* _m functions take (inactive, pg, op): three arguments in total.  */
1929 if (!check_num_arguments (3))
1930 return error_mark_node;
1931 if (merge_tclass == SAME_TYPE_CLASS && merge_bits == SAME_SIZE)
1933 /* The inactive elements are the same as the active elements,
1934 so we can use normal left-to-right resolution. */
1935 if ((type = infer_vector_type (0)) == NUM_TYPE_SUFFIXES
1936 || !require_vector_type (1, VECTOR_TYPE_svbool_t)
1937 || !require_matching_vector_type (2, type))
1938 return error_mark_node;
1942 /* The inactive element type is a function of the active one,
1943 so resolve the active one first. */
1944 if (!require_vector_type (1, VECTOR_TYPE_svbool_t)
1945 || (type = infer_vector_type (2)) == NUM_TYPE_SUFFIXES
1946 || !require_derived_vector_type (0, 2, type, merge_tclass,
1948 return error_mark_node;
1953 /* We just need to check the predicate (if any) and the single
1955 unsigned int i, nargs;
1956 if (!check_gp_argument (1, i, nargs)
1957 || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES)
1958 return error_mark_node;
1961 /* Handle convert-like functions in which the first type suffix is
1963 if (type_suffix_ids[0] != NUM_TYPE_SUFFIXES)
1964 return resolve_to (mode_suffix_id, type_suffix_ids[0], type);
1966 return resolve_to (mode_suffix_id, type);
1969 /* Resolve a (possibly predicated) function that takes NOPS like-typed
1970 vector arguments followed by NIMM integer immediates. Return the
1971 function decl of the resolved function on success, otherwise report
1972 a suitable error and return error_mark_node. */
1974 function_resolver::resolve_uniform (unsigned int nops, unsigned int nimm)
1976 unsigned int i, nargs;
1977 type_suffix_index type;
/* The first vector operand after the predicate fixes the type suffix.  */
1978 if (!check_gp_argument (nops + nimm, i, nargs)
1979 || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES)
1980 return error_mark_node;
/* Remaining vector operands must match; trailing NIMM arguments must
   be integer immediates.  */
1983 for (; i < nargs - nimm; ++i)
1984 if (!require_matching_vector_type (i, type))
1985 return error_mark_node;
1987 for (; i < nargs; ++i)
1988 if (!require_integer_immediate (i))
1989 return error_mark_node;
1991 return resolve_to (mode_suffix_id, type);
1994 /* Resolve a (possibly predicated) function that offers a choice between
1997 - NOPS like-typed vector arguments or
1998 - NOPS - 1 like-typed vector arguments followed by a scalar argument
2000 Return the function decl of the resolved function on success,
2001 otherwise report a suitable error and return error_mark_node. */
2003 function_resolver::resolve_uniform_opt_n (unsigned int nops)
2005 unsigned int i, nargs;
2006 type_suffix_index type;
/* Check the governing predicate (if any) and infer the common vector
   type from the first vector argument.  */
2007 if (!check_gp_argument (nops, i, nargs)
2008 || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES)
2009 return error_mark_node;
/* All arguments except the last must be vectors of the inferred type;
   the last may instead be a scalar (the "_n" form), which
   finish_opt_n_resolution decides.  */
2011 unsigned int first_arg = i++;
2012 for (; i < nargs - 1; ++i)
2013 if (!require_matching_vector_type (i, type))
2014 return error_mark_node;
2016 return finish_opt_n_resolution (i, first_arg, type);
2019 /* If the call is erroneous, report an appropriate error and return
2020 error_mark_node. Otherwise, if the function is overloaded, return
2021 the decl of the non-overloaded function. Return NULL_TREE otherwise,
2022 indicating that the call should be processed in the normal way. */
2024 function_resolver::resolve ()
/* Delegate overload resolution to the function's shape.  */
2026 return shape->resolve (*this);
2029 function_checker::function_checker (location_t location,
2030 const function_instance &instance,
2031 tree fndecl, tree fntype,
2032 unsigned int nargs, tree *args)
2033 : function_call_info (location, instance, fndecl),
2034 m_fntype (fntype), m_nargs (nargs), m_args (args),
2035 /* We don't have to worry about unary _m operations here, since they
2036 never have arguments that need checking. */
/* m_base_arg is the index of the first non-predication argument:
   predicated functions pass the governing predicate first.  */
2037 m_base_arg (pred != PRED_none ? 1 : 0)
2041 /* Return true if argument ARGNO exists. which it might not for
2042 erroneous calls. It is safe to wave through checks if this
2043 function returns false. */
2045 function_checker::argument_exists_p (unsigned int argno)
/* ARGNO must be valid for the function type itself; it can still be
   missing from M_ARGS when the call was erroneous.  */
2047 gcc_assert (argno < (unsigned int) type_num_arguments (m_fntype));
2048 return argno < m_nargs;
2051 /* Check that argument ARGNO is an integer constant expression and
2052 store its value in VALUE_OUT if so. The caller should first
2053 check that argument ARGNO exists. */
2055 function_checker::require_immediate (unsigned int argno,
2056 HOST_WIDE_INT &value_out)
2058 gcc_assert (argno < m_nargs)
2059 tree arg = m_args[argno];
2061 /* The type and range are unsigned, so read the argument as an
2062 unsigned rather than signed HWI. */
/* A value that doesn't fit an unsigned HWI is not a usable integer
   constant expression here; report it and fail.  */
2063 if (!tree_fits_uhwi_p (arg))
2065 report_non_ice (location, fndecl, argno);
2069 /* ...but treat VALUE_OUT as signed for error reporting, since printing
2070 -1 is more user-friendly than the maximum uint64_t value. */
2071 value_out = tree_to_uhwi (arg);
2075 /* Check that argument REL_ARGNO is an integer constant expression that
2076 has the value VALUE0 or VALUE1. REL_ARGNO counts from the end of the
2077 predication arguments. */
2079 function_checker::require_immediate_either_or (unsigned int rel_argno,
2080 HOST_WIDE_INT value0,
2081 HOST_WIDE_INT value1)
2083 unsigned int argno = m_base_arg + rel_argno;
2084 if (!argument_exists_p (argno))
2087 HOST_WIDE_INT actual;
2088 if (!require_immediate (argno, actual))
2091 if (actual != value0 && actual != value1)
2093 report_neither_nor (location, fndecl, argno, actual, 90, 270);
2100 /* Check that argument REL_ARGNO is an integer constant expression that has
2101 a valid value for enumeration type TYPE. REL_ARGNO counts from the end
2102 of the predication arguments. */
2104 function_checker::require_immediate_enum (unsigned int rel_argno, tree type)
2106 unsigned int argno = m_base_arg + rel_argno;
/* Wave the check through if the erroneous call dropped the argument.  */
2107 if (!argument_exists_p (argno))
2110 HOST_WIDE_INT actual;
2111 if (!require_immediate (argno, actual))
/* Accept the constant if it matches any enumerator of TYPE.  */
2114 for (tree entry = TYPE_VALUES (type); entry; entry = TREE_CHAIN (entry))
2116 /* The value is an INTEGER_CST for C and a CONST_DECL wrapper
2117 around an INTEGER_CST for C++. */
2118 tree value = TREE_VALUE (entry);
2119 if (TREE_CODE (value) == CONST_DECL)
2120 value = DECL_INITIAL (value);
2121 if (wi::to_widest (value) == actual)
2125 report_not_enum (location, fndecl, argno, actual, type);
2129 /* Check that argument REL_ARGNO is suitable for indexing argument
2130 REL_ARGNO - 1, in groups of GROUP_SIZE elements. REL_ARGNO counts
2131 from the end of the predication arguments. */
2133 function_checker::require_immediate_lane_index (unsigned int rel_argno,
2134 unsigned int group_size)
2136 unsigned int argno = m_base_arg + rel_argno;
2137 if (!argument_exists_p (argno))
2140 /* Get the type of the previous argument. tree_argument_type wants a
2141 1-based number, whereas ARGNO is 0-based. */
2142 machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, argno));
2143 gcc_assert (VECTOR_MODE_P (mode));
/* Lanes index into a 128-bit quadword, in groups of GROUP_SIZE
   elements, so the valid range is [0, NLANES - 1].  */
2144 unsigned int nlanes = 128 / (group_size * GET_MODE_UNIT_BITSIZE (mode));
2145 return require_immediate_range (rel_argno, 0, nlanes - 1);
2148 /* Check that argument REL_ARGNO is an integer constant expression that
2149 has one of the given values. */
2151 function_checker::require_immediate_one_of (unsigned int rel_argno,
2152 HOST_WIDE_INT value0,
2153 HOST_WIDE_INT value1,
2154 HOST_WIDE_INT value2,
2155 HOST_WIDE_INT value3)
2157 unsigned int argno = m_base_arg + rel_argno;
/* Wave the check through if the erroneous call dropped the argument.  */
2158 if (!argument_exists_p (argno))
2161 HOST_WIDE_INT actual;
2162 if (!require_immediate (argno, actual))
/* Fail unless the constant matches one of the four allowed values.  */
2165 if (actual != value0
2168 && actual != value3)
2170 report_not_one_of (location, fndecl, argno, actual,
2171 value0, value1, value2, value3);
2178 /* Check that argument REL_ARGNO is an integer constant expression in the
2179 range [MIN, MAX]. REL_ARGNO counts from the end of the predication
2182 function_checker::require_immediate_range (unsigned int rel_argno,
2186 unsigned int argno = m_base_arg + rel_argno;
2187 if (!argument_exists_p (argno))
2190 /* Required because of the tree_to_uhwi -> HOST_WIDE_INT conversion
2191 in require_immediate. */
2192 gcc_assert (min >= 0 && min <= max);
2193 HOST_WIDE_INT actual;
2194 if (!require_immediate (argno, actual))
/* Report out-of-range constants together with the allowed bounds.  */
2197 if (!IN_RANGE (actual, min, max))
2199 report_out_of_range (location, fndecl, argno, actual, min, max);
2206 /* Perform semantic checks on the call. Return true if the call is valid,
2207 otherwise report a suitable error. */
2209 function_checker::check ()
2211 function_args_iterator iter;
/* First check arguments whose parameter type is an enumeration: their
   valid values follow from the enum itself rather than from the shape.  */
2214 FOREACH_FUNCTION_ARGS (m_fntype, type, iter)
2216 if (type == void_type_node || i >= m_nargs)
2220 && TREE_CODE (type) == ENUMERAL_TYPE
2221 && !require_immediate_enum (i - m_base_arg, type))
/* Then apply the shape-specific checks.  */
2227 return shape->check (*this);
2230 gimple_folder::gimple_folder (const function_instance &instance, tree fndecl,
2231 gimple_stmt_iterator *gsi_in, gcall *call_in)
2232 : function_call_info (gimple_location (call_in), instance, fndecl),
/* Cache the statement iterator, the call and its lhs (may be null).  */
2233 gsi (gsi_in), call (call_in), lhs (gimple_call_lhs (call_in))
2237 /* Convert predicate argument ARGNO so that it has the type appropriate for
2238 an operation on VECTYPE. Add any new statements to STMTS. */
2240 gimple_folder::convert_pred (gimple_seq &stmts, tree vectype,
/* Predicates are all passed as svbool_t; view-convert the argument to
   the truth vector type that corresponds to VECTYPE.  */
2243 tree predtype = truth_type_for (vectype);
2244 tree pred = gimple_call_arg (call, argno);
2245 return gimple_build (&stmts, VIEW_CONVERT_EXPR, predtype, pred);
2248 /* Return a pointer to the address in a contiguous load or store,
2249 given that each memory vector has type VECTYPE. Add any new
2250 statements to STMTS. */
2252 gimple_folder::fold_contiguous_base (gimple_seq &stmts, tree vectype)
2254 tree base = gimple_call_arg (call, 1);
/* For "vnum" forms, add the vector-count offset (argument 2) scaled by
   the size of one memory vector.  */
2255 if (mode_suffix_id == MODE_vnum)
2257 tree offset = gimple_call_arg (call, 2);
2258 offset = gimple_convert (&stmts, sizetype, offset);
2259 offset = gimple_build (&stmts, MULT_EXPR, sizetype, offset,
2260 TYPE_SIZE_UNIT (vectype));
2261 base = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (base),
2267 /* Return the alignment and TBAA argument to an internal load or store
2268 function like IFN_MASK_LOAD or IFN_MASK_STORE, given that it accesses
2269 memory elements of type TYPE. */
2271 gimple_folder::load_store_cookie (tree type)
/* The cookie is a pointer-typed constant carrying TYPE's alignment
   in bytes, as expected by the internal load/store functions.  */
2273 return build_int_cst (build_pointer_type (type), TYPE_ALIGN_UNIT (type));
2276 /* Fold the call to a PTRUE, taking the element size from type suffix 0. */
2278 gimple_folder::fold_to_ptrue ()
2280 tree svbool_type = TREE_TYPE (lhs);
2281 tree bool_type = TREE_TYPE (svbool_type);
2282 unsigned int element_bytes = type_suffix (0).element_bytes;
2284 /* The return type is svbool_t for all type suffixes, thus for b8 we
2285 want { 1, 1, 1, 1, ... }, for b16 we want { 1, 0, 1, 0, ... }, etc. */
/* Build a repeating pattern of ELEMENT_BYTES booleans with only the
   first one set, then assign the constant to the lhs.  */
2286 tree_vector_builder builder (svbool_type, element_bytes, 1);
2287 builder.quick_push (build_all_ones_cst (bool_type));
2288 for (unsigned int i = 1; i < element_bytes; ++i)
2289 builder.quick_push (build_zero_cst (bool_type));
2290 return gimple_build_assign (lhs, builder.build ());
2293 /* Fold the call to a PFALSE. */
2295 gimple_folder::fold_to_pfalse ()
/* An all-false predicate is simply the zero constant of the lhs type.  */
2297 return gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2300 /* Fold an operation to a constant predicate in which the first VL
2301 elements are set and the rest are clear. Take the element size
2302 from type suffix 0. */
2304 gimple_folder::fold_to_vl_pred (unsigned int vl)
2306 tree vectype = TREE_TYPE (lhs);
2307 tree element_type = TREE_TYPE (vectype);
2308 tree minus_one = build_all_ones_cst (element_type);
2309 tree zero = build_zero_cst (element_type);
2310 unsigned int element_bytes = type_suffix (0).element_bytes;
2312 /* Construct COUNT elements that contain the ptrue followed by
2313 a repeating sequence of COUNT elements. */
2314 unsigned int count = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vectype));
2315 gcc_assert (vl * element_bytes <= count);
2316 tree_vector_builder builder (vectype, count, 2);
2317 for (unsigned int i = 0; i < count * 2; ++i)
/* Set one boolean per element (every ELEMENT_BYTES-th slot), and only
   for the first VL elements.  */
2319 bool bit = (i & (element_bytes - 1)) == 0 && i < vl * element_bytes;
2320 builder.quick_push (bit ? minus_one : zero);
2322 return gimple_build_assign (lhs, builder.build ());
2325 /* Try to fold the call. Return the new statement on success and null
2328 gimple_folder::fold ()
2330 /* Don't fold anything when SVE is disabled; emit an error during
2331 expansion instead. */
2335 /* Punt if the function has a return type and no result location is
2336 provided. The attributes should allow target-independent code to
2337 remove the calls if appropriate. */
2338 if (!lhs && TREE_TYPE (gimple_call_fntype (call)) != void_type_node)
/* Delegate the actual folding to the function's base class.  */
2341 return base->fold (*this);
2344 function_expander::function_expander (const function_instance &instance,
2345 tree fndecl, tree call_expr_in,
2346 rtx possible_target_in)
2347 : function_call_info (EXPR_LOCATION (call_expr_in), instance, fndecl),
/* Cache the call expression and the caller's suggested result rtx.  */
2348 call_expr (call_expr_in), possible_target (possible_target_in)
2352 /* Return the handler of direct optab OP for type suffix SUFFIX_I. */
2354 function_expander::direct_optab_handler (optab op, unsigned int suffix_i)
/* Look up OP for the vector mode associated with type suffix SUFFIX_I.  */
2356 return ::direct_optab_handler (op, vector_mode (suffix_i));
2359 /* Choose between signed and unsigned direct optabs SIGNED_OP and
2360 UNSIGNED_OP based on the signedness of type suffix SUFFIX_I, then
2361 pick the appropriate optab handler for the mode. Use MODE as the
2362 mode if given, otherwise use the mode of type suffix SUFFIX_I. */
2364 function_expander::direct_optab_handler_for_sign (optab signed_op,
2366 unsigned int suffix_i,
/* VOIDmode means "use the mode of type suffix SUFFIX_I".  */
2369 if (mode == VOIDmode)
2370 mode = vector_mode (suffix_i);
2371 optab op = type_suffix (suffix_i).unsigned_p ? unsigned_op : signed_op;
2372 return ::direct_optab_handler (op, mode);
2375 /* Return true if X overlaps any input. */
2377 function_expander::overlaps_input_p (rtx x)
/* Test X against every expanded argument rtx.  */
2379 for (unsigned int i = 0; i < args.length (); ++i)
2380 if (reg_overlap_mentioned_p (x, args[i]))
2385 /* Return the base address for a contiguous load or store function.
2386 MEM_MODE is the mode of the addressed memory. */
2388 function_expander::get_contiguous_base (machine_mode mem_mode)
2391 if (mode_suffix_id == MODE_vnum)
2393 /* Use the size of the memory mode for extending loads and truncating
2394 stores. Use the size of a full vector for non-extending loads
2395 and non-truncating stores (including svld[234] and svst[234]). */
2396 poly_int64 size = ordered_min (GET_MODE_SIZE (mem_mode),
2397 BYTES_PER_SVE_VECTOR);
/* Compute base += vnum * size in Pmode.  */
2398 rtx offset = gen_int_mode (size, Pmode);
2399 offset = simplify_gen_binary (MULT, Pmode, args[2], offset);
2400 base = simplify_gen_binary (PLUS, Pmode, base, offset);
2405 /* For a function that does the equivalent of:
2407 OUTPUT = COND ? FN (INPUTS) : FALLBACK;
2409 return the value of FALLBACK.
2411 MODE is the mode of OUTPUT. NOPS is the number of operands in INPUTS.
2412 MERGE_ARGNO is the argument that provides FALLBACK for _m functions,
2413 or DEFAULT_MERGE_ARGNO if we should apply the usual rules.
2415 ARGNO is the caller's index into args. If the returned value is
2416 argument 0 (as for unary _m operations), increment ARGNO past the
2417 returned argument. */
2419 function_expander::get_fallback_value (machine_mode mode, unsigned int nops,
2420 unsigned int merge_argno,
2421 unsigned int &argno)
/* _z predication merges with zero.  */
2424 return CONST0_RTX (mode);
2426 gcc_assert (pred == PRED_m || pred == PRED_x);
/* The usual rule: unary _m operations merge with argument 0, everything
   else with argument 1.  */
2427 if (merge_argno == DEFAULT_MERGE_ARGNO)
2428 merge_argno = nops == 1 && pred == PRED_m ? 0 : 1;
2430 if (merge_argno == 0)
2431 return args[argno++];
2433 return args[merge_argno];
2436 /* Return a REG rtx that can be used for the result of the function,
2437 using the preferred target if suitable. */
2439 function_expander::get_reg_target ()
2441 machine_mode target_mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl)));
/* Only reuse the suggested target if it exists and has the mode of the
   function's return type; otherwise allocate a fresh pseudo.  */
2442 if (!possible_target || GET_MODE (possible_target) != target_mode)
2443 possible_target = gen_reg_rtx (target_mode);
2444 return possible_target;
2447 /* As for get_reg_target, but make sure that the returned REG does not
2448 overlap any inputs. */
2450 function_expander::get_nonoverlapping_reg_target ()
/* Discard the suggested target if it clashes with any input rtx.  */
2452 if (possible_target && overlaps_input_p (possible_target))
2453 possible_target = NULL_RTX;
2454 return get_reg_target ();
2457 /* Add an output operand to the instruction we're building, which has
2458 code ICODE. Bind the output to the preferred target rtx if possible. */
2460 function_expander::add_output_operand (insn_code icode)
/* The output operand's mode comes from the instruction's own data for
   the next unfilled operand slot.  */
2462 unsigned int opno = m_ops.length ();
2463 machine_mode mode = insn_data[icode].operand[opno].mode;
2464 m_ops.safe_grow (opno + 1);
2465 create_output_operand (&m_ops.last (), possible_target, mode);
2468 /* Add an input operand to the instruction we're building, which has
2469 code ICODE. Calculate the value of the operand as follows:
2471 - If the operand is a vector and X is not, broadcast X to fill a
2472 vector of the appropriate mode.
2474 - Otherwise, if the operand is a predicate, coerce X to have the
2475 mode that the instruction expects. In this case X is known to be
2476 VNx16BImode (the mode of svbool_t).
2478 - Otherwise use X directly. The expand machinery checks that X has
2479 the right mode for the instruction. */
2481 function_expander::add_input_operand (insn_code icode, rtx x)
2483 unsigned int opno = m_ops.length ();
2484 const insn_operand_data &operand = insn_data[icode].operand[opno];
2485 machine_mode mode = operand.mode;
2486 if (mode == VOIDmode)
2488 /* The only allowable use of VOIDmode is the wildcard
2489 aarch64_any_register_operand, which is used to avoid
2490 combinatorial explosion in the reinterpret patterns. */
2491 gcc_assert (operand.predicate == aarch64_any_register_operand);
2492 mode = GET_MODE (x);
/* Broadcast a scalar X to fill a vector operand.  */
2494 else if (!VECTOR_MODE_P (GET_MODE (x)) && VECTOR_MODE_P (mode))
2495 x = expand_vector_broadcast (mode, x);
/* Reinterpret an svbool_t predicate in the mode the pattern expects.  */
2496 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
2498 gcc_assert (GET_MODE (x) == VNx16BImode);
2499 x = gen_lowpart (mode, x);
2501 m_ops.safe_grow (m_ops.length () + 1);
2502 create_input_operand (&m_ops.last (), x, mode);
2505 /* Add an integer operand with value X to the instruction. */
2507 function_expander::add_integer_operand (HOST_WIDE_INT x)
/* Integer operands carry their value directly rather than via an rtx.  */
2509 m_ops.safe_grow (m_ops.length () + 1);
2510 create_integer_operand (&m_ops.last (), x);
2513 /* Add a memory operand with mode MODE and address ADDR. */
2515 function_expander::add_mem_operand (machine_mode mode, rtx addr)
2517 gcc_assert (VECTOR_MODE_P (mode));
2518 rtx mem = gen_rtx_MEM (mode, memory_address (mode, addr));
2519 /* The memory is only guaranteed to be element-aligned. */
2520 set_mem_align (mem, GET_MODE_ALIGNMENT (GET_MODE_INNER (mode)));
/* The MEM itself is fixed; only its address may be legitimized.  */
2521 add_fixed_operand (mem);
2524 /* Add an address operand with value X. The static operand data says
2525 what mode and form the address must have. */
2527 function_expander::add_address_operand (rtx x)
/* The expand machinery legitimizes X for the operand's address form.  */
2529 m_ops.safe_grow (m_ops.length () + 1);
2530 create_address_operand (&m_ops.last (), x);
2533 /* Add an operand that must be X. The only way of legitimizing an
2534 invalid X is to reload the address of a MEM. */
2536 function_expander::add_fixed_operand (rtx x)
/* Fixed operands must be used as-is; no substitute rtx is allowed.  */
2538 m_ops.safe_grow (m_ops.length () + 1);
2539 create_fixed_operand (&m_ops.last (), x);
2542 /* Generate instruction ICODE, given that its operands have already
2543 been added to M_OPS. Return the value of the first operand. */
2545 function_expander::generate_insn (insn_code icode)
2547 expand_insn (icode, m_ops.length (), m_ops.address ());
/* Operand 0 holds the result; void functions have none meaningful.  */
2548 return function_returns_void_p () ? const0_rtx : m_ops[0].value;
2551 /* Convert the arguments to a gather/scatter function into the
2552 associated md operands. Argument ARGNO is the scalar or vector base and
2553 argument ARGNO + 1 is the scalar or vector displacement (if applicable).
2554 The md pattern expects:
2557 - a vector displacement
2558 - a const_int that is 1 if the displacement is zero-extended from 32 bits
2559 - a scaling multiplier (1 for bytes, 2 for .h indices, etc.). */
2561 function_expander::prepare_gather_address_operands (unsigned int argno)
2563 machine_mode mem_mode = memory_vector_mode ();
2564 tree vector_type = base_vector_type ();
2565 units_index units = displacement_units ();
2566 if (units == UNITS_none)
2568 /* Vector base, no displacement. Convert to an integer zero base
2569 and a vector byte offset. */
2570 args.quick_insert (argno, const0_rtx);
2571 units = UNITS_bytes;
2573 else if (vector_type)
2575 /* Vector base, scalar displacement. Convert to a scalar base and
2576 a vector byte offset. */
2577 std::swap (args[argno], args[argno + 1]);
2578 if (units == UNITS_elements)
2580 /* Convert the original scalar array index to a byte offset. */
2581 rtx size = gen_int_mode (GET_MODE_UNIT_SIZE (mem_mode), DImode);
2582 args[argno] = simplify_gen_binary (MULT, DImode, args[argno], size);
2583 units = UNITS_bytes;
2588 /* Scalar base, vector displacement. This is what the md pattern wants,
2589 so we just need to make sure that the scalar base has DImode. */
2590 if (Pmode == SImode)
2591 args[argno] = simplify_gen_unary (ZERO_EXTEND, DImode,
2592 args[argno], SImode);
2593 vector_type = displacement_vector_type ();
2595 tree scalar_displacement_type = TREE_TYPE (vector_type);
/* The md pattern also needs to know whether the displacement is a
   zero-extended 32-bit value and what multiplier to apply to it.  */
2597 bool uxtw_p = (TYPE_PRECISION (scalar_displacement_type) < 64
2598 && TYPE_UNSIGNED (scalar_displacement_type));
2599 unsigned int scale = (units == UNITS_bytes
2600 ? 1 : GET_MODE_UNIT_SIZE (mem_mode));
2602 args.quick_insert (argno + 2, GEN_INT (uxtw_p));
2603 args.quick_insert (argno + 3, GEN_INT (scale));
2606 /* The final argument is an immediate svprfop value. Add two fake arguments
2607 to represent the rw and locality operands of a PREFETCH rtx. */
2609 function_expander::prepare_prefetch_operands ()
/* The svprfop immediate was passed as the last argument.  */
2611 unsigned int prfop = INTVAL (args.last ());
2612 /* Bit 3 of the prfop selects stores over loads. */
2613 args.quick_push (GEN_INT ((prfop & 8) != 0));
2614 /* Bits 1 and 2 specify the locality; 0-based for svprfop but
2615 1-based for PREFETCH. */
2616 args.quick_push (GEN_INT (((prfop >> 1) & 3) + 1));
2619 /* Add a dummy argument to indicate whether predicate argument ARGNO
2620 is all-true when interpreted in mode PRED_MODE. The hint goes
2621 immediately after ARGNO. */
2623 function_expander::add_ptrue_hint (unsigned int argno, machine_mode pred_mode)
/* View the svbool_t predicate in PRED_MODE; comparing the folded rtx
   against the all-ones constant decides which hint to insert.  */
2625 rtx pred = gen_lowpart (pred_mode, args[argno]);
2626 int hint = (pred == CONSTM1_RTX (pred_mode)
2627 ? SVE_KNOWN_PTRUE : SVE_MAYBE_NOT_PTRUE);
2628 args.quick_insert (argno + 1, gen_int_mode (hint, SImode));
2631 /* Rotate inputs args[START:END] one position to the left, so that
2632 args[START] becomes args[END - 1]. */
2634 function_expander::rotate_inputs_left (unsigned int start, unsigned int end)
/* Save the first element, shift the rest down one slot, and reinsert
   the saved element at the end of the [START, END) window.  */
2636 rtx new_last = args[start];
2637 for (unsigned int i = start; i < end - 1; ++i)
2638 args[i] = args[i + 1];
2639 args[end - 1] = new_last;
2642 /* Return true if the negation of argument ARGNO can be folded away,
2643 replacing it with the negated value if so. MODE is the associated
2644 vector mode, but the argument could be a single element. The main
2645 case this handles is constant arguments. */
2647 function_expander::try_negating_argument (unsigned int argno,
/* Scalar arguments negate in the element mode rather than MODE.  */
2650 rtx x = args[argno];
2651 if (!VECTOR_MODE_P (GET_MODE (x)))
2652 mode = GET_MODE_INNER (mode);
/* simplify_unary_operation returns null when the NEG does not fold.  */
2654 x = simplify_unary_operation (NEG, mode, x, mode);
2662 /* Implement the call using instruction ICODE, with a 1:1 mapping between
2663 arguments and input operands. */
2665 function_expander::use_exact_insn (insn_code icode)
2667 unsigned int nops = insn_data[icode].n_operands;
/* Bind the output operand first for value-returning functions.  */
2668 if (!function_returns_void_p ())
2670 add_output_operand (icode);
2673 for (unsigned int i = 0; i < nops; ++i)
2674 add_input_operand (icode, args[i]);
2675 return generate_insn (icode);
2678 /* Implement the call using instruction ICODE, which does not use a
2679 governing predicate. We must therefore drop the GP from an _x call. */
2681 function_expander::use_unpred_insn (insn_code icode)
2683 /* We can't drop the predicate for _z and _m. */
2684 gcc_assert (pred == PRED_x || pred == PRED_none);
2685 /* Discount the output operand. */
2686 unsigned int nops = insn_data[icode].n_operands - 1;
2687 /* Drop the predicate argument in the case of _x predication. */
2688 unsigned int bias = (pred == PRED_x ? 1 : 0);
/* Bind the output, then the inputs shifted past any predicate.  */
2691 add_output_operand (icode);
2692 for (; i < nops; ++i)
2693 add_input_operand (icode, args[i + bias]);
2695 return generate_insn (icode);
2698 /* Implement the call using instruction ICODE, which is a predicated
2699 operation that returns arbitrary values for inactive lanes. */
2701 function_expander::use_pred_x_insn (insn_code icode)
2703 /* At present we never need to handle PRED_none, which would involve
2704 creating a new predicate rather than using one supplied by the user. */
2705 gcc_assert (pred == PRED_x);
2706 /* Discount the output operand. */
2707 unsigned int nops = args.length () - 1;
/* Track whether any operand is floating-point: FP patterns take an
   extra flag describing how strictly the GP must be honored.  */
2709 bool has_float_operand_p = FLOAT_MODE_P (insn_data[icode].operand[0].mode);
2711 /* Add the normal operands. */
2712 add_output_operand (icode);
2713 add_input_operand (icode, args[0]);
2714 for (unsigned int i = 0; i < nops; ++i)
2716 add_input_operand (icode, args[i + 1]);
2717 if (FLOAT_MODE_P (GET_MODE (args[i + 1])))
2718 has_float_operand_p = true;
2721 if (has_float_operand_p)
2723 /* Add a flag that indicates whether unpredicated instructions
2725 rtx pred = m_ops[1].value;
2726 if (flag_trapping_math && pred != CONST1_RTX (GET_MODE (pred)))
2727 add_integer_operand (SVE_STRICT_GP);
2729 add_integer_operand (SVE_RELAXED_GP);
2732 return generate_insn (icode);
2735 /* Implement the call using instruction ICODE, which does the equivalent of:
2737 OUTPUT = COND ? FN (INPUTS) : FALLBACK;
2739 The instruction operands are in the order above: OUTPUT, COND, INPUTS
2740 and FALLBACK. MERGE_ARGNO is the argument that provides FALLBACK for _m
2741 functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules. */
2743 function_expander::use_cond_insn (insn_code icode, unsigned int merge_argno)
2745 /* At present we never need to handle PRED_none, which would involve
2746 creating a new predicate rather than using one supplied by the user. */
2747 gcc_assert (pred != PRED_none);
2748 /* Discount the output, predicate and fallback value. */
2749 unsigned int nops = insn_data[icode].n_operands - 3;
2750 machine_mode mode = insn_data[icode].operand[0].mode;
2752 unsigned int opno = 0;
2753 rtx fallback_arg = get_fallback_value (mode, nops, merge_argno, opno);
2754 rtx pred = args[opno++];
/* Operand order for cond patterns: output, predicate, inputs,
   fallback value for inactive lanes.  */
2756 add_output_operand (icode);
2757 add_input_operand (icode, pred);
2758 for (unsigned int i = 0; i < nops; ++i)
2759 add_input_operand (icode, args[opno + i]);
2760 add_input_operand (icode, fallback_arg);
2761 return generate_insn (icode);
2764 /* Implement the call using instruction ICODE, which is a select-like
2765 operation with the following operands:
2772 MERGE_ARGNO is the argument that provides the "false" value for _m
2773 functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules. */
2775 function_expander::use_vcond_mask_insn (insn_code icode,
2776 unsigned int merge_argno)
2778 machine_mode mode = vector_mode (0);
2780 unsigned int opno = 0;
2781 rtx false_arg = get_fallback_value (mode, 1, merge_argno, opno);
2782 rtx pred_arg = args[opno++];
2783 rtx true_arg = args[opno++];
/* Operand order used here: output, "true" value, "false" value,
   selecting predicate.  */
2785 add_output_operand (icode);
2786 add_input_operand (icode, true_arg);
2787 add_input_operand (icode, false_arg);
2788 add_input_operand (icode, pred_arg);
2789 return generate_insn (icode);
2792 /* Implement the call using instruction ICODE, which loads memory operand 1
2793 into register operand 0 under the control of predicate operand 2.
2794 Extending loads have a further predicate (operand 3) that nominally
2795 controls the extension. */
2797 function_expander::use_contiguous_load_insn (insn_code icode)
2799 machine_mode mem_mode = memory_vector_mode ();
2801 add_output_operand (icode);
2802 add_mem_operand (mem_mode, get_contiguous_base (mem_mode));
2803 add_input_operand (icode, args[0]);
/* Extending loads (memory elements narrower than register elements)
   take an extra all-true predicate for the extension.  */
2804 if (GET_MODE_UNIT_BITSIZE (mem_mode) < type_suffix (0).element_bits)
2805 add_input_operand (icode, CONSTM1_RTX (VNx16BImode));
2806 return generate_insn (icode);
2809 /* Implement the call using instruction ICODE, which prefetches from
2810 address operand 1 under the control of predicate operand 0.
2811 Operands 2, 3 and 4 respectively specify the svprfop value,
2812 the PREFETCH rw flag and the PREFETCH locality. */
2814 function_expander::use_contiguous_prefetch_insn (insn_code icode)
/* Operands: predicate, address, then the three trailing arguments
   (svprfop, rw flag, locality) set up by prepare_prefetch_operands.  */
2816 add_input_operand (icode, args[0]);
2817 add_address_operand (get_contiguous_base (VNx16QImode));
2818 for (unsigned int i = args.length () - 3; i < args.length (); ++i)
2819 add_input_operand (icode, args[i]);
2820 return generate_insn (icode);
2823 /* Implement the call using instruction ICODE, which stores register operand 1
2824 into memory operand 0 under the control of predicate operand 2. */
2826 function_expander::use_contiguous_store_insn (insn_code icode)
2828 machine_mode mem_mode = memory_vector_mode ();
/* Operands: memory destination, stored value (the last argument),
   governing predicate (the first argument).  */
2830 add_mem_operand (mem_mode, get_contiguous_base (mem_mode));
2831 add_input_operand (icode, args.last ());
2832 add_input_operand (icode, args[0]);
2833 return generate_insn (icode);
2836 /* Implement the call using one of the following strategies, chosen in order:
2838 (1) "aarch64_pred_<optab><mode>_z" for PRED_z predicate functions
2840 (2) "aarch64_pred_<optab><mode>" for PRED_x functions
2842 (3) a normal unpredicated optab for PRED_none and PRED_x functions,
2843 dropping the predicate in the latter case
2845 (4) "cond_<optab><mode>" otherwise
2847 where <optab> corresponds to:
2849 - CODE_FOR_SINT for signed integers
2850 - CODE_FOR_UINT for unsigned integers
2851 - UNSPEC_FOR_FP for floating-point values
2853 MERGE_ARGNO is the argument that provides the values of inactive lanes for
2854 _m functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules. */
2856 function_expander::map_to_rtx_codes (rtx_code code_for_sint,
2857 rtx_code code_for_uint,
2859 unsigned int merge_argno)
2861 machine_mode mode = vector_mode (0);
/* Pick the rtx code that matches the signedness of type suffix 0.  */
2862 rtx_code code = (type_suffix (0).unsigned_p ? code_for_uint : code_for_sint);
2865 /* Handle predicate logic operations, which always use _z predication. */
2866 if (type_suffix (0).tclass == TYPE_bool)
2868 gcc_assert (pred == PRED_z && code_for_uint == code_for_sint);
2869 return use_exact_insn (code_for_aarch64_pred_z (code, mode));
2872 /* First try using UNSPEC_PRED_X patterns for _x predication,
2876 if (type_suffix (0).integer_p)
2877 icode = maybe_code_for_aarch64_pred (code, mode);
2879 icode = maybe_code_for_aarch64_pred (unspec_for_fp, mode);
2880 if (icode != CODE_FOR_nothing)
2881 return use_pred_x_insn (icode);
2884 /* Otherwise expand PRED_none and PRED_x operations without a predicate.
2885 Floating-point operations conventionally use the signed rtx code. */
2886 if (pred == PRED_none || pred == PRED_x)
2887 return use_unpred_insn (direct_optab_handler (code_to_optab (code), 0));
2889 /* Don't use cond_*_optabs here, since not all codes have one yet. */
2890 if (type_suffix (0).integer_p)
2891 icode = code_for_cond (code, mode);
2893 icode = code_for_cond (unspec_for_fp, mode);
2894 return use_cond_insn (icode, merge_argno);
2897 /* Implement the call using one of the following strategies, chosen in order:
2899 (1) "aarch64_pred_<optab><mode>" for PRED_x functions; this is a
2902 (2) "aarch64_sve_<optab><mode>" for PRED_none and PRED_x functions;
2903 this is an unpredicated pattern
2905 (3) "cond_<optab><mode>" otherwise
2907 where <optab> corresponds to:
2909 - UNSPEC_FOR_SINT for signed integers
2910 - UNSPEC_FOR_UINT for unsigned integers
2911 - UNSPEC_FOR_FP for floating-point values
2913 MERGE_ARGNO is the argument that provides the values of inactive lanes for
2914 _m functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules. */
2916 function_expander::map_to_unspecs (int unspec_for_sint, int unspec_for_uint,
2917 int unspec_for_fp, unsigned int merge_argno)
2919 machine_mode mode = vector_mode (0);
/* Pick the unspec that matches the class and signedness of suffix 0.  */
2920 int unspec = (!type_suffix (0).integer_p ? unspec_for_fp
2921 : type_suffix (0).unsigned_p ? unspec_for_uint
/* Prefer the UNSPEC_PRED_X form for _x predication.  */
2926 insn_code icode = maybe_code_for_aarch64_pred (unspec, mode);
2927 if (icode != CODE_FOR_nothing)
2928 return use_pred_x_insn (icode);
2931 if (pred == PRED_none || pred == PRED_x)
2932 return use_unpred_insn (code_for_aarch64_sve (unspec, mode));
/* Fall back to the conditional (merging) pattern for _z and _m.  */
2934 insn_code icode = code_for_cond (unspec, vector_mode (0));
2935 return use_cond_insn (icode, merge_argno);
2938 /* Implement the call using an @aarch64 instruction and the
2939 instructions are parameterized by an rtx_code. CODE_FOR_SINT
2940 is the rtx_code for signed integer operations, CODE_FOR_UINT
2941 is the rtx_code for unsigned integer operations. */
2943 function_expander::expand_signed_unpred_op (rtx_code code_for_sint,
2944 rtx_code code_for_uint)
/* Choose by signedness; the @aarch64 pattern takes the code twice.  */
2947 if (type_suffix (0).unsigned_p)
2948 icode = code_for_aarch64 (code_for_uint, code_for_uint, vector_mode (0));
2950 icode = code_for_aarch64 (code_for_sint, code_for_sint, vector_mode (0));
2951 return use_unpred_insn (icode);
2954 /* Expand the call and return its lhs. */
2956 function_expander::expand ()
/* Expand every argument to an rtx, then let the function's base class
   choose the expansion strategy.  */
2958 unsigned int nargs = call_expr_nargs (call_expr);
2959 args.reserve (nargs);
2960 for (unsigned int i = 0; i < nargs; ++i)
2961 args.quick_push (expand_normal (CALL_EXPR_ARG (call_expr, i)));
2963 return base->expand (*this);
2966 /* Register the built-in SVE ABI types, such as __SVBool_t. */
2968 register_builtin_types ()
/* Record the scalar element type of each ACLE vector type.  */
2970 #define DEF_SVE_TYPE(ACLE_NAME, NCHARS, ABI_NAME, SCALAR_TYPE) \
2971 scalar_types[VECTOR_TYPE_ ## ACLE_NAME] = SCALAR_TYPE;
2972 #include "aarch64-sve-builtins.def"
2974 for (unsigned int i = 0; i < NUM_VECTOR_TYPES; ++i)
2976 tree eltype = scalar_types[i];
/* Predicate types get a boolean vector with one lane per byte...  */
2978 if (eltype == boolean_type_node)
2980 vectype = build_truth_vector_type_for_mode (BYTES_PER_SVE_VECTOR,
2982 gcc_assert (TYPE_MODE (vectype) == VNx16BImode
2983 && TYPE_MODE (vectype) == TYPE_MODE_RAW (vectype)
2984 && TYPE_ALIGN (vectype) == 16
2985 && known_eq (wi::to_poly_offset (TYPE_SIZE (vectype)),
2986 BYTES_PER_SVE_VECTOR));
/* ...while data types get a full-width SVE vector of ELTYPE.  */
2990 unsigned int elbytes = tree_to_uhwi (TYPE_SIZE_UNIT (eltype));
2991 poly_uint64 nunits = exact_div (BYTES_PER_SVE_VECTOR, elbytes);
2992 vectype = build_vector_type (eltype, nunits);
2993 gcc_assert (VECTOR_MODE_P (TYPE_MODE (vectype))
2994 && TYPE_MODE (vectype) == TYPE_MODE_RAW (vectype)
2995 && TYPE_ALIGN (vectype) == 128
2996 && known_eq (wi::to_poly_offset (TYPE_SIZE (vectype)),
2997 BITS_PER_SVE_VECTOR));
/* Give the ABI type a distinct, artificial type node and register it
   under its ABI name so it exists even without arm_sve.h.  */
2999 vectype = build_distinct_type_copy (vectype);
3000 gcc_assert (vectype == TYPE_MAIN_VARIANT (vectype));
3001 SET_TYPE_STRUCTURAL_EQUALITY (vectype);
3002 TYPE_ARTIFICIAL (vectype) = 1;
3003 TYPE_INDIVISIBLE_P (vectype) = 1;
3004 abi_vector_types[i] = vectype;
3005 lang_hooks.types.register_builtin_type (vectype,
3006 vector_types[i].abi_name);
3010 /* Initialize all compiler built-ins related to SVE that should be
3011 defined at start-up. */
3016 register_builtin_types ();
3019 /* Register vector type TYPE under its arm_sve.h name. */
3021 register_vector_type (vector_type_index type)
3023 tree vectype = abi_vector_types[type];
3024 tree id = get_identifier (vector_types[type].acle_name);
3025 tree decl = build_decl (input_location, TYPE_DECL, id, vectype);
3026 decl = lang_hooks.decls.pushdecl (decl);
3028 /* Record the new ACLE type if pushdecl succeeded without error. Use
3029 the ABI type otherwise, so that the type we record at least has the
3030 right form, even if it doesn't have the right name. This should give
3031 better error recovery behavior than installing error_mark_node or
3032 installing an incorrect type. */
3033 if (TREE_CODE (decl) == TYPE_DECL
3034 && TYPE_MAIN_VARIANT (TREE_TYPE (decl)) == vectype)
3035 vectype = TREE_TYPE (decl);
3036 acle_vector_types[0][type] = vectype;
3039 /* Register the tuple type that contains NUM_VECTORS vectors of type TYPE. */
3041 register_tuple_type (unsigned int num_vectors, vector_type_index type)
3043 tree tuple_type = lang_hooks.types.make_type (RECORD_TYPE);
3045 /* The contents of the type are opaque, so we can define them in any
3046 way that maps to the correct ABI type.
3048 Here we choose to use the same layout as for arm_neon.h, but with
3049 "__val" instead of "val":
3051 struct svfooxN_t { svfoo_t __val[N]; };
3053 (It wouldn't be possible to write that directly in C or C++ for
3054 sizeless types, but that's not a problem for this function.)
3056 Using arrays simplifies the handling of svget and svset for variable
3058 tree vector_type = acle_vector_types[0][type];
3059 tree array_type = build_array_type_nelts (vector_type, num_vectors);
3060 gcc_assert (VECTOR_MODE_P (TYPE_MODE (array_type))
3061 && TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type)
3062 && TYPE_ALIGN (array_type) == 128);
3064 tree field = build_decl (input_location, FIELD_DECL,
3065 get_identifier ("__val"), array_type);
3066 DECL_FIELD_CONTEXT (field) = tuple_type;
3067 TYPE_FIELDS (tuple_type) = field;
3068 layout_type (tuple_type);
3069 gcc_assert (VECTOR_MODE_P (TYPE_MODE (tuple_type))
3070 && TYPE_MODE_RAW (tuple_type) == TYPE_MODE (tuple_type)
3071 && TYPE_ALIGN (tuple_type) == 128);
3073 /* Work out the structure name. */
3074 char buffer[sizeof ("svfloat64x4_t")];
3075 const char *vector_type_name = vector_types[type].acle_name;
3076 snprintf (buffer, sizeof (buffer), "%.*sx%d_t",
3077 (int) strlen (vector_type_name) - 2, vector_type_name,
3080 tree decl = build_decl (input_location, TYPE_DECL,
3081 get_identifier (buffer), tuple_type);
3082 TYPE_NAME (tuple_type) = decl;
3083 TYPE_STUB_DECL (tuple_type) = decl;
3084 lang_hooks.decls.pushdecl (decl);
3085 /* ??? Undo the effect of set_underlying_type for C. The C frontend
3086 doesn't recognize DECL as a built-in because (as intended) the decl has
3087 a real location instead of BUILTINS_LOCATION. The frontend therefore
3088 treats the decl like a normal C "typedef struct foo foo;", expecting
3089 the type for tag "struct foo" to have a dummy unnamed TYPE_DECL instead
3090 of the named one we attached above. It then sets DECL_ORIGINAL_TYPE
3091 on the supposedly unnamed decl, creating a circularity that upsets
3094 We don't want to follow the normal C model and create "struct foo"
3095 tags for tuple types since (a) the types are supposed to be opaque
3096 and (b) they couldn't be defined as a real struct anyway. Treating
3097 the TYPE_DECLs as "typedef struct foo foo;" without creating
3098 "struct foo" would lead to confusing error messages. */
3099 DECL_ORIGINAL_TYPE (decl) = NULL_TREE;
3101 acle_vector_types[num_vectors - 1][type] = tuple_type;
3104 /* Register the svpattern enum. */
3106 register_svpattern ()
3108 auto_vec<string_int_pair, 32> values;
3109 #define PUSH(UPPER, LOWER, VALUE) \
3110 values.quick_push (string_int_pair ("SV_" #UPPER, VALUE));
3111 AARCH64_FOR_SVPATTERN (PUSH)
3114 acle_svpattern = lang_hooks.types.simulate_enum_decl (input_location,
3115 "svpattern", values);
3118 /* Register the svprfop enum. */
3122 auto_vec<string_int_pair, 16> values;
3123 #define PUSH(UPPER, LOWER, VALUE) \
3124 values.quick_push (string_int_pair ("SV_" #UPPER, VALUE));
3125 AARCH64_FOR_SVPRFOP (PUSH)
3128 acle_svprfop = lang_hooks.types.simulate_enum_decl (input_location,
3132 /* Implement #pragma GCC aarch64 "arm_sve.h". */
3138 error ("duplicate definition of %qs", "arm_sve.h");
3144 /* Define the vector and tuple types. */
3145 for (unsigned int type_i = 0; type_i < NUM_VECTOR_TYPES; ++type_i)
3147 vector_type_index type = vector_type_index (type_i);
3148 register_vector_type (type);
3149 if (type != VECTOR_TYPE_svbool_t)
3150 for (unsigned int count = 2; count <= MAX_TUPLE_SIZE; ++count)
3151 register_tuple_type (count, type);
3154 /* Define the enums. */
3155 register_svpattern ();
3156 register_svprfop ();
3158 /* Define the functions. */
3159 function_table = new hash_table<registered_function_hasher> (1023);
3160 function_builder builder;
3161 for (unsigned int i = 0; i < ARRAY_SIZE (function_groups); ++i)
3162 builder.register_function_group (function_groups[i]);
3165 /* Return the function decl with SVE function subcode CODE, or error_mark_node
3166 if no such function exists. */
3168 builtin_decl (unsigned int code, bool)
3170 if (code >= vec_safe_length (registered_functions))
3171 return error_mark_node;
3172 return (*registered_functions)[code]->decl;
3175 /* If we're implementing manual overloading, check whether the SVE
3176 function with subcode CODE is overloaded, and if so attempt to
3177 determine the corresponding non-overloaded function. The call
3178 occurs at location LOCATION and has the arguments given by ARGLIST.
3180 If the call is erroneous, report an appropriate error and return
3181 error_mark_node. Otherwise, if the function is overloaded, return
3182 the decl of the non-overloaded function. Return NULL_TREE otherwise,
3183 indicating that the call should be processed in the normal way. */
3185 resolve_overloaded_builtin (location_t location, unsigned int code,
3186 vec<tree, va_gc> *arglist)
3188 if (code >= vec_safe_length (registered_functions))
3191 registered_function &rfn = *(*registered_functions)[code];
3192 if (rfn.overloaded_p)
3193 return function_resolver (location, rfn.instance, rfn.decl,
3194 *arglist).resolve ();
3198 /* Perform any semantic checks needed for a call to the SVE function
3199 with subcode CODE, such as testing for integer constant expressions.
3200 The call occurs at location LOCATION and has NARGS arguments,
3201 given by ARGS. FNDECL is the original function decl, before
3202 overload resolution.
3204 Return true if the call is valid, otherwise report a suitable error. */
3206 check_builtin_call (location_t location, vec<location_t>, unsigned int code,
3207 tree fndecl, unsigned int nargs, tree *args)
3209 const registered_function &rfn = *(*registered_functions)[code];
3210 if (!check_required_extensions (location, rfn.decl, rfn.required_extensions))
3212 return function_checker (location, rfn.instance, fndecl,
3213 TREE_TYPE (rfn.decl), nargs, args).check ();
3216 /* Attempt to fold STMT, given that it's a call to the SVE function
3217 with subcode CODE. Return the new statement on success and null
3218 on failure. Insert any other new statements at GSI. */
3220 gimple_fold_builtin (unsigned int code, gimple_stmt_iterator *gsi, gcall *stmt)
3222 registered_function &rfn = *(*registered_functions)[code];
3223 return gimple_folder (rfn.instance, rfn.decl, gsi, stmt).fold ();
3226 /* Expand a call to the SVE function with subcode CODE. EXP is the call
3227 expression and TARGET is the preferred location for the result.
3228 Return the value of the lhs. */
3230 expand_builtin (unsigned int code, tree exp, rtx target)
3232 registered_function &rfn = *(*registered_functions)[code];
3233 if (!check_required_extensions (EXPR_LOCATION (exp), rfn.decl,
3234 rfn.required_extensions))
3236 return function_expander (rfn.instance, rfn.decl, exp, target).expand ();
3239 /* Return true if TYPE is the ABI-defined __SVBool_t type. */
3241 svbool_type_p (const_tree type)
3243 tree abi_type = abi_vector_types[VECTOR_TYPE_svbool_t];
3244 return type != error_mark_node && TYPE_MAIN_VARIANT (type) == abi_type;
3247 /* If TYPE is a built-in type defined by the SVE ABI, return the mangled name,
3248 otherwise return NULL. */
3250 mangle_builtin_type (const_tree type)
3252 if (type == error_mark_node)
3255 vector_type_index vtype = find_vector_type (type);
3256 if (vtype != NUM_VECTOR_TYPES)
3257 return vector_types[vtype].mangled_name;
3262 /* If TYPE is one of the ABI-defined SVE vector types, or an ACLE-defined
3263 tuple of them, return the number of vectors it contains. Return 0
3266 nvectors_if_data_type (const_tree type)
3268 if (type == error_mark_node)
3271 type = TYPE_MAIN_VARIANT (type);
3272 if (VECTOR_TYPE_P (type))
3274 vector_type_index type_id = find_vector_type (type);
3275 if (type_id != VECTOR_TYPE_svbool_t && type_id != NUM_VECTOR_TYPES)
3278 else if (TREE_CODE (type) == RECORD_TYPE)
3280 for (unsigned int size_i = 1; size_i < MAX_TUPLE_SIZE; ++size_i)
3281 for (unsigned int type_i = 0; type_i < NUM_VECTOR_TYPES; ++type_i)
3283 tree tuple_type = acle_vector_types[size_i][type_i];
3284 if (tuple_type && type == TYPE_MAIN_VARIANT (tuple_type))
3292 /* Return true if TYPE is a built-in type defined by the SVE ABI. */
3294 builtin_type_p (const_tree type)
3296 return svbool_type_p (type) || nvectors_if_data_type (type) > 0;
3301 using namespace aarch64_sve;
3304 gt_ggc_mx (function_instance *)
3309 gt_pch_nx (function_instance *)
3314 gt_pch_nx (function_instance *, void (*) (void *, void *), void *)
3318 #include "gt-aarch64-sve-builtins.h"