From: Johannes Doerfert Date: Tue, 4 Oct 2022 12:45:21 +0000 (-0700) Subject: [AMDGPU] Annotate the intrinsics to be default and nocallback X-Git-Tag: upstream/17.0.6~24865 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f6e3a89cc0b3b31de30ce68a4fcae44aa15daaa2;p=platform%2Fupstream%2Fllvm.git [AMDGPU] Annotate the intrinsics to be default and nocallback Differential Revision: https://reviews.llvm.org/D135155 --- diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 600194e..eb1fa22 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -796,7 +796,7 @@ kernel void test_s_setreg(uint val) { // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025} -// CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nofree nounwind memory(read) } +// CHECK-DAG: attributes #[[$NOUNWIND_READONLY]] = { mustprogress nocallback nofree nosync nounwind willreturn memory(read) } // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent } // CHECK-DAG: ![[$EXEC]] = !{!"exec"} // CHECK-DAG: ![[$EXEC_LO]] = !{!"exec_lo"} diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index bfd392f..1d1447c 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -513,10 +513,10 @@ def int_addressofreturnaddress : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [], [In def int_frameaddress : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_sponentry : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>; -def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty], +def int_read_register : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_metadata_ty], [IntrReadMem], "llvm.read_register">; def int_write_register : Intrinsic<[], [llvm_metadata_ty, llvm_anyint_ty], - [], "llvm.write_register">; + [IntrNoCallback], "llvm.write_register">; def int_read_volatile_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty], [IntrHasSideEffects], "llvm.read_volatile_register">; diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 3e9233b..2e63213 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -11,10 +11,10 @@ //===----------------------------------------------------------------------===// class AMDGPUReadPreloadRegisterIntrinsic - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; class AMDGPUReadPreloadRegisterIntrinsicNamed - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>, ClangBuiltin; + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, ClangBuiltin; // Used to tag image and resource intrinsics with information used to generate // mem operands. @@ -53,33 +53,33 @@ def int_r600_group_barrier : ClangBuiltin<"__builtin_r600_group_barrier">, // AS 7 is PARAM_I_ADDRESS, used for kernel arguments def int_r600_implicitarg_ptr : ClangBuiltin<"__builtin_r600_implicitarg_ptr">, - Intrinsic<[LLVMQualPointerType], [], - [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[LLVMQualPointerType], [], + [IntrNoMem, IntrSpeculatable]>; def int_r600_rat_store_typed : // 1st parameter: Data // 2nd parameter: Index // 3rd parameter: Constant RAT ID - Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrWillReturn]>, + DefaultAttrsIntrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>, ClangBuiltin<"__builtin_r600_rat_store_typed">; -def int_r600_recipsqrt_ieee : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_r600_recipsqrt_ieee : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; -def int_r600_recipsqrt_clamped : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_r600_recipsqrt_clamped : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; -def int_r600_cube : Intrinsic< - [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_r600_cube : DefaultAttrsIntrinsic< + [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable] >; -def int_r600_store_stream_output : Intrinsic< - [], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrWillReturn] +def int_r600_store_stream_output : DefaultAttrsIntrinsic< + [], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [] >; -class TextureIntrinsicFloatInput : Intrinsic<[llvm_v4f32_ty], [ +class TextureIntrinsicFloatInput : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [ llvm_v4f32_ty, // Coord llvm_i32_ty, // offset_x llvm_i32_ty, // offset_y, @@ -90,10 +90,10 @@ class TextureIntrinsicFloatInput : Intrinsic<[llvm_v4f32_ty], [ llvm_i32_ty, // coord_type_y llvm_i32_ty, // coord_type_z llvm_i32_ty], // coord_type_w - [IntrNoMem, IntrWillReturn] + [IntrNoMem] >; -class TextureIntrinsicInt32Input : Intrinsic<[llvm_v4i32_ty], [ +class TextureIntrinsicInt32Input : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [ llvm_v4i32_ty, // Coord llvm_i32_ty, // offset_x llvm_i32_ty, // offset_y, @@ -104,11 +104,11 @@ class TextureIntrinsicInt32Input : Intrinsic<[llvm_v4i32_ty], [ llvm_i32_ty, // coord_type_y llvm_i32_ty, // coord_type_z llvm_i32_ty], // coord_type_w - [IntrNoMem, IntrWillReturn] + [IntrNoMem] >; def int_r600_store_swizzle : - Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrWillReturn] + Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_r600_tex : TextureIntrinsicFloatInput; @@ -122,11 +122,11 @@ def int_r600_txq : TextureIntrinsicInt32Input; def int_r600_ddx : TextureIntrinsicFloatInput; def int_r600_ddy : TextureIntrinsicFloatInput; -def int_r600_dot4 : Intrinsic<[llvm_float_ty], - [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_r600_dot4 : DefaultAttrsIntrinsic<[llvm_float_ty], + [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable] >; -def int_r600_kill : Intrinsic<[], [llvm_float_ty], [IntrWillReturn]>; +def int_r600_kill : DefaultAttrsIntrinsic<[], [llvm_float_ty], []>; } // End TargetPrefix = "r600" @@ -141,40 +141,40 @@ defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named <"__builtin_amdgcn_workgroup_id">; def int_amdgcn_dispatch_ptr : - Intrinsic<[LLVMQualPointerType], [], - [Align, IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[LLVMQualPointerType], [], + [Align, IntrNoMem, IntrSpeculatable]>; def int_amdgcn_queue_ptr : ClangBuiltin<"__builtin_amdgcn_queue_ptr">, - Intrinsic<[LLVMQualPointerType], [], - [Align, IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[LLVMQualPointerType], [], + [Align, IntrNoMem, IntrSpeculatable]>; def int_amdgcn_kernarg_segment_ptr : ClangBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">, - Intrinsic<[LLVMQualPointerType], [], - [Align, IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[LLVMQualPointerType], [], + [Align, IntrNoMem, IntrSpeculatable]>; def int_amdgcn_implicitarg_ptr : ClangBuiltin<"__builtin_amdgcn_implicitarg_ptr">, - Intrinsic<[LLVMQualPointerType], [], - [Align, IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[LLVMQualPointerType], [], + [Align, IntrNoMem, IntrSpeculatable]>; def int_amdgcn_groupstaticsize : ClangBuiltin<"__builtin_amdgcn_groupstaticsize">, - Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_dispatch_id : ClangBuiltin<"__builtin_amdgcn_dispatch_id">, - Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>; // For internal use. Coordinates LDS lowering between IR transform and backend. def int_amdgcn_lds_kernel_id : - Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_implicit_buffer_ptr : ClangBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">, - Intrinsic<[LLVMQualPointerType], [], - [Align, IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[LLVMQualPointerType], [], + [Align, IntrNoMem, IntrSpeculatable]>; // Set EXEC to the 64-bit value given. // This is always moved to the beginning of the basic block. @@ -195,7 +195,7 @@ def int_amdgcn_init_exec_from_input : Intrinsic<[], def int_amdgcn_wavefrontsize : ClangBuiltin<"__builtin_amdgcn_wavefrontsize">, - Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; //===----------------------------------------------------------------------===// @@ -218,10 +218,10 @@ def int_amdgcn_s_sendmsg_rtn : Intrinsic <[llvm_anyint_ty], [llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_barrier : ClangBuiltin<"__builtin_amdgcn_s_barrier">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>; + Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_wave_barrier : ClangBuiltin<"__builtin_amdgcn_wave_barrier">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>; + Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // The 1st parameter is a mask for the types of instructions that may be allowed // to cross the SCHED_BARRIER during scheduling. @@ -239,7 +239,7 @@ def int_amdgcn_wave_barrier : ClangBuiltin<"__builtin_amdgcn_wave_barrier">, // MASK = 0x0000 0200: ALL DS write instructions may be scheduled across SCHED_BARRIER. def int_amdgcn_sched_barrier : ClangBuiltin<"__builtin_amdgcn_sched_barrier">, Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects, IntrConvergent, - IntrWillReturn]>; + IntrWillReturn, IntrNoCallback, IntrNoFree]>; // The first parameter is a mask that determines the types of instructions that // you would like to synchronize around and add to a scheduling group. The @@ -252,59 +252,59 @@ def int_amdgcn_sched_barrier : ClangBuiltin<"__builtin_amdgcn_sched_barrier">, def int_amdgcn_sched_group_barrier : ClangBuiltin<"__builtin_amdgcn_sched_group_barrier">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>, IntrNoMem, IntrHasSideEffects, - IntrConvergent, IntrWillReturn]>; + IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // Scheduler optimization hint. // MASK = 0: Small gemm opt def int_amdgcn_iglp_opt : ClangBuiltin<"__builtin_amdgcn_iglp_opt">, Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects, IntrConvergent, - IntrWillReturn]>; + IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_s_waitcnt : ClangBuiltin<"__builtin_amdgcn_s_waitcnt">, - Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; -def int_amdgcn_div_scale : Intrinsic< +def int_amdgcn_div_scale : DefaultAttrsIntrinsic< // 1st parameter: Numerator // 2nd parameter: Denominator // 3rd parameter: Select quotient. Must equal Numerator or Denominator. // (0 = Denominator, 1 = Numerator). [llvm_anyfloat_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable, ImmArg>, IntrWillReturn] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; -def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty], +def int_amdgcn_div_fmas : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty], +def int_amdgcn_div_fixup : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; // Look Up 2.0 / pi src0 with segment select src1[4:0] -def int_amdgcn_trig_preop : Intrinsic< +def int_amdgcn_trig_preop : DefaultAttrsIntrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_sin : Intrinsic< +def int_amdgcn_sin : DefaultAttrsIntrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_cos : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_cos : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_log_clamp : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_log_clamp : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_fmul_legacy : ClangBuiltin<"__builtin_amdgcn_fmul_legacy">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative] >; // Fused single-precision multiply-add with legacy behaviour for the multiply, @@ -313,133 +313,133 @@ def int_amdgcn_fmul_legacy : ClangBuiltin<"__builtin_amdgcn_fmul_legacy">, // v_fmac_legacy_f32 instructions. (Note that v_fma_legacy_f16 is unrelated and // has a completely different kind of legacy behaviour.) def int_amdgcn_fma_legacy : - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative] >; -def int_amdgcn_rcp : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_rcp : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_rcp_legacy : ClangBuiltin<"__builtin_amdgcn_rcp_legacy">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_sqrt : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_sqrt : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_rsq : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_rsq : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_rsq_legacy : ClangBuiltin<"__builtin_amdgcn_rsq_legacy">, - Intrinsic< - [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic< + [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable] >; // out = 1.0 / sqrt(a) result clamped to +/- max_float. -def int_amdgcn_rsq_clamp : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +def int_amdgcn_rsq_clamp : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>; // For int_amdgcn_ldexp_f16, only the low 16 bits of the i32 src1 operand will used. -def int_amdgcn_ldexp : Intrinsic< +def int_amdgcn_ldexp : DefaultAttrsIntrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_frexp_mant : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_frexp_mant : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_frexp_exp : Intrinsic< - [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_frexp_exp : DefaultAttrsIntrinsic< + [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable] >; // v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0 // and always uses rtz, so is not suitable for implementing the OpenCL // fract function. It should be ok on VI. -def int_amdgcn_fract : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] +def int_amdgcn_fract : DefaultAttrsIntrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cvt_pkrtz : ClangBuiltin<"__builtin_amdgcn_cvt_pkrtz">, - Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cvt_pknorm_i16 : ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">, - Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cvt_pknorm_u16 : ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">, - Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cvt_pk_i16 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_i16">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cvt_pk_u16 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_u16">, - Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_class : Intrinsic< +def int_amdgcn_class : DefaultAttrsIntrinsic< [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_fmed3 : ClangBuiltin<"__builtin_amdgcn_fmed3">, - Intrinsic<[llvm_anyfloat_ty], + DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubeid : ClangBuiltin<"__builtin_amdgcn_cubeid">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubema : ClangBuiltin<"__builtin_amdgcn_cubema">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubesc : ClangBuiltin<"__builtin_amdgcn_cubesc">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubetc : ClangBuiltin<"__builtin_amdgcn_cubetc">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; // v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz // should be used. def int_amdgcn_sffbh : - Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable] >; // v_mad_f32|f16/v_mac_f32|f16, selected regardless of denorm support. def int_amdgcn_fmad_ftz : - Intrinsic<[llvm_anyfloat_ty], + DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; // Fields should mirror atomicrmw @@ -450,7 +450,7 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty], llvm_i32_ty, // scope llvm_i1_ty], // isVolatile [IntrArgMemOnly, IntrWillReturn, NoCapture>, - ImmArg>, ImmArg>, ImmArg>], "", + ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -465,7 +465,7 @@ class AMDGPULDSIntrin : llvm_i32_ty, // scope llvm_i1_ty], // isVolatile [IntrArgMemOnly, IntrWillReturn, NoCapture>, - ImmArg>, ImmArg>, ImmArg>] + ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree] >; // FIXME: The m0 argument should be moved after the normal arguments @@ -484,7 +484,7 @@ class AMDGPUDSOrderedIntrinsic : Intrinsic< llvm_i1_ty], // wave done, set to 1 for the last ordered instruction [IntrWillReturn, NoCapture>, ImmArg>, ImmArg>, ImmArg>, - ImmArg>, ImmArg>, ImmArg> + ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree ] >; @@ -493,7 +493,7 @@ class AMDGPUDSAppendConsumedIntrinsic : Intrinsic< [llvm_anyptr_ty, // LDS or GDS ptr llvm_i1_ty], // isVolatile [IntrConvergent, IntrWillReturn, IntrArgMemOnly, - NoCapture>, ImmArg>], + NoCapture>, ImmArg>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -991,30 +991,29 @@ let TargetPrefix = "amdgcn" in { defset list AMDGPUBufferIntrinsics = { -class AMDGPUBufferLoad : Intrinsic < +class AMDGPUBufferLoad : DefaultAttrsIntrinsic < [data_ty], [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrReadMem, IntrWillReturn, - ImmArg>, ImmArg>], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_buffer_load_format : AMDGPUBufferLoad; def int_amdgcn_buffer_load : AMDGPUBufferLoad; // Generate a buffer_load instruction that may be optimized to s_buffer_load if // the offset argument is uniform. -def int_amdgcn_s_buffer_load : Intrinsic < +def int_amdgcn_s_buffer_load : DefaultAttrsIntrinsic < [llvm_any_ty], [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // byte offset llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 2 = dlc) - [IntrNoMem, IntrWillReturn, ImmArg>]>, + [IntrNoMem, ImmArg>]>, AMDGPURsrcIntrinsic<0>; -class AMDGPUBufferStore : Intrinsic < +class AMDGPUBufferStore : DefaultAttrsIntrinsic < [], [data_ty, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) @@ -1022,8 +1021,7 @@ class AMDGPUBufferStore : Intrinsic < llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrWriteMem, IntrWillReturn, - ImmArg>, ImmArg>], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_buffer_store_format : AMDGPUBufferStore; def int_amdgcn_buffer_store : AMDGPUBufferStore; @@ -1035,7 +1033,7 @@ def int_amdgcn_buffer_store : AMDGPUBufferStore; // and swizzling changes depending on whether idxen is set in the instruction. // These new instrinsics also keep the offset and soffset arguments separate as // they behave differently in bounds checking and swizzling. -class AMDGPURawBufferLoad : Intrinsic < +class AMDGPURawBufferLoad : DefaultAttrsIntrinsic < [data_ty], [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) @@ -1044,12 +1042,12 @@ class AMDGPURawBufferLoad : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrReadMem, IntrWillReturn, ImmArg>], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad; def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad; -class AMDGPUStructBufferLoad : Intrinsic < +class AMDGPUStructBufferLoad : DefaultAttrsIntrinsic < [data_ty], [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) @@ -1059,12 +1057,12 @@ class AMDGPUStructBufferLoad : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrReadMem, IntrWillReturn, ImmArg>], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad; def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad; -class AMDGPURawBufferStore : Intrinsic < +class AMDGPURawBufferStore : DefaultAttrsIntrinsic < [], [data_ty, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) @@ -1074,12 +1072,12 @@ class AMDGPURawBufferStore : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrWriteMem, IntrWillReturn, ImmArg>], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore; def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore; -class AMDGPUStructBufferStore : Intrinsic < +class AMDGPUStructBufferStore : DefaultAttrsIntrinsic < [], [data_ty, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) @@ -1090,7 +1088,7 @@ class AMDGPUStructBufferStore : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrWriteMem, IntrWillReturn, ImmArg>], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore; def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore; @@ -1102,7 +1100,7 @@ class AMDGPURawBufferAtomic : llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 1 = slc) - [ImmArg>, IntrWillReturn], "", [SDNPMemOperand]>, + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1, 0>; def int_amdgcn_raw_buffer_atomic_swap : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic; @@ -1126,7 +1124,7 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic< llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 1 = slc) - [ImmArg>, IntrWillReturn], "", [SDNPMemOperand]>, + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; // gfx908 intrinsic @@ -1140,7 +1138,7 @@ class AMDGPUStructBufferAtomic>, IntrWillReturn], "", [SDNPMemOperand]>, + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1, 0>; def int_amdgcn_struct_buffer_atomic_swap : AMDGPUStructBufferAtomic; def int_amdgcn_struct_buffer_atomic_add : AMDGPUStructBufferAtomic; @@ -1163,7 +1161,7 @@ def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic< llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 1 = slc) - [ImmArg>, IntrWillReturn], "", [SDNPMemOperand]>, + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; // gfx908 intrinsic @@ -1175,7 +1173,7 @@ def int_amdgcn_struct_buffer_atomic_fmax : AMDGPUStructBufferAtomic>, ImmArg>, ImmArg>, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; -def int_amdgcn_tbuffer_store : Intrinsic < +def int_amdgcn_tbuffer_store : DefaultAttrsIntrinsic < [], [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 llvm_v4i32_ty, // rsrc(SGPR) @@ -1203,7 +1201,7 @@ def int_amdgcn_tbuffer_store : Intrinsic < llvm_i32_ty, // nfmt(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrWriteMem, IntrWillReturn, ImmArg>, + [IntrWriteMem, ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; @@ -1212,7 +1210,7 @@ def int_amdgcn_tbuffer_store : Intrinsic < // - raw and struct variants // - joint format field // - joint cachepolicy field -def int_amdgcn_raw_tbuffer_load : Intrinsic < +def int_amdgcn_raw_tbuffer_load : DefaultAttrsIntrinsic < [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) @@ -1222,11 +1220,11 @@ def int_amdgcn_raw_tbuffer_load : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrReadMem, IntrWillReturn, + [IntrReadMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; -def int_amdgcn_raw_tbuffer_store : Intrinsic < +def int_amdgcn_raw_tbuffer_store : DefaultAttrsIntrinsic < [], [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 llvm_v4i32_ty, // rsrc(SGPR) @@ -1237,11 +1235,11 @@ def int_amdgcn_raw_tbuffer_store : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrWriteMem, IntrWillReturn, + [IntrWriteMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; -def int_amdgcn_struct_tbuffer_load : Intrinsic < +def int_amdgcn_struct_tbuffer_load : DefaultAttrsIntrinsic < [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) @@ -1252,11 +1250,11 @@ def int_amdgcn_struct_tbuffer_load : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrReadMem, IntrWillReturn, + [IntrReadMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; -def int_amdgcn_struct_tbuffer_store : Intrinsic < +def int_amdgcn_struct_tbuffer_store : DefaultAttrsIntrinsic < [], [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 llvm_v4i32_ty, // rsrc(SGPR) @@ -1268,7 +1266,7 @@ def int_amdgcn_struct_tbuffer_store : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrWriteMem, IntrWillReturn, + [IntrWriteMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; @@ -1279,7 +1277,7 @@ class AMDGPUBufferAtomic : Intrinsic < llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - [ImmArg>, IntrWillReturn], "", [SDNPMemOperand]>, + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1, 0>; def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic; @@ -1299,7 +1297,7 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic< llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - [ImmArg>, IntrWillReturn], "", [SDNPMemOperand]>, + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; def int_amdgcn_buffer_atomic_csub : AMDGPUBufferAtomic; @@ -1311,7 +1309,7 @@ class AMDGPUBufferAtomicFP : Intrinsic < llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - [ImmArg>, IntrWillReturn], "", [SDNPMemOperand]>, + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1, 0>; // Legacy form of the intrinsic. raw and struct forms should be preferred. @@ -1330,7 +1328,7 @@ class AMDGPURawBufferLoadLDS : Intrinsic < // bit 2 = dlc on gfx10+)) // swizzled buffer (bit 3 = swz)) [IntrWillReturn, NoCapture>, ImmArg>, ImmArg>, - ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; + ImmArg>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_buffer_load_lds : AMDGPURawBufferLoadLDS; class AMDGPUStructBufferLoadLDS : Intrinsic < @@ -1347,14 +1345,14 @@ class AMDGPUStructBufferLoadLDS : Intrinsic < // bit 2 = dlc on gfx10+)) // swizzled buffer (bit 3 = swz)) [IntrWillReturn, NoCapture>, ImmArg>, ImmArg>, - ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; + ImmArg>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS; } // defset AMDGPUBufferIntrinsics // Uses that do not set the done bit should set IntrWriteMem on the // call site. -def int_amdgcn_exp : Intrinsic <[], [ +def int_amdgcn_exp : DefaultAttrsIntrinsic <[], [ llvm_i32_ty, // tgt, llvm_i32_ty, // en llvm_any_ty, // src0 (f32 or i32) @@ -1365,12 +1363,11 @@ def int_amdgcn_exp : Intrinsic <[], [ llvm_i1_ty // vm (ignored on GFX11+) ], [ImmArg>, ImmArg>, ImmArg>, - ImmArg>, IntrWriteMem, IntrInaccessibleMemOnly, - IntrWillReturn] + ImmArg>, IntrWriteMem, IntrInaccessibleMemOnly] >; // exp with row_en bit set. Only supported on GFX11+. -def int_amdgcn_exp_row : Intrinsic <[], [ +def int_amdgcn_exp_row : DefaultAttrsIntrinsic <[], [ llvm_i32_ty, // tgt, llvm_i32_ty, // en llvm_any_ty, // src0 (f32 or i32) @@ -1380,11 +1377,11 @@ def int_amdgcn_exp_row : Intrinsic <[], [ llvm_i1_ty, // done llvm_i32_ty], // row number [ImmArg>, ImmArg>, ImmArg>, - IntrWriteMem, IntrInaccessibleMemOnly, IntrWillReturn] + IntrWriteMem, IntrInaccessibleMemOnly] >; // exp with compr bit set. Not supported on GFX11+. -def int_amdgcn_exp_compr : Intrinsic <[], [ +def int_amdgcn_exp_compr : DefaultAttrsIntrinsic <[], [ llvm_i32_ty, // tgt, llvm_i32_ty, // en llvm_anyvector_ty, // src0 (v2f16 or v2i16) @@ -1392,58 +1389,57 @@ def int_amdgcn_exp_compr : Intrinsic <[], [ llvm_i1_ty, // done llvm_i1_ty], // vm [ImmArg>, ImmArg>, ImmArg>, - ImmArg>, IntrWriteMem, IntrInaccessibleMemOnly, - IntrWillReturn] + ImmArg>, IntrWriteMem, IntrInaccessibleMemOnly] >; def int_amdgcn_buffer_wbinvl1_sc : ClangBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_buffer_wbinvl1 : ClangBuiltin<"__builtin_amdgcn_buffer_wbinvl1">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_dcache_inv : ClangBuiltin<"__builtin_amdgcn_s_dcache_inv">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_memtime : ClangBuiltin<"__builtin_amdgcn_s_memtime">, - Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_sleep : ClangBuiltin<"__builtin_amdgcn_s_sleep">, - Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, - IntrHasSideEffects, IntrWillReturn]> { + DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, + IntrHasSideEffects]> { } def int_amdgcn_s_incperflevel : ClangBuiltin<"__builtin_amdgcn_s_incperflevel">, - Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, - IntrHasSideEffects, IntrWillReturn]> { + DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, + IntrHasSideEffects]> { } def int_amdgcn_s_decperflevel : ClangBuiltin<"__builtin_amdgcn_s_decperflevel">, - Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, - IntrHasSideEffects, IntrWillReturn]> { + DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, + IntrHasSideEffects]> { } def int_amdgcn_s_sethalt : - Intrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, - IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg>, IntrNoMem, + IntrHasSideEffects]>; def int_amdgcn_s_setprio : ClangBuiltin<"__builtin_amdgcn_s_setprio">, - Intrinsic<[], [llvm_i16_ty], [ImmArg>, IntrNoMem, - IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg>, IntrNoMem, + IntrHasSideEffects]>; // This is IntrHasSideEffects so it can be used to read cycle counters. def int_amdgcn_s_getreg : ClangBuiltin<"__builtin_amdgcn_s_getreg">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], - [IntrNoMem, IntrHasSideEffects, IntrWillReturn, ImmArg>] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects, ImmArg>] >; // Note this can be used to set FP environment properties that are @@ -1452,8 +1448,8 @@ def int_amdgcn_s_getreg : // subtarget. llvm.amdgcn.s.setreg(hwmode, value) def int_amdgcn_s_setreg : ClangBuiltin<"__builtin_amdgcn_s_setreg">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrHasSideEffects, IntrWillReturn, ImmArg>] + DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects, ImmArg>] >; // int_amdgcn_s_getpc is provided to allow a specific style of position @@ -1464,16 +1460,16 @@ def int_amdgcn_s_setreg : // especially as we explicitly use IntrNoMem to allow optimizations. def int_amdgcn_s_getpc : ClangBuiltin<"__builtin_amdgcn_s_getpc">, - Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, + DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; // __builtin_amdgcn_interp_mov , , , // param values: 0 = P10, 1 = P20, 2 = P0 def int_amdgcn_interp_mov : ClangBuiltin<"__builtin_amdgcn_interp_mov">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>, ImmArg>]>; // __builtin_amdgcn_interp_p1 , , , @@ -1481,17 +1477,17 @@ def int_amdgcn_interp_mov : // so it behaves like IntrNoMem. def int_amdgcn_interp_p1 : ClangBuiltin<"__builtin_amdgcn_interp_p1">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>]>; // __builtin_amdgcn_interp_p2 , , , , def int_amdgcn_interp_p2 : ClangBuiltin<"__builtin_amdgcn_interp_p2">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>]>; // See int_amdgcn_v_interp_p1 for why this is IntrNoMem. @@ -1499,183 +1495,183 @@ def int_amdgcn_interp_p2 : // high selects whether high or low 16-bits are loaded from LDS def int_amdgcn_interp_p1_f16 : ClangBuiltin<"__builtin_amdgcn_interp_p1_f16">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>, ImmArg>]>; // __builtin_amdgcn_interp_p2_f16 , , , , , // high selects whether high or low 16-bits are loaded from LDS def int_amdgcn_interp_p2_f16 : ClangBuiltin<"__builtin_amdgcn_interp_p2_f16">, - Intrinsic<[llvm_half_ty], + DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>, ImmArg>]>; // llvm.amdgcn.lds.direct.load // The input argument is m0, which contains a packed combination of address // offset and flags describing the data type. def int_amdgcn_lds_direct_load : - Intrinsic<[llvm_any_ty], // overloaded for types u8, u16, i32/f32, i8, i16 + DefaultAttrsIntrinsic<[llvm_any_ty], // overloaded for types u8, u16, i32/f32, i8, i16 [llvm_i32_ty], - [IntrReadMem, IntrSpeculatable, IntrWillReturn]>; + [IntrReadMem, IntrSpeculatable]>; // llvm.amdgcn.lds.param.load , , // Like interp intrinsics, this reads from lds, but the memory values are constant, // so it behaves like IntrNoMem. def int_amdgcn_lds_param_load : - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>]>; // llvm.amdgcn.interp.inreg.p10

, , def int_amdgcn_interp_inreg_p10 : - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + [IntrNoMem, IntrSpeculatable]>; // llvm.amdgcn.interp.inreg.p2

, , def int_amdgcn_interp_inreg_p2 : - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + [IntrNoMem, IntrSpeculatable]>; // llvm.amdgcn.interp.inreg.p10.f16

, , , // high selects whether high or low 16-bits are used for p and p0 operands def int_amdgcn_interp_inreg_p10_f16: - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>]>; // llvm.amdgcn.interp.inreg.p2.f16

, , , // high selects whether high or low 16-bits are used for p operand def int_amdgcn_interp_inreg_p2_f16 : - Intrinsic<[llvm_half_ty], + DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>]>; // Deprecated: use llvm.amdgcn.live.mask instead. -def int_amdgcn_ps_live : Intrinsic < +def int_amdgcn_ps_live : DefaultAttrsIntrinsic < [llvm_i1_ty], [], - [IntrNoMem, IntrWillReturn]>; + [IntrNoMem]>; // Query currently live lanes. // Returns true if lane is live (and not a helper lane). -def int_amdgcn_live_mask : Intrinsic <[llvm_i1_ty], - [], [IntrReadMem, IntrInaccessibleMemOnly, IntrWillReturn] +def int_amdgcn_live_mask : DefaultAttrsIntrinsic <[llvm_i1_ty], + [], [IntrReadMem, IntrInaccessibleMemOnly] >; def int_amdgcn_mbcnt_lo : ClangBuiltin<"__builtin_amdgcn_mbcnt_lo">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrWillReturn]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; def int_amdgcn_mbcnt_hi : ClangBuiltin<"__builtin_amdgcn_mbcnt_hi">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrWillReturn]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; // llvm.amdgcn.ds.swizzle src offset def int_amdgcn_ds_swizzle : ClangBuiltin<"__builtin_amdgcn_ds_swizzle">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn, + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree, ImmArg>]>; -def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty], +def int_amdgcn_ubfe : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty], +def int_amdgcn_sbfe : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_lerp : ClangBuiltin<"__builtin_amdgcn_lerp">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_sad_u8 : ClangBuiltin<"__builtin_amdgcn_sad_u8">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_msad_u8 : ClangBuiltin<"__builtin_amdgcn_msad_u8">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_sad_hi_u8 : ClangBuiltin<"__builtin_amdgcn_sad_hi_u8">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_sad_u16 : ClangBuiltin<"__builtin_amdgcn_sad_u16">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_qsad_pk_u16_u8 : ClangBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">, - Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_mqsad_pk_u16_u8 : ClangBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">, - Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_mqsad_u32_u8 : ClangBuiltin<"__builtin_amdgcn_mqsad_u32_u8">, - Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cvt_pk_u8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_icmp : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, LLVMMatchType<1>, llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn, - ImmArg>]>; + [IntrNoMem, IntrConvergent, + ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_fcmp : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>, llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn, - ImmArg>]>; + [IntrNoMem, IntrConvergent, + ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_ballot : Intrinsic<[llvm_anyint_ty], [llvm_i1_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_readfirstlane : ClangBuiltin<"__builtin_amdgcn_readfirstlane">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // The lane argument must be uniform across the currently active threads of the // current wave. Otherwise, the result is undefined. def int_amdgcn_readlane : ClangBuiltin<"__builtin_amdgcn_readlane">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // The value to write and lane select arguments must be uniform across the // currently active threads of the current wave. Otherwise, the result is @@ -1687,32 +1683,32 @@ def int_amdgcn_writelane : llvm_i32_ty, // uniform lane select llvm_i32_ty // returned by all lanes other than the selected one ], - [IntrNoMem, IntrConvergent, IntrWillReturn] + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_mul_i24 : Intrinsic<[llvm_i32_ty], +def int_amdgcn_mul_i24 : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty], +def int_amdgcn_mul_u24 : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_mulhi_i24 : Intrinsic<[llvm_i32_ty], +def int_amdgcn_mulhi_i24 : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_mulhi_u24 : Intrinsic<[llvm_i32_ty], +def int_amdgcn_mulhi_u24 : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; // llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id) @@ -1724,7 +1720,7 @@ def int_amdgcn_ds_gws_init : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrWriteMem, - IntrInaccessibleMemOnly, IntrWillReturn], "", + IntrInaccessibleMemOnly, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -1735,7 +1731,7 @@ def int_amdgcn_ds_gws_barrier : ClangBuiltin<"__builtin_amdgcn_ds_gws_barrier">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "", + [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -1744,7 +1740,7 @@ def int_amdgcn_ds_gws_sema_v : ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_v">, Intrinsic<[], [llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "", + [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -1753,7 +1749,7 @@ def int_amdgcn_ds_gws_sema_br : ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_br">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "", + [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -1762,7 +1758,7 @@ def int_amdgcn_ds_gws_sema_p : ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_p">, Intrinsic<[], [llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "", + [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -1771,7 +1767,7 @@ def int_amdgcn_ds_gws_sema_release_all : ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_release_all">, Intrinsic<[], [llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "", + [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand] >; @@ -1779,33 +1775,33 @@ def int_amdgcn_ds_gws_sema_release_all : // Copies the source value to the destination value, with the guarantee that // the source value is computed as if the entire program were executed in WQM. def int_amdgcn_wqm : Intrinsic<[llvm_any_ty], - [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn, IntrNoCallback, IntrNoFree] >; // Copies the source value to the destination value, such that the source // is computed as if the entire program were executed in WQM if any other // program code executes in WQM. def int_amdgcn_softwqm : Intrinsic<[llvm_any_ty], - [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn, IntrNoCallback, IntrNoFree] >; // Return true if at least one thread within the pixel quad passes true into // the function. def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty], - [llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn] + [llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; // If false, set EXEC=0 for the current thread until the end of program. // FIXME: Should this be IntrNoMem, IntrHasSideEffects, or IntrWillReturn? -def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>; +def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], [IntrNoCallback, IntrNoFree]>; def int_amdgcn_endpgm : ClangBuiltin<"__builtin_amdgcn_endpgm">, - Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrNoMem, IntrHasSideEffects] + Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrNoMem, IntrHasSideEffects, IntrNoCallback, IntrNoFree] >; // If false, mark all active lanes as helper lanes until the end of program. def int_amdgcn_wqm_demote : Intrinsic<[], - [llvm_i1_ty], [IntrWriteMem, IntrInaccessibleMemOnly] + [llvm_i1_ty], [IntrWriteMem, IntrInaccessibleMemOnly, IntrNoCallback, IntrNoFree] >; // Copies the active channels of the source value to the destination value, @@ -1815,16 +1811,16 @@ def int_amdgcn_wqm_demote : Intrinsic<[], // undefined value. def int_amdgcn_strict_wwm : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, - IntrConvergent, IntrWillReturn] + IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; // Deprecated. Use int_amdgcn_strict_wwm instead. def int_amdgcn_wwm : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, - IntrConvergent, IntrWillReturn] + IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_amdgcn_strict_wqm : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, - IntrConvergent, IntrWillReturn] + IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; // Given a value, copies it while setting all the inactive lanes to a given @@ -1835,18 +1831,18 @@ def int_amdgcn_set_inactive : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, // value to be copied LLVMMatchType<0>], // value for the inactive lanes to take - [IntrNoMem, IntrConvergent, IntrWillReturn]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // Return if the given flat pointer points to a local memory address. def int_amdgcn_is_shared : ClangBuiltin<"__builtin_amdgcn_is_shared">, - Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], - [IntrNoMem, IntrSpeculatable, NoCapture>, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], + [IntrNoMem, IntrSpeculatable, NoCapture>] >; // Return if the given flat pointer points to a prvate memory address. def int_amdgcn_is_private : ClangBuiltin<"__builtin_amdgcn_is_private">, - Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], - [IntrNoMem, IntrSpeculatable, NoCapture>, IntrWillReturn] + DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], + [IntrNoMem, IntrSpeculatable, NoCapture>] >; //===----------------------------------------------------------------------===// @@ -1855,11 +1851,11 @@ def int_amdgcn_is_private : ClangBuiltin<"__builtin_amdgcn_is_private">, def int_amdgcn_s_dcache_inv_vol : ClangBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_buffer_wbinvl1_vol : ClangBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; //===----------------------------------------------------------------------===// // VI Intrinsics @@ -1872,7 +1868,7 @@ def int_amdgcn_mov_dpp : llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg>, ImmArg>, - ImmArg>, ImmArg>]>; + ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree]>; // llvm.amdgcn.update.dpp.i32 // Should be equivalent to: @@ -1884,37 +1880,37 @@ def int_amdgcn_update_dpp : llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg>, ImmArg>, - ImmArg>, ImmArg>]>; + ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree]>; def int_amdgcn_s_dcache_wb : ClangBuiltin<"__builtin_amdgcn_s_dcache_wb">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_s_dcache_wb_vol : ClangBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_s_memrealtime : ClangBuiltin<"__builtin_amdgcn_s_memrealtime">, - Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // llvm.amdgcn.ds.permute def int_amdgcn_ds_permute : ClangBuiltin<"__builtin_amdgcn_ds_permute">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // llvm.amdgcn.ds.bpermute def int_amdgcn_ds_bpermute : ClangBuiltin<"__builtin_amdgcn_ds_bpermute">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // llvm.amdgcn.perm def int_amdgcn_perm : ClangBuiltin<"__builtin_amdgcn_perm">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, IntrNoCallback, IntrNoFree]>; //===----------------------------------------------------------------------===// // GFX9 Intrinsics @@ -1931,7 +1927,7 @@ class AMDGPUGlobalLoadLDS : Intrinsic < // bit 2 = dlc on gfx10+)) // bit 4 = scc/nt on gfx90a+)) [IntrWillReturn, NoCapture>, NoCapture>, - ImmArg>, ImmArg>, ImmArg>], + ImmArg>, ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>; def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS; @@ -1944,14 +1940,14 @@ def int_amdgcn_permlane16 : ClangBuiltin<"__builtin_amdgcn_permlane16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, - ImmArg>, ImmArg>]>; + ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree]>; // llvm.amdgcn.permlanex16 def int_amdgcn_permlanex16 : ClangBuiltin<"__builtin_amdgcn_permlanex16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, - ImmArg>, ImmArg>]>; + ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree]>; // llvm.amdgcn.mov.dpp8.i32 // is a 32-bit constant whose high 8 bits must be zero which selects @@ -1960,18 +1956,18 @@ def int_amdgcn_mov_dpp8 : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, - ImmArg>]>; + ImmArg>, IntrNoCallback, IntrNoFree]>; def int_amdgcn_s_get_waveid_in_workgroup : ClangBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">, Intrinsic<[llvm_i32_ty], [], - [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + [IntrNoMem, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; class AMDGPUGlobalAtomicRtn : Intrinsic < [vt], [llvm_anyptr_ty, // vaddr vt], // vdata(VGPR) - [IntrArgMemOnly, IntrWillReturn, NoCapture>], "", + [IntrArgMemOnly, IntrWillReturn, NoCapture>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>; def int_amdgcn_global_atomic_csub : AMDGPUGlobalAtomicRtn; @@ -1981,10 +1977,10 @@ def int_amdgcn_global_atomic_csub : AMDGPUGlobalAtomicRtn; // is i32 or i64. // and are both v3f16 or both v3f32. def int_amdgcn_image_bvh_intersect_ray : - Intrinsic<[llvm_v4i32_ty], + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_anyint_ty, llvm_float_ty, llvm_v3f32_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_v4i32_ty], - [IntrReadMem, IntrWillReturn]>; + [IntrReadMem, IntrWillReturn, IntrNoCallback, IntrNoFree]>; //===----------------------------------------------------------------------===// // GFX11 Intrinsics @@ -1994,17 +1990,17 @@ def int_amdgcn_image_bvh_intersect_ray : def int_amdgcn_permlane64 : ClangBuiltin<"__builtin_amdgcn_permlane64">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], - [IntrNoMem, IntrConvergent, IntrWillReturn]>; + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_ds_add_gs_reg_rtn : ClangBuiltin<"__builtin_amdgcn_ds_add_gs_reg_rtn">, Intrinsic<[llvm_anyint_ty], [llvm_i32_ty, llvm_i32_ty], - [ImmArg>, IntrHasSideEffects, IntrWillReturn]>; + [ImmArg>, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_ds_sub_gs_reg_rtn : ClangBuiltin<"__builtin_amdgcn_ds_sub_gs_reg_rtn">, Intrinsic<[llvm_anyint_ty], [llvm_i32_ty, llvm_i32_ty], - [ImmArg>, IntrHasSideEffects, IntrWillReturn]>; + [ImmArg>, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; def int_amdgcn_ds_bvh_stack_rtn : Intrinsic< @@ -2015,7 +2011,7 @@ def int_amdgcn_ds_bvh_stack_rtn : llvm_v4i32_ty, // %data1 llvm_i32_ty, // %offset ], - [ImmArg>, IntrWillReturn] + [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree] >; // WMMA (Wave Matrix Multiply-Accumulate) intrinsics @@ -2031,7 +2027,7 @@ class AMDGPUWmmaIntrinsic : AB, // %B LLVMMatchType<0>, // %C ], - [IntrNoMem, IntrConvergent, IntrWillReturn] + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; class AMDGPUWmmaIntrinsicOPSEL : @@ -2043,7 +2039,7 @@ class AMDGPUWmmaIntrinsicOPSEL : LLVMMatchType<0>, // %C llvm_i1_ty, // %high ], - [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrConvergent, ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree] >; class AMDGPUWmmaIntrinsicIU : @@ -2057,7 +2053,7 @@ class AMDGPUWmmaIntrinsicIU : LLVMMatchType<0>, // %C llvm_i1_ty, // %clamp ], - [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg>, ImmArg>, ImmArg>] + [IntrNoMem, IntrConvergent, ImmArg>, ImmArg>, ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_amdgcn_wmma_f32_16x16x16_f16 : AMDGPUWmmaIntrinsic; @@ -2080,7 +2076,7 @@ def int_amdgcn_s_wait_event_export_ready : // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_fdot2 : ClangBuiltin<"__builtin_amdgcn_fdot2">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_float_ty], // %r [ llvm_v2f16_ty, // %a @@ -2088,42 +2084,42 @@ def int_amdgcn_fdot2 : llvm_float_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // f16 %r = llvm.amdgcn.fdot2.f16.f16(v2f16 %a, v2f16 %b, f16 %c) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_fdot2_f16_f16 : ClangBuiltin<"__builtin_amdgcn_fdot2_f16_f16">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_half_ty], // %r [ llvm_v2f16_ty, // %a llvm_v2f16_ty, // %b llvm_half_ty // %c ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; // bf16 %r = llvm.amdgcn.fdot2.bf16.bf16(v2bf16 %a, v2bf16 %b, bf16 %c) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_fdot2_bf16_bf16 : ClangBuiltin<"__builtin_amdgcn_fdot2_bf16_bf16">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i16_ty], // %r [ llvm_v2i16_ty, // %a llvm_v2i16_ty, // %b llvm_i16_ty // %c ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; // f32 %r = llvm.amdgcn.fdot2.f32.bf16(v2bf16 %a, v2bf16 %b, f32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_fdot2_f32_bf16 : ClangBuiltin<"__builtin_amdgcn_fdot2_f32_bf16">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_float_ty], // %r [ llvm_v2i16_ty, // %a @@ -2131,14 +2127,14 @@ def int_amdgcn_fdot2_f32_bf16 : llvm_float_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_sdot2 : ClangBuiltin<"__builtin_amdgcn_sdot2">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_v2i16_ty, // %a @@ -2146,14 +2142,14 @@ def int_amdgcn_sdot2 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_udot2 : ClangBuiltin<"__builtin_amdgcn_udot2">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_v2i16_ty, // %a @@ -2161,14 +2157,14 @@ def int_amdgcn_udot2 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c def int_amdgcn_sdot4 : ClangBuiltin<"__builtin_amdgcn_sdot4">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_i32_ty, // %a @@ -2176,14 +2172,14 @@ def int_amdgcn_sdot4 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c def int_amdgcn_udot4 : ClangBuiltin<"__builtin_amdgcn_udot4">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_i32_ty, // %a @@ -2191,7 +2187,7 @@ def int_amdgcn_udot4 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // i32 %r = llvm.amdgcn.sudot4(i1 %a_sign, v4i8 (as i32) %a, i1 %b_sign, v4i8 (as i32) %b, i32 %c, i1 %clamp) @@ -2201,7 +2197,7 @@ def int_amdgcn_udot4 : // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c def int_amdgcn_sudot4 : ClangBuiltin<"__builtin_amdgcn_sudot4">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_i1_ty, // %a_sign @@ -2211,7 +2207,7 @@ def int_amdgcn_sudot4 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>, ImmArg>] >; @@ -2220,7 +2216,7 @@ def int_amdgcn_sudot4 : // %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c def int_amdgcn_sdot8 : ClangBuiltin<"__builtin_amdgcn_sdot8">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_i32_ty, // %a @@ -2228,7 +2224,7 @@ def int_amdgcn_sdot8 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp) @@ -2236,7 +2232,7 @@ def int_amdgcn_sdot8 : // %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c def int_amdgcn_udot8 : ClangBuiltin<"__builtin_amdgcn_udot8">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_i32_ty, // %a @@ -2244,7 +2240,7 @@ def int_amdgcn_udot8 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // i32 %r = llvm.amdgcn.sudot8(i1 %a_sign, v8i4 (as i32) %a, i1 %b_sign, v8i4 (as i32) %b, i32 %c, i1 %clamp) @@ -2255,7 +2251,7 @@ def int_amdgcn_udot8 : // %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c def int_amdgcn_sudot8 : ClangBuiltin<"__builtin_amdgcn_sudot8">, - Intrinsic< + DefaultAttrsIntrinsic< [llvm_i32_ty], // %r [ llvm_i1_ty, // %a_sign @@ -2265,7 +2261,7 @@ def int_amdgcn_udot8 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>, ImmArg>] >; @@ -2278,10 +2274,10 @@ def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicRtn; // llvm.amdgcn.mfma.*.* vdst, srcA, srcB, srcC, cbsz, abid, blgp class AMDGPUMfmaIntrinsic : ClangBuiltin, - Intrinsic<[DestTy], + DefaultAttrsIntrinsic<[DestTy], [SrcABTy, SrcABTy, DestTy, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, IntrWillReturn, + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_32x32x1f32 : AMDGPUMfmaIntrinsic; @@ -2334,10 +2330,10 @@ def int_amdgcn_mfma_f64_4x4x4f64 : AMDGPUMfmaIntrinsic; def int_amdgcn_flat_atomic_fadd_v2bf16 : AMDGPUGlobalAtomicRtn; -def int_amdgcn_ds_fadd_v2bf16 : Intrinsic< +def int_amdgcn_ds_fadd_v2bf16 : DefaultAttrsIntrinsic< [llvm_v2i16_ty], [LLVMQualPointerType, llvm_v2i16_ty], - [IntrArgMemOnly, IntrWillReturn, NoCapture>]>, + [IntrArgMemOnly, NoCapture>]>, ClangBuiltin<"__builtin_amdgcn_ds_atomic_fadd_v2bf16">; def int_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUMfmaIntrinsic; @@ -2359,10 +2355,10 @@ defm int_amdgcn_mfma_f32_32x32x16 : AMDGPUMFp8MfmaIntrinsic; // llvm.amdgcn.smfmac.?32.* vdst, srcA, srcB, srcC, index, cbsz, abid class AMDGPUMSmfmacIntrinsic : ClangBuiltin, - Intrinsic<[DestTy], + DefaultAttrsIntrinsic<[DestTy], [SrcA, SrcB, DestTy, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, IntrWillReturn, + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>]>; def int_amdgcn_smfmac_f32_16x16x32_f16 : AMDGPUMSmfmacIntrinsic; @@ -2386,92 +2382,92 @@ defm int_amdgcn_smfmac_f32_32x32x32 : AMDGPUMFp8SmfmacIntrinsic; // llvm.amdgcn.cvt.f32.bf8 float vdst, int srcA, imm byte_sel [0..3] // byte_sel selects byte from srcA. def int_amdgcn_cvt_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_bf8">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; // llvm.amdgcn.cvt.f32.fp8 float vdst, int srcA, imm byte_sel [0..3] def int_amdgcn_cvt_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_fp8">, - Intrinsic<[llvm_float_ty], + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; // llvm.amdgcn.cvt.pk.f32.bf8 float2 vdst, int srcA, imm word_sel // word_sel = 1 selects 2 high bytes, 0 selects 2 low bytes. def int_amdgcn_cvt_pk_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_bf8">, - Intrinsic<[llvm_v2f32_ty], + DefaultAttrsIntrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; // llvm.amdgcn.cvt.pk.f32.fp8 float2 vdst, int srcA, imm word_sel. def int_amdgcn_cvt_pk_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_fp8">, - Intrinsic<[llvm_v2f32_ty], + DefaultAttrsIntrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; // llvm.amdgcn.cvt.pk.bf8.f32 int vdst, float srcA, float srcB, int old, imm word_sel // word_sel = 1 selects 2 high bytes in the vdst, 0 selects 2 low bytes. def int_amdgcn_cvt_pk_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f32">, - Intrinsic<[llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; // llvm.amdgcn.cvt.pk.fp8.f32 int vdst, float srcA, float srcB, int old, imm word_sel def int_amdgcn_cvt_pk_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32">, - Intrinsic<[llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; // llvm.amdgcn.cvt.sr.bf8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3] // byte_sel selects byte to write into vdst. def int_amdgcn_cvt_sr_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f32">, - Intrinsic<[llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; // llvm.amdgcn.cvt.sr.fp8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3] def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">, - Intrinsic<[llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, ImmArg>]>; //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. // ===----------------------------------------------------------------------===// def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_anyint_ty], - [llvm_i1_ty], [IntrConvergent, IntrWillReturn] + [llvm_i1_ty], [IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_anyint_ty], - [llvm_anyint_ty], [IntrConvergent, IntrWillReturn] + [llvm_anyint_ty], [IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_amdgcn_if_break : Intrinsic<[llvm_anyint_ty], [llvm_i1_ty, LLVMMatchType<0>], - [IntrNoMem, IntrConvergent, IntrWillReturn] + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_amdgcn_loop : Intrinsic<[llvm_i1_ty], - [llvm_anyint_ty], [IntrConvergent, IntrWillReturn] + [llvm_anyint_ty], [IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; def int_amdgcn_end_cf : Intrinsic<[], [llvm_anyint_ty], - [IntrConvergent, IntrWillReturn]>; + [IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // Represent unreachable in a divergent region. def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>; // Emit 2.5 ulp, no denormal division. Should only be inserted by // pass based on !fpmath metadata. -def int_amdgcn_fdiv_fast : Intrinsic< +def int_amdgcn_fdiv_fast : DefaultAttrsIntrinsic< [llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; // Represent a relocation constant. -def int_amdgcn_reloc_constant : Intrinsic< +def int_amdgcn_reloc_constant : DefaultAttrsIntrinsic< [llvm_i32_ty], [llvm_metadata_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable] >; } diff --git a/llvm/test/Bitcode/compatibility-3.6.ll b/llvm/test/Bitcode/compatibility-3.6.ll index 932f63d..5748c23 100644 --- a/llvm/test/Bitcode/compatibility-3.6.ll +++ b/llvm/test/Bitcode/compatibility-3.6.ll @@ -981,7 +981,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #35 + ; CHECK: call void @f.nobuiltin() #36 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #11 @@ -1183,9 +1183,10 @@ define void @intrinsics.codegen() { ; CHECK: attributes #30 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #31 = { nounwind memory(argmem: read) } ; CHECK: attributes #32 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #33 = { nounwind memory(read) } -; CHECK: attributes #34 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } -; CHECK: attributes #35 = { builtin } +; CHECK: attributes #33 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #34 = { nocallback nounwind } +; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #36 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-3.7.ll b/llvm/test/Bitcode/compatibility-3.7.ll index f32ae0a..513d6fe 100644 --- a/llvm/test/Bitcode/compatibility-3.7.ll +++ b/llvm/test/Bitcode/compatibility-3.7.ll @@ -1022,7 +1022,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #38 + ; CHECK: call void @f.nobuiltin() #39 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1246,9 +1246,10 @@ define void @misc.metadata() { ; CHECK: attributes #33 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #34 = { nounwind memory(argmem: read) } ; CHECK: attributes #35 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #36 = { nounwind memory(read) } -; CHECK: attributes #37 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } -; CHECK: attributes #38 = { builtin } +; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #37 = { nocallback nounwind } +; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #39 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-3.8.ll b/llvm/test/Bitcode/compatibility-3.8.ll index 8e9a1a1..d4755d5 100644 --- a/llvm/test/Bitcode/compatibility-3.8.ll +++ b/llvm/test/Bitcode/compatibility-3.8.ll @@ -1170,7 +1170,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #41 + ; CHECK: call void @f.nobuiltin() #42 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1556,9 +1556,10 @@ normal: ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #37 = { nounwind memory(argmem: read) } ; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #39 = { nounwind memory(read) } -; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } -; CHECK: attributes #41 = { builtin } +; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #42 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-3.9.ll b/llvm/test/Bitcode/compatibility-3.9.ll index ebb50e4..eefac4e 100644 --- a/llvm/test/Bitcode/compatibility-3.9.ll +++ b/llvm/test/Bitcode/compatibility-3.9.ll @@ -1241,7 +1241,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #42 + ; CHECK: call void @f.nobuiltin() #43 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1588,7 +1588,7 @@ normal: } declare void @f.writeonly() writeonly -; CHECK: declare void @f.writeonly() #40 +; CHECK: declare void @f.writeonly() #41 ; CHECK: attributes #0 = { alignstack=4 } ; CHECK: attributes #1 = { alignstack=8 } @@ -1629,10 +1629,11 @@ declare void @f.writeonly() writeonly ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #37 = { nounwind memory(argmem: read) } ; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #39 = { nounwind memory(read) } -; CHECK: attributes #40 = { memory(write) } -; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } -; CHECK: attributes #42 = { builtin } +; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #41 = { memory(write) } +; CHECK: attributes #42 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #43 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-4.0.ll b/llvm/test/Bitcode/compatibility-4.0.ll index 3c3f943..e9731dc 100644 --- a/llvm/test/Bitcode/compatibility-4.0.ll +++ b/llvm/test/Bitcode/compatibility-4.0.ll @@ -1241,7 +1241,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #42 + ; CHECK: call void @f.nobuiltin() #43 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1606,7 +1606,7 @@ normal: declare void @f.writeonly() writeonly -; CHECK: declare void @f.writeonly() #40 +; CHECK: declare void @f.writeonly() #41 ;; Constant Expressions @@ -1654,10 +1654,11 @@ define i8** @constexpr() { ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #37 = { nounwind memory(argmem: read) } ; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #39 = { nounwind memory(read) } -; CHECK: attributes #40 = { memory(write) } -; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } -; CHECK: attributes #42 = { builtin } +; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #41 = { memory(write) } +; CHECK: attributes #42 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #43 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-5.0.ll b/llvm/test/Bitcode/compatibility-5.0.ll index cc7e6fe..2ef54fa 100644 --- a/llvm/test/Bitcode/compatibility-5.0.ll +++ b/llvm/test/Bitcode/compatibility-5.0.ll @@ -1248,7 +1248,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #43 + ; CHECK: call void @f.nobuiltin() #44 ; When used in a non-strictfp function the strictfp callsite attribute ; should get translated to nobuiltin. @@ -1618,10 +1618,10 @@ normal: declare void @f.writeonly() writeonly -; CHECK: declare void @f.writeonly() #40 +; CHECK: declare void @f.writeonly() #41 declare void @f.speculatable() speculatable -; CHECK: declare void @f.speculatable() #41 +; CHECK: declare void @f.speculatable() #42 ;; Constant Expressions @@ -1669,11 +1669,12 @@ define i8** @constexpr() { ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #37 = { nounwind memory(argmem: read) } ; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #39 = { nounwind memory(read) } -; CHECK: attributes #40 = { memory(write) } -; CHECK: attributes #41 = { speculatable } -; CHECK: attributes #42 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } -; CHECK: attributes #43 = { builtin } +; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #41 = { memory(write) } +; CHECK: attributes #42 = { speculatable } +; CHECK: attributes #43 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #44 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-6.0.ll b/llvm/test/Bitcode/compatibility-6.0.ll index 125cfac..4d9c33f 100644 --- a/llvm/test/Bitcode/compatibility-6.0.ll +++ b/llvm/test/Bitcode/compatibility-6.0.ll @@ -1259,7 +1259,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #43 + ; CHECK: call void @f.nobuiltin() #44 ; When used in a non-strictfp function the strictfp callsite attribute ; should get translated to nobuiltin. @@ -1629,10 +1629,10 @@ normal: declare void @f.writeonly() writeonly -; CHECK: declare void @f.writeonly() #40 +; CHECK: declare void @f.writeonly() #41 declare void @f.speculatable() speculatable -; CHECK: declare void @f.speculatable() #41 +; CHECK: declare void @f.speculatable() #42 ;; Constant Expressions @@ -1680,11 +1680,12 @@ define i8** @constexpr() { ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #37 = { nounwind memory(argmem: read) } ; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #39 = { nounwind memory(read) } -; CHECK: attributes #40 = { memory(write) } -; CHECK: attributes #41 = { speculatable } -; CHECK: attributes #42 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } -; CHECK: attributes #43 = { builtin } +; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #41 = { memory(write) } +; CHECK: attributes #42 = { speculatable } +; CHECK: attributes #43 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #44 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index 60c2e37..a4985bf 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -1540,7 +1540,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #50 + ; CHECK: call void @f.nobuiltin() #51 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1915,10 +1915,10 @@ normal: declare void @f.writeonly() writeonly -; CHECK: declare void @f.writeonly() #41 +; CHECK: declare void @f.writeonly() #42 declare void @f.speculatable() speculatable -; CHECK: declare void @f.speculatable() #42 +; CHECK: declare void @f.speculatable() #43 ;; Constant Expressions @@ -1929,16 +1929,16 @@ define i8** @constexpr() { define void @instructions.strictfp() strictfp { call void @f.strictfp() strictfp - ; CHECK: call void @f.strictfp() #43 + ; CHECK: call void @f.strictfp() #44 ret void } declare void @f.nosanitize_coverage() nosanitize_coverage -; CHECK: declare void @f.nosanitize_coverage() #44 +; CHECK: declare void @f.nosanitize_coverage() #45 declare void @f.disable_sanitizer_instrumentation() disable_sanitizer_instrumentation -; CHECK: declare void @f.disable_sanitizer_instrumentation() #45 +; CHECK: declare void @f.disable_sanitizer_instrumentation() #46 ; immarg attribute declare void @llvm.test.immarg.intrinsic(i32 immarg) @@ -1961,10 +1961,10 @@ declare void @f.allocsize_two(i32, i32) allocsize(1, 0) ; CHECK: declare void @f.allocsize_two(i32, i32) declare void @f.nosanitize_bounds() nosanitize_bounds -; CHECK: declare void @f.nosanitize_bounds() #48 +; CHECK: declare void @f.nosanitize_bounds() #49 declare void @f.allockind() allockind("alloc,uninitialized") -; CHECK: declare void @f.allockind() #49 +; CHECK: declare void @f.allockind() #50 ; CHECK: attributes #0 = { alignstack=4 } ; CHECK: attributes #1 = { alignstack=8 } @@ -2005,18 +2005,19 @@ declare void @f.allockind() allockind("alloc,uninitialized") ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } ; CHECK: attributes #37 = { nounwind memory(argmem: read) } ; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } -; CHECK: attributes #39 = { nounwind memory(read) } -; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } -; CHECK: attributes #41 = { memory(write) } -; CHECK: attributes #42 = { speculatable } -; CHECK: attributes #43 = { strictfp } -; CHECK: attributes #44 = { nosanitize_coverage } -; CHECK: attributes #45 = { disable_sanitizer_instrumentation } -; CHECK: attributes #46 = { allocsize(0) } -; CHECK: attributes #47 = { allocsize(1,0) } -; CHECK: attributes #48 = { nosanitize_bounds } -; CHECK: attributes #49 = { allockind("alloc,uninitialized") } -; CHECK: attributes #50 = { builtin } +; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) } +; CHECK: attributes #40 = { nocallback nounwind } +; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #42 = { memory(write) } +; CHECK: attributes #43 = { speculatable } +; CHECK: attributes #44 = { strictfp } +; CHECK: attributes #45 = { nosanitize_coverage } +; CHECK: attributes #46 = { disable_sanitizer_instrumentation } +; CHECK: attributes #47 = { allocsize(0) } +; CHECK: attributes #48 = { allocsize(1,0) } +; CHECK: attributes #49 = { nosanitize_bounds } +; CHECK: attributes #50 = { allockind("alloc,uninitialized") } +; CHECK: attributes #51 = { builtin } ;; Metadata diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll index 163bd3ee..0af30a2 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -928,7 +928,7 @@ attributes #4 = { nounwind sanitize_address } attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } ;. -; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } +; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; AKF_HSA: attributes #[[ATTR1]] = { nounwind "target-cpu"="fiji" } ; AKF_HSA: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx900" } ; AKF_HSA: attributes #[[ATTR3]] = { nounwind } @@ -936,7 +936,7 @@ attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } ; AKF_HSA: attributes #[[ATTR5]] = { nounwind sanitize_address } ; AKF_HSA: attributes #[[ATTR6:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } ;. -; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } +; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll index 4acf31b..b3e7a55 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -642,11 +642,11 @@ attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind } ;. -; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } +; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; AKF_HSA: attributes #[[ATTR1]] = { nounwind } ; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } +; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll index 9d8d4a1..bda5a5b 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll @@ -414,10 +414,10 @@ attributes #1 = { nounwind } ; NOHSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ; NOHSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ;. -; AKF_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } +; AKF_CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; AKF_CHECK: attributes #[[ATTR1]] = { nounwind } ;. -; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } +; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll index aa3fff4..3b4a0b8 100644 --- a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll @@ -40,7 +40,6 @@ define amdgpu_cs void @test_simple_indirect_call() { ; GFX9-NEXT: s_mov_b64 s[2:3], s[10:11] ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm -; ; GFX10-LABEL: test_simple_indirect_call: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_getpc_b64 s[8:9] @@ -69,8 +68,8 @@ declare i64 @llvm.amdgcn.s.getpc() #0 attributes #0 = { nounwind readnone speculatable willreturn } ;. -; AKF_GCN: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } +; AKF_GCN: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;. ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1:[0-9]+]] = { nounwind speculatable willreturn memory(none) } +; ATTRIBUTOR_GCN: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;. diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll index 33dae82..06f653b 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll @@ -9,7 +9,7 @@ target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @lds_store(i32 %i) sanitize_address { entry: ; CHECK-LABEL: @lds_store( - ; CHECK-NOT: call + ; CHECK-NOT: call {{[a-zA-Z]}} %arrayidx1 = getelementptr inbounds [100 x i32], ptr addrspace(3) @count, i32 0, i32 %i store i32 0, ptr addrspace(3) %arrayidx1, align 4 ret void @@ -18,7 +18,7 @@ entry: define protected amdgpu_kernel void @lds_load(i32 %i) sanitize_address { entry: ; CHECK-LABEL: @lds_load( - ; CHECK-NOT: call + ; CHECK-NOT: call {{[a-zA-Z]}} %arrayidx1 = getelementptr inbounds [100 x i32], ptr addrspace(3) @count, i32 0, i32 %i %0 = load i32, ptr addrspace(3) %arrayidx1, align 4 ret void diff --git a/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-alloca-argument.ll b/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-alloca-argument.ll index 4140f20..cf91d93 100644 --- a/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-alloca-argument.ll +++ b/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-alloca-argument.ll @@ -45,8 +45,7 @@ entry: ; addrspacecasted' alloca. ; CHECK-LABEL: @test_inliner_flat_ptr( ; CHECK: call i32 @llvm.amdgcn.workitem.id.x() -; CHECK-NOT: call -; CHECK-NOT: call +; CHECK-NOT: call {{[.*]}}@ define amdgpu_kernel void @test_inliner_flat_ptr(float addrspace(1)* nocapture %a, i32 %n) { entry: %pvt_arr = alloca [64 x float], align 4, addrspace(5) diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll index 2ed04de..ebf8f54 100644 --- a/llvm/test/Transforms/OpenMP/barrier_removal.ll +++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll @@ -245,7 +245,7 @@ define void @pos_multiple() { ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { "llvm.assume"="ompx_aligned_barrier" } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind willreturn } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}