llvm.mlir.global linkonce @linkonce(42 : i32) : i32
// CHECK: @weak = weak global i32 42
llvm.mlir.global weak @weak(42 : i32) : i32
-// CHECK: @common = common global i32 42
-llvm.mlir.global common @common(42 : i32) : i32
-// CHECK: @appending = appending global i32 42
-llvm.mlir.global appending @appending(42 : i32) : i32
// `common` linkage: the initializer is 0 rather than 42 because LLVM IR only
// accepts a zero initializer on globals with common linkage.
+// CHECK: @common = common global i32 0
+llvm.mlir.global common @common(0 : i32) : i32
// `appending` linkage: the global is a 3-element i32 array rather than a scalar
// because LLVM IR only allows appending linkage on array-typed globals.
+// CHECK: @appending = appending global [3 x i32] [i32 1, i32 2, i32 3]
+llvm.mlir.global appending @appending(dense<[1,2,3]> : vector<3xi32>) : !llvm.array<3xi32>
// CHECK: @extern_weak = extern_weak global i32
llvm.mlir.global extern_weak @extern_weak() : i32
// CHECK: @linkonce_odr = linkonce_odr global i32 42
llvm.return %1 : !llvm.ptr<i32, 2>
}
// Returns the string constant "Hello world!" by value. The result and the
// constant are typed as a 12-element i8 array (one element per character)
// instead of a pointer to i8, which is the same [12 x i8] type that the
// expected `ret` instruction below uses.
-llvm.func @stringconstant() -> !llvm.ptr<i8> {
- %1 = llvm.mlir.constant("Hello world!") : !llvm.ptr<i8>
+llvm.func @stringconstant() -> !llvm.array<12 x i8> {
+ %1 = llvm.mlir.constant("Hello world!") : !llvm.array<12 x i8>
// CHECK: ret [12 x i8] c"Hello world!"
- llvm.return %1 : !llvm.ptr<i8>
+ llvm.return %1 : !llvm.array<12 x i8>
}
llvm.func @noreach() {
// Translation test for llvm.atomicrmw on an f32 pointer/value pair and an
// i32 pointer/value pair. Every operation that used the `unordered` ordering
// now uses `monotonic`, and the matching expectations were updated with it:
// LLVM IR does not permit `unordered` on atomicrmw instructions.
llvm.func @atomicrmw(
%f32_ptr : !llvm.ptr<f32>, %f32 : f32,
%i32_ptr : !llvm.ptr<i32>, %i32 : i32) {
- // CHECK: atomicrmw fadd float* %{{.*}}, float %{{.*}} unordered
- %0 = llvm.atomicrmw fadd %f32_ptr, %f32 unordered : f32
- // CHECK: atomicrmw fsub float* %{{.*}}, float %{{.*}} unordered
- %1 = llvm.atomicrmw fsub %f32_ptr, %f32 unordered : f32
+ // CHECK: atomicrmw fadd float* %{{.*}}, float %{{.*}} monotonic
+ %0 = llvm.atomicrmw fadd %f32_ptr, %f32 monotonic : f32
+ // CHECK: atomicrmw fsub float* %{{.*}}, float %{{.*}} monotonic
+ %1 = llvm.atomicrmw fsub %f32_ptr, %f32 monotonic : f32
// CHECK: atomicrmw xchg float* %{{.*}}, float %{{.*}} monotonic
%2 = llvm.atomicrmw xchg %f32_ptr, %f32 monotonic : f32
// CHECK: atomicrmw add i32* %{{.*}}, i32 %{{.*}} acquire
%5 = llvm.atomicrmw _and %i32_ptr, %i32 acq_rel : i32
// CHECK: atomicrmw nand i32* %{{.*}}, i32 %{{.*}} seq_cst
%6 = llvm.atomicrmw nand %i32_ptr, %i32 seq_cst : i32
- // CHECK: atomicrmw or i32* %{{.*}}, i32 %{{.*}} unordered
- %7 = llvm.atomicrmw _or %i32_ptr, %i32 unordered : i32
- // CHECK: atomicrmw xor i32* %{{.*}}, i32 %{{.*}} unordered
- %8 = llvm.atomicrmw _xor %i32_ptr, %i32 unordered : i32
- // CHECK: atomicrmw max i32* %{{.*}}, i32 %{{.*}} unordered
- %9 = llvm.atomicrmw max %i32_ptr, %i32 unordered : i32
- // CHECK: atomicrmw min i32* %{{.*}}, i32 %{{.*}} unordered
- %10 = llvm.atomicrmw min %i32_ptr, %i32 unordered : i32
- // CHECK: atomicrmw umax i32* %{{.*}}, i32 %{{.*}} unordered
- %11 = llvm.atomicrmw umax %i32_ptr, %i32 unordered : i32
- // CHECK: atomicrmw umin i32* %{{.*}}, i32 %{{.*}} unordered
- %12 = llvm.atomicrmw umin %i32_ptr, %i32 unordered : i32
+ // CHECK: atomicrmw or i32* %{{.*}}, i32 %{{.*}} monotonic
+ %7 = llvm.atomicrmw _or %i32_ptr, %i32 monotonic : i32
+ // CHECK: atomicrmw xor i32* %{{.*}}, i32 %{{.*}} monotonic
+ %8 = llvm.atomicrmw _xor %i32_ptr, %i32 monotonic : i32
+ // CHECK: atomicrmw max i32* %{{.*}}, i32 %{{.*}} monotonic
+ %9 = llvm.atomicrmw max %i32_ptr, %i32 monotonic : i32
+ // CHECK: atomicrmw min i32* %{{.*}}, i32 %{{.*}} monotonic
+ %10 = llvm.atomicrmw min %i32_ptr, %i32 monotonic : i32
+ // CHECK: atomicrmw umax i32* %{{.*}}, i32 %{{.*}} monotonic
+ %11 = llvm.atomicrmw umax %i32_ptr, %i32 monotonic : i32
+ // CHECK: atomicrmw umin i32* %{{.*}}, i32 %{{.*}} monotonic
+ %12 = llvm.atomicrmw umin %i32_ptr, %i32 monotonic : i32
llvm.return
}
llvm.func @invokeLandingpad() -> i32 attributes { personality = @__gxx_personality_v0 } {
// CHECK: %[[a1:[0-9]+]] = alloca i8
%0 = llvm.mlir.constant(0 : i32) : i32
- %1 = llvm.mlir.constant("\01") : !llvm.array<1 x i8>
+ %1 = llvm.mlir.constant(dense<0> : vector<1xi8>) : !llvm.array<1 x i8>
%2 = llvm.mlir.addressof @_ZTIi : !llvm.ptr<ptr<i8>>
%3 = llvm.bitcast %2 : !llvm.ptr<ptr<i8>> to !llvm.ptr<i8>
%4 = llvm.mlir.null : !llvm.ptr<ptr<i8>>
// CHECK: %{{[0-9]+}} = landingpad { i8*, i32 }
// CHECK-NEXT: catch i8** null
// CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
-// CHECK-NEXT: filter [1 x i8] c"\01"
+// CHECK-NEXT: filter [1 x i8] zeroinitializer
%7 = llvm.landingpad (catch %4 : !llvm.ptr<ptr<i8>>) (catch %3 : !llvm.ptr<i8>) (filter %1 : !llvm.array<1 x i8>) : !llvm.struct<(ptr<i8>, i32)>
// CHECK: br label %[[final:[0-9]+]]
llvm.br ^bb3
// -----
// CHECK-LABEL: @switch_args
// Takes a single i32 argument; the signature now also declares an i32 result.
// The body starts by materializing the i32 constants 5, 7 and 11.
-llvm.func @switch_args(%arg0: i32) {
+llvm.func @switch_args(%arg0: i32) -> i32 {
%0 = llvm.mlir.constant(5 : i32) : i32
%1 = llvm.mlir.constant(7 : i32) : i32
%2 = llvm.mlir.constant(11 : i32) : i32
}
// CHECK-LABEL: @switch_weights
// Takes a single i32 argument; the signature now also declares an i32 result.
// The body starts by materializing the i32 constants 19, 23 and 29.
-llvm.func @switch_weights(%arg0: i32) {
+llvm.func @switch_weights(%arg0: i32) -> i32 {
%0 = llvm.mlir.constant(19 : i32) : i32
%1 = llvm.mlir.constant(23 : i32) : i32
%2 = llvm.mlir.constant(29 : i32) : i32
}
// Translation test covering the rocdl.mfma.* operations. In every call the
// first three operands come from the function arguments, while the trailing
// three i32 operands are all the constant %csti32 (42) defined at the top of
// the body rather than the %arg3 function argument.
llvm.func @rocdl.xdlops(%arg0 : f32, %arg1 : f32,
- %arg2 : vector<32 x f32>, %arg3 : i32,
+ %arg2 : vector<32 x f32>, %arg3: i32,
%arg4 : vector<16 x f32>, %arg5 : vector<4xf32>,
%arg6 : vector<4xf16>, %arg7 : vector<32 x i32>,
%arg8 : vector<16 x i32>, %arg9 : vector<4xi32>,
%arg10 : vector<2xi16>) -> vector<32 x f32> {
// Because the trailing operands are a constant, the expectations match them
// with `i32 {{.*}}` (no leading `%`) instead of `i32 %{{.*}}`.
// NOTE(review): presumably the intrinsics require immediate values for these
// operands; confirm against the AMDGPU intrinsic definitions.
+ %csti32 = llvm.mlir.constant(42 : i32) : i32
+
// CHECK-LABEL: rocdl.xdlops
- // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %{{.*}}, float %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r0 = rocdl.mfma.f32.32x32x1f32 %arg0, %arg1, %arg2, %arg3, %arg3, %arg3 :
+ // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %{{.*}}, float %{{.*}}, <32 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r0 = rocdl.mfma.f32.32x32x1f32 %arg0, %arg1, %arg2, %csti32, %csti32, %csti32 :
(f32, f32, vector<32 x f32>,
i32, i32, i32) -> vector<32 x f32>
- // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r1 = rocdl.mfma.f32.16x16x1f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 :
+ // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r1 = rocdl.mfma.f32.16x16x1f32 %arg0, %arg1, %arg4, %csti32, %csti32, %csti32 :
(f32, f32, vector<16 x f32>,
i32, i32, i32) -> vector<16 x f32>
- // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r2 = rocdl.mfma.f32.16x16x4f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 :
+ // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r2 = rocdl.mfma.f32.16x16x4f32 %arg0, %arg1, %arg5, %csti32, %csti32, %csti32 :
(f32, f32, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>
- // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r3 = rocdl.mfma.f32.4x4x1f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 :
+ // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r3 = rocdl.mfma.f32.4x4x1f32 %arg0, %arg1, %arg5, %csti32, %csti32, %csti32 :
(f32, f32, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>
- // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r4= rocdl.mfma.f32.32x32x2f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 :
+ // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r4= rocdl.mfma.f32.32x32x2f32 %arg0, %arg1, %arg4, %csti32, %csti32, %csti32 :
(f32, f32, vector<16 x f32>,
i32, i32, i32) -> vector<16 x f32>
- // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r5 = rocdl.mfma.f32.32x32x4f16 %arg6, %arg6, %arg2, %arg3, %arg3, %arg3 :
+ // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <32 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r5 = rocdl.mfma.f32.32x32x4f16 %arg6, %arg6, %arg2, %csti32, %csti32, %csti32 :
(vector<4xf16>, vector<4xf16>, vector<32 x f32>,
i32, i32, i32) -> vector<32 x f32>
- // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r6 = rocdl.mfma.f32.16x16x4f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 :
+ // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r6 = rocdl.mfma.f32.16x16x4f16 %arg6, %arg6, %arg4, %csti32, %csti32, %csti32 :
(vector<4xf16>, vector<4xf16>, vector<16 x f32>,
i32, i32, i32) -> vector<16 x f32>
- // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r7 = rocdl.mfma.f32.4x4x4f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 :
+ // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r7 = rocdl.mfma.f32.4x4x4f16 %arg6, %arg6, %arg5, %csti32, %csti32, %csti32 :
(vector<4xf16>, vector<4xf16>, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>
- // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r8 = rocdl.mfma.f32.32x32x8f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 :
+ // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r8 = rocdl.mfma.f32.32x32x8f16 %arg6, %arg6, %arg4, %csti32, %csti32, %csti32 :
(vector<4xf16>, vector<4xf16>, vector<16 x f32>,
i32, i32, i32) -> vector<16 x f32>
- // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r9 = rocdl.mfma.f32.16x16x16f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 :
+ // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r9 = rocdl.mfma.f32.16x16x16f16 %arg6, %arg6, %arg5, %csti32, %csti32, %csti32 :
(vector<4xf16>, vector<4xf16>, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>
- // CHECK: call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 %{{.*}}, i32 %{{.*}}, <32 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r10 = rocdl.mfma.i32.32x32x4i8 %arg3, %arg3, %arg7, %arg3, %arg3, %arg3 :
+ // CHECK: call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 %{{.*}}, i32 %{{.*}}, <32 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r10 = rocdl.mfma.i32.32x32x4i8 %arg3, %arg3, %arg7, %csti32, %csti32, %csti32 :
(i32, i32, vector<32 x i32>,
i32, i32, i32) -> vector<32 x i32>
- // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r11 = rocdl.mfma.i32.16x16x4i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 :
+ // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r11 = rocdl.mfma.i32.16x16x4i8 %arg3, %arg3, %arg8, %csti32, %csti32, %csti32 :
(i32, i32, vector<16 x i32>,
i32, i32, i32) -> vector<16 x i32>
- // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r12 = rocdl.mfma.i32.4x4x4i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 :
+ // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r12 = rocdl.mfma.i32.4x4x4i8 %arg3, %arg3, %arg9, %csti32, %csti32, %csti32 :
(i32, i32, vector<4xi32>,
i32, i32, i32) -> vector<4xi32>
- // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r13 = rocdl.mfma.i32.32x32x8i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 :
+ // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r13 = rocdl.mfma.i32.32x32x8i8 %arg3, %arg3, %arg8, %csti32, %csti32, %csti32 :
(i32, i32, vector<16 x i32>,
i32, i32, i32) -> vector<16 x i32>
- // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r14 = rocdl.mfma.i32.16x16x16i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 :
+ // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r14 = rocdl.mfma.i32.16x16x16i8 %arg3, %arg3, %arg9, %csti32, %csti32, %csti32 :
(i32, i32, vector<4xi32>,
i32, i32, i32) -> vector<4xi32>
- // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r15 = rocdl.mfma.f32.32x32x2bf16 %arg10, %arg10, %arg2, %arg3, %arg3, %arg3 :
+ // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <32 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r15 = rocdl.mfma.f32.32x32x2bf16 %arg10, %arg10, %arg2, %csti32, %csti32, %csti32 :
(vector<2xi16>, vector<2xi16>, vector<32 x f32>,
i32, i32, i32) -> vector<32 x f32>
- // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r16 = rocdl.mfma.f32.16x16x2bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 :
+ // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r16 = rocdl.mfma.f32.16x16x2bf16 %arg10, %arg10, %arg4, %csti32, %csti32, %csti32 :
(vector<2xi16>, vector<2xi16>, vector<16 x f32>,
i32, i32, i32) -> vector<16 x f32>
- // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r17 = rocdl.mfma.f32.4x4x2bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 :
+ // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r17 = rocdl.mfma.f32.4x4x2bf16 %arg10, %arg10, %arg5, %csti32, %csti32, %csti32 :
(vector<2xi16>, vector<2xi16>, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>
- // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r18 = rocdl.mfma.f32.32x32x4bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 :
+ // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r18 = rocdl.mfma.f32.32x32x4bf16 %arg10, %arg10, %arg4, %csti32, %csti32, %csti32 :
(vector<2xi16>, vector<2xi16>, vector<16 x f32>,
i32, i32, i32) -> vector<16 x f32>
- // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %r19 = rocdl.mfma.f32.16x16x8bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 :
+ // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+ %r19 = rocdl.mfma.f32.16x16x8bf16 %arg10, %arg10, %arg5, %csti32, %csti32, %csti32 :
(vector<2xi16>, vector<2xi16>, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>
}
llvm.func @rocdl.mubuf(%rsrc : vector<4xi32>, %vindex : i32,
- %offset : i32, %glc : i1,
- %slc : i1, %vdata1 : vector<1xf32>,
+ %offset : i32, %vdata1 : vector<1xf32>,
%vdata2 : vector<2xf32>, %vdata4 : vector<4xf32>) {
+ %glc = llvm.mlir.constant(false) : i1
+ %slc = llvm.mlir.constant(true) : i1
// CHECK-LABEL: rocdl.mubuf
- // CHECK: call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
+ // CHECK: call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}})
%r1 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<1xf32>
- // CHECK: call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
+ // CHECK: call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}})
%r2 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<2xf32>
- // CHECK: call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
+ // CHECK: call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}})
%r4 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<4xf32>
- // CHECK: call void @llvm.amdgcn.buffer.store.v1f32(<1 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
+ // CHECK: call void @llvm.amdgcn.buffer.store.v1f32(<1 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}})
rocdl.buffer.store %vdata1, %rsrc, %vindex, %offset, %glc, %slc : vector<1xf32>
- // CHECK: call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
+ // CHECK: call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}})
rocdl.buffer.store %vdata2, %rsrc, %vindex, %offset, %glc, %slc : vector<2xf32>
- // CHECK: call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
+ // CHECK: call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 {{.*}}, i1 {{.*}})
rocdl.buffer.store %vdata4, %rsrc, %vindex, %offset, %glc, %slc : vector<4xf32>
llvm.return