Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512_mask">,
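+ // Unmasked forms: only the two vector operands and the rounding-mode
+ // immediate remain; the removed masked variants are auto-upgraded to
+ // these plus an explicit select (see AutoUpgrade.cpp).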
+ def int_x86_avx512_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512_mask">,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512_mask">,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512_mask">,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_add_ss_round : GCCBuiltin<"__builtin_ia32_addss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
- Name == "avx512.mask.max.pd.128" || // Added in 5.0
- Name == "avx512.mask.max.pd.256" || // Added in 5.0
- Name == "avx512.mask.max.ps.128" || // Added in 5.0
- Name == "avx512.mask.max.ps.256" || // Added in 5.0
- Name == "avx512.mask.min.pd.128" || // Added in 5.0
- Name == "avx512.mask.min.pd.256" || // Added in 5.0
- Name == "avx512.mask.min.ps.128" || // Added in 5.0
- Name == "avx512.mask.min.ps.256" || // Added in 5.0
Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
Name.startswith("avx512.mask.psll.d") || // Added in 4.0
Name.startswith("avx512.mask.psll.q") || // Added in 4.0
Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
- Name.startswith("avx512.mask.add.p") || // Added in 7.0
- Name.startswith("avx512.mask.sub.p") || // Added in 7.0
- Name.startswith("avx512.mask.mul.p") || // Added in 7.0
- Name.startswith("avx512.mask.div.p") || // Added in 7.0
+ Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
+ Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
+ Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
+ Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
+ Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
+ Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
Name == "sse.cvtsi2ss" || // Added in 7.0
Name == "sse.cvtsi642ss" || // Added in 7.0
Name == "sse2.cvtsi2sd" || // Added in 7.0
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
+ } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
+                      Name.startswith("avx512.mask.min.p")) &&
+            Name.drop_front(18) == ".512") {
+   // Name is "avx512.mask.{max,min}.p{s,d}.512": Name[13] distinguishes
+   // min from max and Name[17] gives the element type.
+   bool IsDouble = Name[17] == 'd';
+   bool IsMin = Name[13] == 'i';
+   static const Intrinsic::ID MinMaxTbl[2][2] = {
+     { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
+     { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
+   };
+   Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
+
+   // Call the unmasked intrinsic, forwarding the rounding-mode operand (4),
+   // then reapply the mask (operand 3) with an explicit select over the
+   // passthru value (operand 2).
+   Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+                            { CI->getArgOperand(0), CI->getArgOperand(1),
+                              CI->getArgOperand(4) });
+   Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+                       CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
Intrinsic::ctlz,
X86ISD::VGETMANTS, X86ISD::VGETMANTS_RND),
X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK,
X86ISD::VGETMANTS, X86ISD::VGETMANTS_RND),
- X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
- X86ISD::FMAX_RND),
- X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
- X86ISD::FMAX_RND),
X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK,
X86ISD::FMAXS, X86ISD::FMAXS_RND),
X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK,
X86ISD::FMAXS, X86ISD::FMAXS_RND),
- X86_INTRINSIC_DATA(avx512_mask_min_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
- X86ISD::FMIN_RND),
- X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
- X86ISD::FMIN_RND),
X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK,
X86ISD::FMINS, X86ISD::FMINS_RND),
X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK,
X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
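+  // Unmasked 512-bit FP max/min: plain INTR_TYPE_2OP with a rounding-mode
+  // operand, in the same form as the mul entries below.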
+ X86_INTRINSIC_DATA(avx512_max_pd_512, INTR_TYPE_2OP, X86ISD::FMAX, X86ISD::FMAX_RND),
+ X86_INTRINSIC_DATA(avx512_max_ps_512, INTR_TYPE_2OP, X86ISD::FMAX, X86ISD::FMAX_RND),
+ X86_INTRINSIC_DATA(avx512_min_pd_512, INTR_TYPE_2OP, X86ISD::FMIN, X86ISD::FMIN_RND),
+ X86_INTRINSIC_DATA(avx512_min_ps_512, INTR_TYPE_2OP, X86ISD::FMIN, X86ISD::FMIN_RND),
X86_INTRINSIC_DATA(avx512_mul_pd_512, INTR_TYPE_2OP, ISD::FMUL, X86ISD::FMUL_RND),
X86_INTRINSIC_DATA(avx512_mul_ps_512, INTR_TYPE_2OP, ISD::FMUL, X86ISD::FMUL_RND),
X86_INTRINSIC_DATA(avx512_packssdw_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
ret float %vecext.i
}
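+
+; The masked 512-bit max/min operations are now expressed as the unmasked
+; @llvm.x86.avx512.{max,min}.{ps,pd}.512 call plus a bitcast of the mask and
+; a select, as the tests below exercise.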
+define <8 x double> @test_mm512_mask_max_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; X86-LABEL: test_mm512_mask_max_pd:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_max_pd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W
+ ret <8 x double> %2
+}
+
+define <8 x double> @test_mm512_maskz_max_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; X86-LABEL: test_mm512_maskz_max_pd:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_maskz_max_pd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
+ ret <8 x double> %2
+}
+
+define <16 x float> @test_mm512_mask_max_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; X86-LABEL: test_mm512_mask_max_ps:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_max_ps:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W
+ ret <16 x float> %2
+}
+
+define <8 x double> @test_mm512_mask_max_round_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; X86-LABEL: test_mm512_mask_max_round_pd:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_max_round_pd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W
+ ret <8 x double> %2
+}
+
+declare <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double>, <8 x double>, i32)
+
+define <8 x double> @test_mm512_maskz_max_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; X86-LABEL: test_mm512_maskz_max_round_pd:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_maskz_max_round_pd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
+ ret <8 x double> %2
+}
+
+define <8 x double> @test_mm512_max_round_pd(<8 x double> %__A, <8 x double> %__B) {
+; CHECK-LABEL: test_mm512_max_round_pd:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+entry:
+ %0 = tail call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4)
+ ret <8 x double> %0
+}
+
+define <16 x float> @test_mm512_maskz_max_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; X86-LABEL: test_mm512_maskz_max_ps:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_maskz_max_ps:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
+ ret <16 x float> %2
+}
+
+define <16 x float> @test_mm512_mask_max_round_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; X86-LABEL: test_mm512_mask_max_round_ps:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_max_round_ps:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W
+ ret <16 x float> %2
+}
+
+declare <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float>, <16 x float>, i32)
+
+define <16 x float> @test_mm512_maskz_max_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; X86-LABEL: test_mm512_maskz_max_round_ps:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_maskz_max_round_ps:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
+ ret <16 x float> %2
+}
+
+define <16 x float> @test_mm512_max_round_ps(<16 x float> %__A, <16 x float> %__B) {
+; CHECK-LABEL: test_mm512_max_round_ps:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+entry:
+ %0 = tail call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4)
+ ret <16 x float> %0
+}
+
+define <8 x double> @test_mm512_mask_min_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; X86-LABEL: test_mm512_mask_min_pd:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_min_pd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W
+ ret <8 x double> %2
+}
+
+define <8 x double> @test_mm512_maskz_min_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; X86-LABEL: test_mm512_maskz_min_pd:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vminpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_maskz_min_pd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vminpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
+ ret <8 x double> %2
+}
+
+define <8 x double> @test_mm512_mask_min_round_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; X86-LABEL: test_mm512_mask_min_round_pd:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_min_round_pd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__W
+ ret <8 x double> %2
+}
+
+declare <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double>, <8 x double>, i32)
+
+define <8 x double> @test_mm512_maskz_min_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; X86-LABEL: test_mm512_maskz_min_round_pd:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vminpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_maskz_min_round_pd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vminpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
+ ret <8 x double> %2
+}
+
+define <8 x double> @test_mm512_min_round_pd(<8 x double> %__A, <8 x double> %__B) {
+; CHECK-LABEL: test_mm512_min_round_pd:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vminpd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+entry:
+ %0 = tail call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %__A, <8 x double> %__B, i32 4)
+ ret <8 x double> %0
+}
+
+define <16 x float> @test_mm512_mask_min_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; X86-LABEL: test_mm512_mask_min_ps:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_min_ps:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W
+ ret <16 x float> %2
+}
+
+define <16 x float> @test_mm512_maskz_min_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; X86-LABEL: test_mm512_maskz_min_ps:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_maskz_min_ps:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
+ ret <16 x float> %2
+}
+
+define <16 x float> @test_mm512_mask_min_round_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; X86-LABEL: test_mm512_mask_min_round_ps:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_min_round_ps:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__W
+ ret <16 x float> %2
+}
+
+declare <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float>, <16 x float>, i32)
+
+define <16 x float> @test_mm512_maskz_min_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; X86-LABEL: test_mm512_maskz_min_round_ps:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm512_maskz_min_round_ps:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
+ ret <16 x float> %2
+}
+
+define <16 x float> @test_mm512_min_round_ps(<16 x float> %__A, <16 x float> %__B) {
+; CHECK-LABEL: test_mm512_min_round_ps:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+entry:
+ %0 = tail call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %__A, <16 x float> %__B, i32 4)
+ ret <16 x float> %0
+}
+
declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>) #9
declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>) #9
declare float @llvm.fma.f32(float, float, float) #9
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 -1)
ret <16 x i32> %res
}
+
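+; These legacy masked calls are handled by the auto-upgrader; check that they
+; still produce the expected instruction and encoding.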
+define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+; X86-LABEL: test_mm512_maskz_min_round_ps_sae:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5d,0xc1]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_maskz_min_round_ps_sae:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5d,0xc1]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+; X86-LABEL: test_mm512_maskz_min_round_ps_current:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5d,0xc1]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_maskz_min_round_ps_current:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5d,0xc1]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+; X86-LABEL: test_mm512_mask_min_round_ps_sae:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5d,0xd1]
+; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_mask_min_round_ps_sae:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5d,0xd1]
+; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+; X86-LABEL: test_mm512_mask_min_round_ps_current:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5d,0xd1]
+; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_mask_min_round_ps_current:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5d,0xd1]
+; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+; CHECK-LABEL: test_mm512_min_round_ps_sae:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5d,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+; CHECK-LABEL: test_mm512_min_round_ps_current:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5d,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+; X86-LABEL: test_mm512_maskz_max_round_ps_sae:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5f,0xc1]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_maskz_max_round_ps_sae:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5f,0xc1]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+; X86-LABEL: test_mm512_maskz_max_round_ps_current:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5f,0xc1]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_maskz_max_round_ps_current:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5f,0xc1]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+; X86-LABEL: test_mm512_mask_max_round_ps_sae:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5f,0xd1]
+; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_mask_max_round_ps_sae:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5f,0xd1]
+; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+; X86-LABEL: test_mm512_mask_max_round_ps_current:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5f,0xd1]
+; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_mask_max_round_ps_current:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5f,0xd1]
+; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+; CHECK-LABEL: test_mm512_max_round_ps_sae:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5f,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+; CHECK-LABEL: test_mm512_max_round_ps_current:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5f,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; CHECK: ## %bb.0:
; CHECK-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
- <8 x double>zeroinitializer, i8 -1, i32 4)
- ret <8 x double> %res
+ %1 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
+ ret <8 x double> %1
}
-declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
- <8 x double>, i8, i32)
+declare <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double>, <8 x double>, i32)
define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
; CHECK-LABEL: test_vminpd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vminpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
- <8 x double>zeroinitializer, i8 -1, i32 4)
- ret <8 x double> %res
+ %1 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
+ ret <8 x double> %1
}
-declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
- <8 x double>, i8, i32)
+declare <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double>, <8 x double>, i32)
define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
; CHECK-LABEL: test_mask_store_ss:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
- ret <16 x float> %res
+ %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
}
define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
- ret <16 x float> %res
+ %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
- ret <16 x float> %res
+ %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
- ret <16 x float> %res
+ %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
}
define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK: ## %bb.0:
; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
- ret <16 x float> %res
+ %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ ret <16 x float> %1
}
define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK: ## %bb.0:
; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
- ret <16 x float> %res
+ %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ ret <16 x float> %1
}
-declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+declare <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float>, <16 x float>, i32)
define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_max_round_ps_sae:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
- ret <16 x float> %res
+ %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
}
define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
- ret <16 x float> %res
+ %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
- ret <16 x float> %res
+ %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
}
define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
- ret <16 x float> %res
+ %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
}
define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK: ## %bb.0:
; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
- ret <16 x float> %res
+ %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ ret <16 x float> %1
}
define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK: ## %bb.0:
; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
- ret <16 x float> %res
+ %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ ret <16 x float> %1
}
-declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+declare <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float>, <16 x float>, i32)
declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
;CHECK-LABEL: stack_fold_maxpd_zmm
;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> zeroinitializer, i8 -1, i32 4)
+ %2 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
ret <8 x double> %2
}
-declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double>, <8 x double>, i32) nounwind readnone
define <8 x double> @stack_fold_maxpd_zmm_commutable(<8 x double> %a0, <8 x double> %a1) #1 {
;CHECK-LABEL: stack_fold_maxpd_zmm_commutable
;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> zeroinitializer, i8 -1, i32 4)
+ %2 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
ret <8 x double> %2
}
;CHECK-LABEL: stack_fold_maxpd_zmm_commutable_kz
;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a1, <8 x double> %a0, <8 x double> zeroinitializer, i8 %mask, i32 4)
- ret <8 x double> %2
+ %2 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a1, <8 x double> %a0, i32 4)
+ %3 = bitcast i8 %mask to <8 x i1>
+ %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
+ ret <8 x double> %4
}
define <16 x float> @stack_fold_maxps_zmm(<16 x float> %a0, <16 x float> %a1) #0 {
;CHECK-LABEL: stack_fold_maxps_zmm
;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
+ %2 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
ret <16 x float> %2
}
-declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
+declare <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float>, <16 x float>, i32) nounwind readnone
define <16 x float> @stack_fold_maxps_zmm_commutable(<16 x float> %a0, <16 x float> %a1) #1 {
;CHECK-LABEL: stack_fold_maxps_zmm_commutable
;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
+ %2 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
ret <16 x float> %2
}
;CHECK-LABEL: stack_fold_maxps_zmm_commutable_kz
;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a1, <16 x float> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
- ret <16 x float> %2
+ %2 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a1, <16 x float> %a0, i32 4)
+ %3 = bitcast i16 %mask to <16 x i1>
+ %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
+ ret <16 x float> %4
}
define <8 x double> @stack_fold_minpd_zmm(<8 x double> %a0, <8 x double> %a1) #0 {
;CHECK-LABEL: stack_fold_minpd_zmm
;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> zeroinitializer, i8 -1, i32 4)
+ %2 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
ret <8 x double> %2
}
-declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double>, <8 x double>, i32) nounwind readnone
define <8 x double> @stack_fold_minpd_zmm_commutable(<8 x double> %a0, <8 x double> %a1) #1 {
;CHECK-LABEL: stack_fold_minpd_zmm_commutable
;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> zeroinitializer, i8 -1, i32 4)
+ %2 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
ret <8 x double> %2
}
;CHECK-LABEL: stack_fold_minpd_zmm_commutable_kz
;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a1, <8 x double> %a0, <8 x double> zeroinitializer, i8 %mask, i32 4)
- ret <8 x double> %2
+ %2 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a1, <8 x double> %a0, i32 4)
+ %3 = bitcast i8 %mask to <8 x i1>
+ %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
+ ret <8 x double> %4
}
define <16 x float> @stack_fold_minps_zmm(<16 x float> %a0, <16 x float> %a1) #0 {
;CHECK-LABEL: stack_fold_minps_zmm
;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
+ %2 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
ret <16 x float> %2
}
-declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
+declare <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float>, <16 x float>, i32) nounwind readnone
define <16 x float> @stack_fold_minps_zmm_commutable(<16 x float> %a0, <16 x float> %a1) #1 {
;CHECK-LABEL: stack_fold_minps_zmm_commutable
;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
+ %2 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
ret <16 x float> %2
}
;CHECK-LABEL: stack_fold_minps_zmm_commutable_kz
;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a1, <16 x float> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
- ret <16 x float> %2
+ %2 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a1, <16 x float> %a0, i32 4)
+ %3 = bitcast i16 %mask to <16 x i1>
+ %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
+ ret <16 x float> %4
}
define double @stack_fold_mulsd(double %a0, double %a1) {