// TODO should we convert this to an AND if the RHS is constant?
}
break;
+ case Intrinsic::x86_bmi_pext_32:
+ case Intrinsic::x86_bmi_pext_64:
+   // PEXT packs the source bits selected by the mask into the low bits of the
+   // result. Nothing is simplified here unless the mask is a constant.
+   if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+     // An empty mask selects no bits, so the result is always zero.
+     if (MaskC->isNullValue())
+       return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
+     // A full mask selects every bit in place, so the source passes through.
+     if (MaskC->isAllOnesValue())
+       return replaceInstUsesWith(CI, II->getArgOperand(0));
+
+     // Source and mask are both constant: evaluate the extraction now.
+     if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
+       const uint64_t SrcBits = SrcC->getZExtValue();
+       uint64_t Folded = 0;
+       uint64_t OutBit = 1;
+
+       // Walk the set bits of the mask from least to most significant. Each
+       // step drops the lowest set mask bit and advances to the next
+       // consecutive output bit.
+       for (uint64_t Remaining = MaskC->getZExtValue(); Remaining != 0;
+            Remaining &= Remaining - 1, OutBit <<= 1) {
+         // Two's-complement negation isolates the lowest set bit.
+         const uint64_t LowestSet = Remaining & (~Remaining + 1);
+         if (SrcBits & LowestSet)
+           Folded |= OutBit;
+       }
+
+       return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Folded));
+     }
+   }
+   break;
+ case Intrinsic::x86_bmi_pdep_32:
+ case Intrinsic::x86_bmi_pdep_64:
+   // PDEP scatters the low bits of the source to the bit positions that are
+   // set in the mask. Nothing is simplified here unless the mask is constant.
+   if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+     // An empty mask offers no destination positions, so the result is zero.
+     if (MaskC->isNullValue())
+       return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
+     // A full mask maps every source bit onto itself.
+     if (MaskC->isAllOnesValue())
+       return replaceInstUsesWith(CI, II->getArgOperand(0));
+
+     // Source and mask are both constant: evaluate the deposit now.
+     if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
+       const uint64_t SrcBits = SrcC->getZExtValue();
+       uint64_t Folded = 0;
+       uint64_t InBit = 1;
+
+       // Walk the set bits of the mask from least to most significant. Each
+       // step drops the lowest set mask bit and advances to the next
+       // consecutive source bit.
+       for (uint64_t Remaining = MaskC->getZExtValue(); Remaining != 0;
+            Remaining &= Remaining - 1, InBit <<= 1) {
+         // Two's-complement negation isolates the lowest set bit, which is
+         // the destination position for the current source bit.
+         if (SrcBits & InBit)
+           Folded |= Remaining & (~Remaining + 1);
+       }
+
+       return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Folded));
+     }
+   }
+   break;
case Intrinsic::x86_vcvtph2ps_128:
case Intrinsic::x86_vcvtph2ps_256: {
declare i64 @llvm.x86.bmi.bextr.64(i64, i64) nounwind readnone
declare i32 @llvm.x86.bmi.bzhi.32(i32, i32) nounwind readnone
declare i64 @llvm.x86.bmi.bzhi.64(i64, i64) nounwind readnone
+declare i32 @llvm.x86.bmi.pext.32(i32, i32) nounwind readnone
+declare i64 @llvm.x86.bmi.pext.64(i64, i64) nounwind readnone
+declare i32 @llvm.x86.bmi.pdep.32(i32, i32) nounwind readnone
+declare i64 @llvm.x86.bmi.pdep.64(i64, i64) nounwind readnone
define i32 @test_x86_tbm_bextri_u32(i32 %a) nounwind readnone {
; CHECK-LABEL: @test_x86_tbm_bextri_u32(
%1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 5, i64 1)
ret i64 %1
}
+
+; 32-bit pext with a constant zero mask and a variable source: no source bits
+; are selected, so the call is expected to fold to the constant 0.
+define i32 @test_x86_pext_32_zero_mask(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_32_zero_mask(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 0)
+ ret i32 %1
+}
+
+; 64-bit pext with a constant zero mask and a variable source: no source bits
+; are selected, so the call is expected to fold to the constant 0.
+define i64 @test_x86_pext_64_zero_mask(i64 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_64_zero_mask(
+; CHECK-NEXT: ret i64 0
+;
+ %1 = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 0)
+ ret i64 %1
+}
+
+; 32-bit pext with an all-ones mask (-1) and a variable source: every bit is
+; selected in place, so the call is expected to be replaced by %x itself.
+define i32 @test_x86_pext_32_allones_mask(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_32_allones_mask(
+; CHECK-NEXT: ret i32 %x
+;
+ %1 = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 -1)
+ ret i32 %1
+}
+
+; 64-bit pext with an all-ones mask (-1) and a variable source: every bit is
+; selected in place, so the call is expected to be replaced by %x itself.
+define i64 @test_x86_pext_64_allones_mask(i64 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_64_allones_mask(
+; CHECK-NEXT: ret i64 %x
+;
+ %1 = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 -1)
+ ret i64 %1
+}
+
+; 32-bit pext with both operands constant.
+;   source 1985229328 = 0x76543210
+;   mask   4042322160 = 0xF0F0F0F0 (upper nibble of every byte)
+; The selected nibbles 1, 3, 5, 7 pack to 0x7531 = 30001.
+define i32 @test_x86_pext_32_constant_fold() nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_32_constant_fold(
+; CHECK-NEXT: ret i32 30001
+;
+ %1 = tail call i32 @llvm.x86.bmi.pext.32(i32 1985229328, i32 4042322160)
+ ret i32 %1
+}
+
+; 64-bit pext with both operands constant.
+;   source  8526495043095935640 = 0x76543210FEDCBA98
+;   mask   -1085102592571150096 = 0xF0F0F0F0F0F0F0F0 (upper nibble of every byte)
+; The selected nibbles pack to 0x7531FDB9 = 1966210489.
+define i64 @test_x86_pext_64_constant_fold() nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_64_constant_fold(
+; CHECK-NEXT: ret i64 1966210489
+;
+ %1 = tail call i64 @llvm.x86.bmi.pext.64(i64 8526495043095935640, i64 -1085102592571150096)
+ ret i64 %1
+}
+
+; 32-bit pext with both operands constant and a mask made of whole bytes.
+;   source 1985229328 = 0x76543210
+;   mask   4278190335 = 0xFF0000FF (lowest and highest byte)
+; The selected bytes 0x10 and 0x76 pack to 0x7610 = 30224.
+define i32 @test_x86_pext_32_constant_fold_2() nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_32_constant_fold_2(
+; CHECK-NEXT: ret i32 30224
+;
+ %1 = tail call i32 @llvm.x86.bmi.pext.32(i32 1985229328, i32 4278190335)
+ ret i32 %1
+}
+
+; 64-bit pext with both operands constant and a mask made of whole bytes.
+;   source  8526495043095935640 = 0x76543210FEDCBA98
+;   mask   -72056498804490496   = 0xFF0000FF00FFFF00 (bytes 1, 2, 4 and 7)
+; The selected bytes 0xBA, 0xDC, 0x10, 0x76 pack to 0x7610DCBA = 1980816570.
+define i64 @test_x86_pext_64_constant_fold_2() nounwind readnone {
+; CHECK-LABEL: @test_x86_pext_64_constant_fold_2(
+; CHECK-NEXT: ret i64 1980816570
+;
+ %1 = tail call i64 @llvm.x86.bmi.pext.64(i64 8526495043095935640, i64 -72056498804490496)
+ ret i64 %1
+}
+
+; 32-bit pdep with a constant zero mask and a variable source: there is no
+; destination position to deposit into, so the call is expected to fold to 0.
+define i32 @test_x86_pdep_32_zero_mask(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_32_zero_mask(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 0)
+ ret i32 %1
+}
+
+; 64-bit pdep with a constant zero mask and a variable source: there is no
+; destination position to deposit into, so the call is expected to fold to 0.
+define i64 @test_x86_pdep_64_zero_mask(i64 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_64_zero_mask(
+; CHECK-NEXT: ret i64 0
+;
+ %1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 0)
+ ret i64 %1
+}
+
+; 32-bit pdep with an all-ones mask (-1) and a variable source: every source
+; bit lands on its own position, so the call is expected to be replaced by %x.
+define i32 @test_x86_pdep_32_allones_mask(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_32_allones_mask(
+; CHECK-NEXT: ret i32 %x
+;
+ %1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 -1)
+ ret i32 %1
+}
+
+; 64-bit pdep with an all-ones mask (-1) and a variable source: every source
+; bit lands on its own position, so the call is expected to be replaced by %x.
+define i64 @test_x86_pdep_64_allones_mask(i64 %x) nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_64_allones_mask(
+; CHECK-NEXT: ret i64 %x
+;
+ %1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 -1)
+ ret i64 %1
+}
+
+; 32-bit pdep with both operands constant.
+;   source 1985229328 = 0x76543210
+;   mask   4042322160 = 0xF0F0F0F0 (upper nibble of every byte)
+; The low 16 source bits (0x3210) are spread over the mask nibbles, giving
+; 0x30201000 = 807407616.
+define i32 @test_x86_pdep_32_constant_fold() nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_32_constant_fold(
+; CHECK-NEXT: ret i32 807407616
+;
+ %1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 1985229328, i32 4042322160)
+ ret i32 %1
+}
+
+; 64-bit pdep with both operands constant.
+;   source  8526495043095935640 = 0x76543210FEDCBA98
+;   mask   -1085102592571150096 = 0xF0F0F0F0F0F0F0F0 (upper nibble of every byte)
+; The low 32 source bits (0xFEDCBA98) are spread over the mask nibbles, giving
+; 0xF0E0D0C0B0A09080, which prints as the signed value -1089641583808049024.
+define i64 @test_x86_pdep_64_constant_fold() nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_64_constant_fold(
+; CHECK-NEXT: ret i64 -1089641583808049024
+;
+ %1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 8526495043095935640, i64 -1085102592571150096)
+ ret i64 %1
+}
+
+; 32-bit pdep with both operands constant and a mask made of whole bytes.
+;   source 1985229328 = 0x76543210
+;   mask   4278190335 = 0xFF0000FF (lowest and highest byte)
+; Source bytes 0x10 and 0x32 are deposited into those two bytes, giving
+; 0x32000010 = 838860816.
+define i32 @test_x86_pdep_32_constant_fold_2() nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_32_constant_fold_2(
+; CHECK-NEXT: ret i32 838860816
+;
+ %1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 1985229328, i32 4278190335)
+ ret i32 %1
+}
+
+; 64-bit pdep with both operands constant and a mask made of whole bytes.
+;   source  8526495043095935640 = 0x76543210FEDCBA98
+;   mask   -72056498804490496   = 0xFF0000FF00FFFF00 (bytes 1, 2, 4 and 7)
+; Source bytes 0x98, 0xBA, 0xDC, 0xFE are deposited into those bytes, giving
+; 0xFE0000DC00BA9800, which prints as the signed value -144114243170822144.
+define i64 @test_x86_pdep_64_constant_fold_2() nounwind readnone {
+; CHECK-LABEL: @test_x86_pdep_64_constant_fold_2(
+; CHECK-NEXT: ret i64 -144114243170822144
+;
+ %1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 8526495043095935640, i64 -72056498804490496)
+ ret i64 %1
+}