return new ShuffleVectorInst(NewIntrinsic, Mask);
}
-/// Fold the following cases and accepts bswap and bitreverse intrinsics:
-/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
-/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
-template <Intrinsic::ID IntrID>
-static Instruction *foldBitOrderCrossLogicOp(Value *V,
- InstCombiner::BuilderTy &Builder) {
- static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
- "This helper only supports BSWAP and BITREVERSE intrinsics");
-
- Value *X, *Y;
- if (match(V, m_OneUse(m_BitwiseLogic(m_Value(X), m_Value(Y))))) {
- Value *OldReorderX, *OldReorderY;
- BinaryOperator::BinaryOps Op = cast<BinaryOperator>(V)->getOpcode();
-
- // If both X and Y are bswap/bitreverse, the transform reduces the number
- // of instructions even if there's multiuse.
- // If only one operand is bswap/bitreverse, we need to ensure the operand
- // have only one use.
- if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
- match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
- return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
- } else if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
- Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
- return BinaryOperator::Create(Op, OldReorderX, NewReorder);
- } else if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
- Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
- return BinaryOperator::Create(Op, NewReorder, OldReorderY);
- }
- }
- return nullptr;
-}
-
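For reference, a minimal IR sketch of the two bswap cases the deleted helper
handled (value names are illustrative, not taken from the tests below):

  ; Case 1: one reordered operand; the inner bswap must have a single use.
  %x.bs = call i16 @llvm.bswap.i16(i16 %x)
  %op = and i16 %x.bs, %y
  %res = call i16 @llvm.bswap.i16(i16 %op)
  ; was folded, since bswap(bswap(x)) == x and bswap distributes over
  ; and/or/xor, to:
  %y.bs = call i16 @llvm.bswap.i16(i16 %y)
  %res.folded = and i16 %x, %y.bs

  ; Case 2: both operands reordered; extra uses of the inner bswaps are
  ; tolerated because the fold still removes instructions on net.
  ; bswap(and(bswap(x), bswap(y))) was folded to plain:
  %res2 = and i16 %x, %y

After this revert, InstCombine leaves such patterns in their original form,
which is what the updated FileCheck lines below now expect.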
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
Value *V = Builder.CreateLShr(X, CV);
return new TruncInst(V, IIOperand->getType());
}
-
- if (Instruction *crossLogicOpFold =
- foldBitOrderCrossLogicOp<Intrinsic::bswap>(IIOperand, Builder)) {
- return crossLogicOpFold;
- }
-
break;
}
case Intrinsic::masked_load:
-; Fold: BSWAP( OP( BSWAP(x), y ) ) -> OP( x, BSWAP(y) )
+; The fold BSWAP( OP( BSWAP(x), y ) ) -> OP( x, BSWAP(y) ) has been reverted;
+; these tests now check that the bswap calls are kept as written.
define i16 @bs_and_lhs_bs16(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_and_lhs_bs16(
-; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[B:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], [[A:%.*]]
-; CHECK-NEXT: ret i16 [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT: ret i16 [[TMP3]]
;
%1 = tail call i16 @llvm.bswap.i16(i16 %a)
%2 = and i16 %1, %b
define i16 @bs_or_lhs_bs16(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_or_lhs_bs16(
-; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[B:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = or i16 [[TMP1]], [[A:%.*]]
-; CHECK-NEXT: ret i16 [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = or i16 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT: ret i16 [[TMP3]]
;
%1 = tail call i16 @llvm.bswap.i16(i16 %a)
%2 = or i16 %1, %b
define i16 @bs_xor_lhs_bs16(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_xor_lhs_bs16(
-; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[B:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = xor i16 [[TMP1]], [[A:%.*]]
-; CHECK-NEXT: ret i16 [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = xor i16 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT: ret i16 [[TMP3]]
;
%1 = tail call i16 @llvm.bswap.i16(i16 %a)
%2 = xor i16 %1, %b
define i16 @bs_and_rhs_bs16(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_and_rhs_bs16(
-; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT: ret i16 [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT: ret i16 [[TMP3]]
;
%1 = tail call i16 @llvm.bswap.i16(i16 %b)
%2 = and i16 %a, %1
define i16 @bs_or_rhs_bs16(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_or_rhs_bs16(
-; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = or i16 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT: ret i16 [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = or i16 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT: ret i16 [[TMP3]]
;
%1 = tail call i16 @llvm.bswap.i16(i16 %b)
%2 = or i16 %a, %1
define i16 @bs_xor_rhs_bs16(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_xor_rhs_bs16(
-; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = xor i16 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT: ret i16 [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = xor i16 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT: ret i16 [[TMP3]]
;
%1 = tail call i16 @llvm.bswap.i16(i16 %b)
%2 = xor i16 %a, %1
define i32 @bs_and_rhs_bs32(i32 %a, i32 %b) #0 {
; CHECK-LABEL: @bs_and_rhs_bs32(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[A:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = tail call i32 @llvm.bswap.i32(i32 %b)
%2 = and i32 %a, %1
define i32 @bs_or_rhs_bs32(i32 %a, i32 %b) #0 {
; CHECK-LABEL: @bs_or_rhs_bs32(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[A:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = tail call i32 @llvm.bswap.i32(i32 %b)
%2 = or i32 %a, %1
define i32 @bs_xor_rhs_bs32(i32 %a, i32 %b) #0 {
; CHECK-LABEL: @bs_xor_rhs_bs32(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[A:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = tail call i32 @llvm.bswap.i32(i32 %b)
%2 = xor i32 %a, %1
define i64 @bs_and_rhs_bs64(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_and_rhs_bs64(
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[A:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT: ret i64 [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT: ret i64 [[TMP3]]
;
%1 = tail call i64 @llvm.bswap.i64(i64 %b)
%2 = and i64 %a, %1
define i64 @bs_or_rhs_bs64(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_or_rhs_bs64(
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[A:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT: ret i64 [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT: ret i64 [[TMP3]]
;
%1 = tail call i64 @llvm.bswap.i64(i64 %b)
%2 = or i64 %a, %1
define i64 @bs_xor_rhs_bs64(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_xor_rhs_bs64(
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[A:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT: ret i64 [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT: ret i64 [[TMP3]]
;
%1 = tail call i64 @llvm.bswap.i64(i64 %b)
%2 = xor i64 %a, %1
define <2 x i32> @bs_and_rhs_i32vec(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: @bs_and_rhs_i32vec(
-; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[A:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
-; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
%2 = and <2 x i32> %a, %1
define <2 x i32> @bs_or_rhs_i32vec(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: @bs_or_rhs_i32vec(
-; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[A:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], [[B:%.*]]
-; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
%2 = or <2 x i32> %a, %1
define <2 x i32> @bs_xor_rhs_i32vec(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: @bs_xor_rhs_i32vec(
-; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[A:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], [[B:%.*]]
-; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
%2 = xor <2 x i32> %a, %1
; CHECK-LABEL: @bs_all_operand64_multiuse_both(
; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
-; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[A]], [[B]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP3]])
; CHECK-NEXT: call void @use.i64(i64 [[TMP1]])
; CHECK-NEXT: call void @use.i64(i64 [[TMP2]])
-; CHECK-NEXT: ret i64 [[TMP3]]
+; CHECK-NEXT: ret i64 [[TMP4]]
;
%1 = tail call i64 @llvm.bswap.i64(i64 %a)
%2 = tail call i64 @llvm.bswap.i64(i64 %b)