From ded62411f730278016274361e5ce4e5234cc2957 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 12 Jul 2022 14:08:47 +0100 Subject: [PATCH] [DAG] SimplifyDemandedBits - AND/OR/XOR - attempt basic knownbits simplifications before calling SimplifyMultipleUseDemandedBits Noticed while investigating the SystemZ regressions in D77804, prefer handling the knownbits analysis/simplification in the bitop nodes directly before falling back to SimplifyMultipleUseDemandedBits --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 82 +++++++++++----------- .../CodeGen/SystemZ/store_nonbytesized_vecs.ll | 58 ++++++++------- 2 files changed, 69 insertions(+), 71 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index d8f2554..d08b195 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1394,20 +1394,6 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); - // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { - SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( - Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); - SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( - Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); - if (DemandedOp0 || DemandedOp1) { - Op0 = DemandedOp0 ? DemandedOp0 : Op0; - Op1 = DemandedOp1 ? DemandedOp1 : Op1; - SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); - return TLO.CombineTo(Op, NewOp); - } - } - // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. if (DemandedBits.isSubsetOf(Known2.Zero | Known.One)) @@ -1425,6 +1411,20 @@ bool TargetLowering::SimplifyDemandedBits( if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + Known &= Known2; break; } @@ -1441,6 +1441,19 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. + if (DemandedBits.isSubsetOf(Known2.One | Known.Zero)) + return TLO.CombineTo(Op, Op0); + if (DemandedBits.isSubsetOf(Known.One | Known2.Zero)) + return TLO.CombineTo(Op, Op1); + // If the RHS is a constant, see if we can simplify it. + if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) + return true; + // If the operation can be done in a smaller type, do so. + if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) + return true; + // Attempt to avoid multi-use ops if we don't need anything from them. if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( @@ -1455,19 +1468,6 @@ bool TargetLowering::SimplifyDemandedBits( } } - // If all of the demanded bits are known zero on one side, return the other. - // These bits cannot contribute to the result of the 'or'. - if (DemandedBits.isSubsetOf(Known2.One | Known.Zero)) - return TLO.CombineTo(Op, Op0); - if (DemandedBits.isSubsetOf(Known.One | Known2.Zero)) - return TLO.CombineTo(Op, Op1); - // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) - return true; - // If the operation can be done in a smaller type, do so. - if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) - return true; - Known |= Known2; break; } @@ -1484,20 +1484,6 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); - // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { - SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( - Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); - SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( - Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); - if (DemandedOp0 || DemandedOp1) { - Op0 = DemandedOp0 ? DemandedOp0 : Op0; - Op1 = DemandedOp1 ? DemandedOp1 : Op1; - SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); - return TLO.CombineTo(Op, NewOp); - } - } - // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. if (DemandedBits.isSubsetOf(Known.Zero)) @@ -1542,6 +1528,20 @@ bool TargetLowering::SimplifyDemandedBits( if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) return true; + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + Known ^= Known2; break; } diff --git a/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll b/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll index 897b27d..bac5f4c 100644 --- a/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll +++ b/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll @@ -77,40 +77,38 @@ define void @fun2(<8 x i32> %src, <8 x i31>* %p) ; CHECK-NEXT: .cfi_offset %r14, -48 ; CHECK-NEXT: .cfi_offset %r15, -40 ; CHECK-NEXT: vlgvf %r0, %v26, 3 -; CHECK-NEXT: vlgvf %r5, %v24, 2 +; CHECK-NEXT: vlgvf %r4, %v24, 1 +; CHECK-NEXT: vlgvf %r3, %v24, 2 ; CHECK-NEXT: srlk %r1, %r0, 8 -; CHECK-NEXT: vlgvf %r3, %v24, 3 -; CHECK-NEXT: sth %r1, 28(%r2) -; CHECK-NEXT: vlgvf %r1, %v26, 0 -; CHECK-NEXT: risbgn %r14, %r5, 6, 164, 27 -; CHECK-NEXT: sllg %r4, %r3, 60 -; CHECK-NEXT: stc %r0, 30(%r2) -; CHECK-NEXT: rosbg %r14, %r3, 37, 63, 60 -; CHECK-NEXT: sllg %r3, %r14, 8 -; CHECK-NEXT: rosbg %r4, %r1, 4, 34, 29 -; CHECK-NEXT: rosbg %r3, %r4, 56, 63, 8 -; CHECK-NEXT: stg %r3, 8(%r2) -; CHECK-NEXT: vlgvf %r3, %v24, 1 -; CHECK-NEXT: sllg %r4, %r3, 58 -; CHECK-NEXT: rosbg %r4, %r5, 6, 36, 27 ; CHECK-NEXT: vlgvf %r5, %v24, 0 +; CHECK-NEXT: sth %r1, 28(%r2) +; CHECK-NEXT: sllg %r1, %r4, 58 ; CHECK-NEXT: sllg %r5, %r5, 25 -; CHECK-NEXT: rosbg %r5, %r3, 39, 63, 58 -; CHECK-NEXT: sllg %r3, %r5, 8 -; CHECK-NEXT: rosbg %r3, %r4, 56, 63, 8 -; CHECK-NEXT: vlgvf %r4, %v26, 1 -; CHECK-NEXT: stg %r3, 0(%r2) -; CHECK-NEXT: vlgvf %r3, %v26, 2 -; CHECK-NEXT: sllg %r5, %r4, 62 -; CHECK-NEXT: rosbg %r5, %r3, 2, 32, 31 -; CHECK-NEXT: rosbg %r5, %r0, 33, 63, 0 -; CHECK-NEXT: risbgn %r0, %r1, 4, 162, 29 -; CHECK-NEXT: rosbg %r0, %r4, 35, 63, 62 -; CHECK-NEXT: sllg %r0, %r0, 8 -; CHECK-NEXT: rosbg %r0, %r5, 56, 63, 8 -; CHECK-NEXT: stg %r0, 16(%r2) -; CHECK-NEXT: srlg %r0, %r5, 24 +; CHECK-NEXT: stc %r0, 30(%r2) +; CHECK-NEXT: rosbg %r1, %r3, 6, 36, 27 +; CHECK-NEXT: vlgvf %r3, %v24, 3 +; CHECK-NEXT: rosbg %r5, %r4, 39, 63, 58 +; CHECK-NEXT: sllg %r4, %r5, 8 +; CHECK-NEXT: rosbg %r1, %r3, 37, 63, 60 +; CHECK-NEXT: vlgvf %r5, %v26, 1 +; CHECK-NEXT: rosbg %r4, %r1, 56, 63, 8 +; CHECK-NEXT: stg %r4, 0(%r2) +; CHECK-NEXT: vlgvf %r4, %v26, 2 +; CHECK-NEXT: sllg %r14, %r5, 62 +; CHECK-NEXT: sllg %r3, %r3, 60 +; CHECK-NEXT: rosbg %r14, %r4, 2, 32, 31 +; CHECK-NEXT: rosbg %r14, %r0, 33, 63, 0 +; CHECK-NEXT: srlg %r0, %r14, 24 ; CHECK-NEXT: st %r0, 24(%r2) +; CHECK-NEXT: vlgvf %r0, %v26, 0 +; CHECK-NEXT: rosbg %r3, %r0, 4, 34, 29 +; CHECK-NEXT: sllg %r0, %r1, 8 +; CHECK-NEXT: rosbg %r3, %r5, 35, 63, 62 +; CHECK-NEXT: rosbg %r0, %r3, 56, 63, 8 +; CHECK-NEXT: stg %r0, 8(%r2) +; CHECK-NEXT: sllg %r0, %r3, 8 +; CHECK-NEXT: rosbg %r0, %r14, 56, 63, 8 +; CHECK-NEXT: stg %r0, 16(%r2) ; CHECK-NEXT: lmg %r14, %r15, 112(%r15) ; CHECK-NEXT: br %r14 { -- 2.7.4