From a9c5a98f810d380919d25c3073c61725d73d3c41 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 18 Jan 2023 13:29:37 -0600 Subject: [PATCH] [SystemZ] Improvement in tryRxSBG(). Only allow replacements of nodes that have a single user. This is better as simple instructions (e.g. XGRK) are one cycle faster, and it helps in cases where both inputs share a common node. Review: Ulrich Weigand --- llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 11 +++++++---- llvm/test/CodeGen/SystemZ/flt-rounds.ll | 23 ++++++++++------------ llvm/test/CodeGen/SystemZ/rnsbg-01.ll | 5 +++-- .../CodeGen/SystemZ/store_nonbytesized_vecs.ll | 23 +++++++++++----------- 4 files changed, 32 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 59d4639..250edf6 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1071,10 +1071,13 @@ bool SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { }; unsigned Count[] = { 0, 0 }; for (unsigned I = 0; I < 2; ++I) - while (expandRxSBG(RxSBG[I])) - // The widening or narrowing is expected to be free. - // Counting widening or narrowing as a saved operation will result in - // preferring an R*SBG over a simple shift/logical instruction. + while (RxSBG[I].Input->hasOneUse() && expandRxSBG(RxSBG[I])) + // In cases of multiple users it seems better to keep the simple + // instruction as they are one cycle faster, and it also helps in cases + // where both inputs share a common node. + // The widening or narrowing is expected to be free. Counting widening + // or narrowing as a saved operation will result in preferring an R*SBG + // over a simple shift/logical instruction. if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND && RxSBG[I].Input.getOpcode() != ISD::TRUNCATE) Count[I] += 1; diff --git a/llvm/test/CodeGen/SystemZ/flt-rounds.ll b/llvm/test/CodeGen/SystemZ/flt-rounds.ll index 5500010..37a7806 100644 --- a/llvm/test/CodeGen/SystemZ/flt-rounds.ll +++ b/llvm/test/CodeGen/SystemZ/flt-rounds.ll @@ -8,11 +8,10 @@ define dso_local signext i32 @test_flt_rounds() nounwind { ; CHECK-LABEL: test_flt_rounds: ; CHECK: # %bb.0: ; CHECK-NEXT: efpc %r0 -; CHECK-NEXT: lr %r1, %r0 -; CHECK-NEXT: nilf %r1, 3 -; CHECK-NEXT: rxsbg %r1, %r0, 63, 63, 63 -; CHECK-NEXT: xilf %r1, 1 -; CHECK-NEXT: llgfr %r2, %r1 +; CHECK-NEXT: nilf %r0, 3 +; CHECK-NEXT: rxsbg %r0, %r0, 33, 63, 63 +; CHECK-NEXT: xilf %r0, 1 +; CHECK-NEXT: llgfr %r2, %r0 ; CHECK-NEXT: br %r14 %1 = call i32 @llvm.get.rounding() ret i32 %1 @@ -25,18 +24,16 @@ define dso_local signext i32 @test_order(i32 noundef signext %0) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: stmg %r13, %r15, 104(%r15) ; CHECK-NEXT: aghi %r15, -160 -; CHECK-NEXT: efpc %r0 -; CHECK-NEXT: lr %r13, %r0 +; CHECK-NEXT: efpc %r13 ; CHECK-NEXT: nilf %r13, 3 -; CHECK-NEXT: rxsbg %r13, %r0, 63, 63, 63 +; CHECK-NEXT: rxsbg %r13, %r13, 33, 63, 63 ; CHECK-NEXT: xilf %r13, 1 ; CHECK-NEXT: brasl %r14, fesetround@PLT ; CHECK-NEXT: efpc %r0 -; CHECK-NEXT: lr %r1, %r0 -; CHECK-NEXT: nilf %r1, 3 -; CHECK-NEXT: rxsbg %r1, %r0, 63, 63, 63 -; CHECK-NEXT: xilf %r1, 1 -; CHECK-NEXT: crje %r13, %r1, .LBB1_2 +; CHECK-NEXT: nilf %r0, 3 +; CHECK-NEXT: rxsbg %r0, %r0, 33, 63, 63 +; CHECK-NEXT: xilf %r0, 1 +; CHECK-NEXT: crje %r13, %r0, .LBB1_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lhi %r0, 1 ; CHECK-NEXT: strl %r0, changed diff --git a/llvm/test/CodeGen/SystemZ/rnsbg-01.ll b/llvm/test/CodeGen/SystemZ/rnsbg-01.ll index 428a05c..0595364 100644 --- a/llvm/test/CodeGen/SystemZ/rnsbg-01.ll +++ b/llvm/test/CodeGen/SystemZ/rnsbg-01.ll @@ -219,7 +219,8 @@ define i64 @f19(i64 %a, i64 %b, ptr %dest) { ; Test a combination involving an ASHR in which the sign bits don't matter. define i32 @f20(i32 %a, i32 %b, ptr %dest) { ; CHECK-LABEL: f20: -; CHECK: rnsbg %r2, %r3, 48, 62, 48 +; CHECK-NOT: lr +; CHECK: rnsbg %r2, %r3, 48, 62, 1 ; CHECK: br %r14 %ashrb = ashr i32 %b, 17 store i32 %ashrb, ptr %dest @@ -232,7 +233,7 @@ define i32 @f20(i32 %a, i32 %b, ptr %dest) { ; ...and again with i64. define i64 @f21(i64 %a, i64 %b, ptr %dest) { ; CHECK-LABEL: f21: -; CHECK: rnsbg %r2, %r3, 48, 62, 16 +; CHECK: rnsbg %r2, %r0, 48, 62, 1 ; CHECK: br %r14 %ashrb = ashr i64 %b, 49 store i64 %ashrb, ptr %dest diff --git a/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll b/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll index 2687857..1507f2c35 100644 --- a/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll +++ b/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll @@ -77,36 +77,37 @@ define void @fun2(<8 x i32> %src, ptr %p) ; CHECK-NEXT: vlgvf %r5, %v24, 0 ; CHECK-NEXT: vlgvf %r3, %v24, 1 ; CHECK-NEXT: srlk %r0, %r1, 8 -; CHECK-NEXT: sllg %r5, %r5, 33 ; CHECK-NEXT: sth %r0, 28(%r2) -; CHECK-NEXT: rosbg %r5, %r3, 31, 55, 2 ; CHECK-NEXT: vlgvf %r0, %v24, 2 +; CHECK-NEXT: sllg %r5, %r5, 33 ; CHECK-NEXT: sllg %r4, %r3, 58 +; CHECK-NEXT: risbgn %r0, %r0, 6, 164, 27 +; CHECK-NEXT: rosbg %r5, %r3, 31, 55, 2 ; CHECK-NEXT: vlgvf %r3, %v26, 2 ; CHECK-NEXT: stc %r1, 30(%r2) -; CHECK-NEXT: rosbg %r4, %r0, 6, 36, 27 +; CHECK-NEXT: ogr %r4, %r0 ; CHECK-NEXT: risbgn %r1, %r1, 33, 167, 0 -; CHECK-NEXT: rosbg %r1, %r3, 2, 32, 31 -; CHECK-NEXT: srlg %r1, %r1, 24 ; CHECK-NEXT: rosbg %r5, %r4, 56, 63, 8 +; CHECK-NEXT: risbgn %r3, %r3, 2, 160, 31 +; CHECK-NEXT: ogr %r1, %r3 ; CHECK-NEXT: vlgvf %r4, %v24, 3 +; CHECK-NEXT: srlg %r1, %r1, 24 +; CHECK-NEXT: rosbg %r0, %r4, 37, 63, 60 ; CHECK-NEXT: st %r1, 24(%r2) ; CHECK-NEXT: vlgvf %r1, %v26, 0 -; CHECK-NEXT: risbgn %r0, %r0, 6, 164, 27 -; CHECK-NEXT: rosbg %r0, %r4, 37, 63, 60 ; CHECK-NEXT: stg %r5, 0(%r2) +; CHECK-NEXT: risbgn %r1, %r1, 4, 162, 29 ; CHECK-NEXT: sllg %r5, %r4, 60 +; CHECK-NEXT: ogr %r5, %r1 ; CHECK-NEXT: sllg %r0, %r0, 8 -; CHECK-NEXT: rosbg %r5, %r1, 4, 34, 29 -; CHECK-NEXT: risbgn %r1, %r1, 4, 162, 29 ; CHECK-NEXT: rosbg %r0, %r5, 56, 63, 8 ; CHECK-NEXT: stg %r0, 8(%r2) ; CHECK-NEXT: vlgvf %r0, %v26, 1 ; CHECK-NEXT: sllg %r4, %r0, 62 +; CHECK-NEXT: ogr %r3, %r4 ; CHECK-NEXT: rosbg %r1, %r0, 35, 63, 62 ; CHECK-NEXT: sllg %r0, %r1, 8 -; CHECK-NEXT: rosbg %r4, %r3, 2, 32, 31 -; CHECK-NEXT: rosbg %r0, %r4, 56, 63, 8 +; CHECK-NEXT: rosbg %r0, %r3, 56, 63, 8 ; CHECK-NEXT: stg %r0, 16(%r2) ; CHECK-NEXT: br %r14 { -- 2.7.4