From 08a09822a5cb95940cfe11fc885840916c516dc4 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 4 Sep 2019 15:22:36 +0000 Subject: [PATCH] [Hexagon] Improve generated code for test-if-bit-clear, one more time Adjust isel patterns after recent commit. Fixes https://llvm.org/PR43194. llvm-svn: 370913 --- llvm/lib/Target/Hexagon/HexagonPatterns.td | 47 ++++++++++++++++++------------ llvm/test/CodeGen/Hexagon/isel-prefer.ll | 4 +-- llvm/test/CodeGen/Hexagon/tstbit.ll | 43 ++++++--------------------- 3 files changed, 40 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index 4cf4af0..a9ba989 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -1763,14 +1763,12 @@ def: Pat<(and (srl I64:$Rss, IsULE<32,31>:$u6), 1), def: Pat<(and (srl I64:$Rss, IsUGT<32,31>:$u6), 1), (ToZext64 (I1toI32 (S2_tstbit_i (HiReg $Rss), (UDEC32 $u6))))>; -def N1toI32: OutPatFrag<(ops node:$Pu), (C2_muxii (i1 $Pu), 0, 1)>; - def: Pat<(and (not (srl I32:$Rs, u5_0ImmPred:$u5)), 1), - (N1toI32 (S2_tstbit_i I32:$Rs, imm:$u5))>; + (I1toI32 (S4_ntstbit_i I32:$Rs, imm:$u5))>; def: Pat<(and (not (srl I64:$Rss, IsULE<32,31>:$u6)), 1), - (ToZext64 (N1toI32 (S2_tstbit_i (LoReg $Rss), imm:$u6)))>; + (ToZext64 (I1toI32 (S4_ntstbit_i (LoReg $Rss), imm:$u6)))>; def: Pat<(and (not (srl I64:$Rss, IsUGT<32,31>:$u6)), 1), - (ToZext64 (N1toI32 (S2_tstbit_i (HiReg $Rss), (UDEC32 $u6))))>; + (ToZext64 (I1toI32 (S4_ntstbit_i (HiReg $Rss), (UDEC32 $u6))))>; let AddedComplexity = 20 in { // Complexity greater than compare reg-imm. def: Pat<(i1 (seteq (and I32:$Rs, u6_0ImmPred:$u6), 0)), @@ -1792,23 +1790,28 @@ def: Pat<(HexagonTSTBIT I32:$Rs, u5_0ImmPred:$u5), def: Pat<(HexagonTSTBIT I32:$Rs, I32:$Rt), (S2_tstbit_r I32:$Rs, I32:$Rt)>; +// Add extra complexity to prefer these instructions over bitsset/bitsclr. 
+// The reason is that tstbit/ntstbit can be folded into a compound instruction: +// if ([!]tstbit(...)) jump ... let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. - def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), - (S4_ntstbit_i I32:$Rs, imm:$u5)>; + def: Pat<(i1 (seteq (and I32:$Rs, IsPow2_32:$u5), 0)), + (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>; + def: Pat<(i1 (setne (and I32:$Rs, IsPow2_32:$u5), 0)), + (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>; def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)), (S4_ntstbit_r I32:$Rs, I32:$Rt)>; + def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)), + (S2_tstbit_r I32:$Rs, I32:$Rt)>; } -// Add extra complexity to prefer these instructions over bitsset/bitsclr. -// The reason is that tstbit/ntstbit can be folded into a compound instruction: -// if ([!]tstbit(...)) jump ... -let AddedComplexity = 100 in -def: Pat<(i1 (setne (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), - (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>; - -let AddedComplexity = 100 in -def: Pat<(i1 (seteq (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), - (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>; +def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64L:$u6), 0)), + (S4_ntstbit_i (LoReg $Rs), (Log2_64 $u6))>; +def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64H:$u6), 0)), + (S4_ntstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_64 $u6))))>; +def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64L:$u6), 0)), + (S2_tstbit_i (LoReg $Rs), (Log2_64 $u6))>; +def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64H:$u6), 0)), + (S2_tstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_64 $u6))))>; // Do not increase complexity of these patterns. 
In the DAG, "cmp i8" may be // represented as a compare against "value & 0xFF", which is an exact match @@ -1828,10 +1831,18 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), let AddedComplexity = 100 in { // Avoid A4_rcmp[n]eqi in these cases: + def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))), + (I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>; def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))), (I1toI32 (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt))>; + def: Pat<(i32 (zext (i1 (seteq (and I32:$Rs, IsPow2_32:$u5), 0)))), + (I1toI32 (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5)))>; + def: Pat<(i32 (zext (i1 (setne (and I32:$Rs, IsPow2_32:$u5), 0)))), + (I1toI32 (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5)))>; def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))), - (I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>; + (I1toI32 (S4_ntstbit_r I32:$Rs, I32:$Rt))>; + def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))), + (I1toI32 (S2_tstbit_r I32:$Rs, I32:$Rt))>; } // --(11) PIC ------------------------------------------------------------ diff --git a/llvm/test/CodeGen/Hexagon/isel-prefer.ll b/llvm/test/CodeGen/Hexagon/isel-prefer.ll index 1b69f56..130ea04 100644 --- a/llvm/test/CodeGen/Hexagon/isel-prefer.ll +++ b/llvm/test/CodeGen/Hexagon/isel-prefer.ll @@ -80,8 +80,8 @@ b2: ret i32 %v6 } -define i32 @Prefer_S2_ntstbit_r(i32 %a0, i32 %a1) #0 { -; CHECK-LABEL: Prefer_S2_ntstbit_r: +define i32 @Prefer_S4_ntstbit_r(i32 %a0, i32 %a1) #0 { +; CHECK-LABEL: Prefer_S4_ntstbit_r: ; CHECK: // %bb.0: // %b2 ; CHECK-NEXT: { ; CHECK-NEXT: p0 = !tstbit(r0,r1) diff --git a/llvm/test/CodeGen/Hexagon/tstbit.ll b/llvm/test/CodeGen/Hexagon/tstbit.ll index 7c80fcb..a5d6183 100644 --- a/llvm/test/CodeGen/Hexagon/tstbit.ll +++ b/llvm/test/CodeGen/Hexagon/tstbit.ll @@ -20,22 +20,12 @@ b0: ret i32 %v3 } -; TODO: Match to tstbit? 
- define i64 @is_upper_bit_clear_i64(i64 %x) #0 { ; CHECK-LABEL: is_upper_bit_clear_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: { -; CHECK-NEXT: r4 = #0 -; CHECK-NEXT: r2 = #32 -; CHECK-NEXT: r7:6 = combine(#0,#0) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: r5 = and(r1,r2) -; CHECK-NEXT: r1 = r4 -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: p0 = cmp.eq(r5:4,r7:6) +; CHECK-NEXT: p0 = !tstbit(r1,#5) +; CHECK-NEXT: r1 = #0 ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: r0 = mux(p0,#1,#0) @@ -47,23 +37,14 @@ define i64 @is_upper_bit_clear_i64(i64 %x) #0 { ret i64 %r } -; TODO: Match to tstbit? - define i64 @is_lower_bit_clear_i64(i64 %x) #0 { ; CHECK-LABEL: is_lower_bit_clear_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: { -; CHECK-NEXT: r5:4 = combine(#0,#0) -; CHECK-NEXT: r2 = ##134217728 +; CHECK-NEXT: p0 = !tstbit(r0,#27) ; CHECK-NEXT: r1 = #0 ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: r0 = and(r0,r2) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: p0 = cmp.eq(r1:0,r5:4) -; CHECK-NEXT: } -; CHECK-NEXT: { ; CHECK-NEXT: r0 = mux(p0,#1,#0) ; CHECK-NEXT: jumpr r31 ; CHECK-NEXT: } @@ -73,16 +54,14 @@ define i64 @is_lower_bit_clear_i64(i64 %x) #0 { ret i64 %r } -; TODO: Match to tstbit? - define i32 @is_bit_clear_i32(i32 %x) #0 { ; CHECK-LABEL: is_bit_clear_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: { -; CHECK-NEXT: r0 = and(r0,##134217728) +; CHECK-NEXT: p0 = !tstbit(r0,#27) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: r0 = cmp.eq(r0,#0) +; CHECK-NEXT: r0 = mux(p0,#1,#0) ; CHECK-NEXT: jumpr r31 ; CHECK-NEXT: } %sh = lshr i32 %x, 27 @@ -91,16 +70,14 @@ define i32 @is_bit_clear_i32(i32 %x) #0 { ret i32 %r } -; TODO: Match to tstbit? 
- define i16 @is_bit_clear_i16(i16 %x) #0 { ; CHECK-LABEL: is_bit_clear_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: { -; CHECK-NEXT: r0 = and(r0,#128) +; CHECK-NEXT: p0 = !tstbit(r0,#7) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: r0 = cmp.eq(r0,#0) +; CHECK-NEXT: r0 = mux(p0,#1,#0) ; CHECK-NEXT: jumpr r31 ; CHECK-NEXT: } %sh = lshr i16 %x, 7 @@ -109,16 +86,14 @@ define i16 @is_bit_clear_i16(i16 %x) #0 { ret i16 %r } -; TODO: Match to tstbit? - define i8 @is_bit_clear_i8(i8 %x) #0 { ; CHECK-LABEL: is_bit_clear_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: { -; CHECK-NEXT: r0 = and(r0,#8) +; CHECK-NEXT: p0 = !tstbit(r0,#3) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: r0 = cmp.eq(r0,#0) +; CHECK-NEXT: r0 = mux(p0,#1,#0) ; CHECK-NEXT: jumpr r31 ; CHECK-NEXT: } %sh = lshr i8 %x, 3 -- 2.7.4