From 4dc04e6a701a4a033d348c5ca6edb045de5a1cf4 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 27 Oct 2017 22:24:49 +0000 Subject: [PATCH] [Hexagon] Adjust patterns to reflect instruction selection preferences llvm-svn: 316804 --- llvm/lib/Target/Hexagon/HexagonPatterns.td | 37 +++++++++++++++---- llvm/test/CodeGen/Hexagon/isel-prefer.ll | 57 ++++++++++++++++++++++++++++++ llvm/test/CodeGen/Hexagon/swp-stages4.ll | 4 +-- 3 files changed, 89 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/isel-prefer.ll diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index 9083557..d432bfe 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -1301,9 +1301,14 @@ def: AccRRR_pat, I32, I32>; def: AccRRR_pat, I32, I32>; def: AccRRR_pat, I64, I64>; -def: AccRRR_pat>, I32, I32>; -def: AccRRR_pat>, I32, I32>; -def: AccRRR_pat>, I32, I32>; +// For dags like (or (and (not _), _), (shl _, _)) where the "or" with +// one argument matches the patterns below, and with the other argument +// matches S2_asl_r_r_or, etc, prefer the patterns below. +let AddedComplexity = 110 in { // greater than S2_asl_r_r_and/or/xor. + def: AccRRR_pat>, I32, I32>; + def: AccRRR_pat>, I32, I32>; + def: AccRRR_pat>, I32, I32>; +} // S4_addaddi and S4_subaddi don't have tied operands, so give them // a bit of preference. @@ -1418,10 +1423,18 @@ def : Pat <(mulhs I64:$Rss, I64:$Rtt), (A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)), (A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>; -def: Pat<(add (Su I32:$Rs, u6_0ImmPred:$U6), anyimm:$u6), - (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>; -def: Pat<(add (Su I32:$Rs, I32:$Rt), anyimm:$u6), - (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>; +// Prefer these instructions over M2_macsip/M2_macsin: the macsi* instructions +// will put the immediate addend into a register, while these instructions will +// use it directly. Such a construct does not appear in the middle of a gep, +// where M2_macsip would be preferable. +let AddedComplexity = 20 in { + def: Pat<(add (Su I32:$Rs, u6_0ImmPred:$U6), anyimm:$u6), + (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>; + def: Pat<(add (Su I32:$Rs, I32:$Rt), anyimm:$u6), + (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>; +} + +// Keep these instructions less preferable to M2_macsip/M2_macsin. def: Pat<(add I32:$Ru, (Su I32:$Rs, u6_2ImmPred:$u6_2)), (M4_mpyri_addr_u2 IntRegs:$Ru, imm:$u6_2, IntRegs:$Rs)>; def: Pat<(add I32:$Ru, (Su I32:$Rs, anyimm:$u6)), @@ -1599,6 +1612,16 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)), def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), (C4_nbitsset I32:$Rs, I32:$Rt)>; +// Special patterns to address certain cases where the "top-down" matching +// algorithm would cause suboptimal selection. + +let AddedComplexity = 100 in { + // Avoid A4_rcmp[n]eqi in these cases: + def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))), + (I1toI32 (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt))>; + def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))), + (I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>; +} // --(11) Load ----------------------------------------------------------- // diff --git a/llvm/test/CodeGen/Hexagon/isel-prefer.ll b/llvm/test/CodeGen/Hexagon/isel-prefer.ll new file mode 100644 index 0000000..062b0b3 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/isel-prefer.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +@data1 = external global [2 x [31 x i8]], align 8 +@data2 = external global [2 x [91 x i8]], align 8 + +; CHECK-LABEL: Prefer_M4_or_andn: +; CHECK: r2 |= and(r0,~r1) +define i32 @Prefer_M4_or_andn(i32 %a0, i32 %a1, i32 %a2) #0 { +b3: + %v4 = xor i32 %a1, -1 + %v5 = shl i32 %a2, 5 + %v6 = and i32 %a0, %v4 + %v7 = or i32 %v6, %v5 + ret i32 %v7 +} + +; CHECK-LABEL: Prefer_M4_mpyri_addi: +; CHECK: add(##data1,mpyi(r0,#31)) +define i32 @Prefer_M4_mpyri_addi(i32 %a0) #0 { +b1: + %v2 = getelementptr inbounds [2 x [31 x i8]], [2 x [31 x i8]]* @data1, i32 0, i32 %a0 + %v3 = ptrtoint [31 x i8]* %v2 to i32 + ret i32 %v3 +} + +; CHECK-LABEL: Prefer_M4_mpyrr_addi: +; CHECK: add(##data2,mpyi(r0,r1)) +define i32 @Prefer_M4_mpyrr_addi(i32 %a0) #0 { +b1: + %v2 = getelementptr inbounds [2 x [91 x i8]], [2 x [91 x i8]]* @data2, i32 0, i32 %a0 + %v3 = ptrtoint [91 x i8]* %v2 to i32 + ret i32 %v3 +} + +; CHECK-LABEL: Prefer_S2_tstbit_r: +; CHECK: p0 = tstbit(r0,r1) +define i32 @Prefer_S2_tstbit_r(i32 %a0, i32 %a1) #0 { +b2: + %v3 = shl i32 1, %a1 + %v4 = and i32 %a0, %v3 + %v5 = icmp ne i32 %v4, 0 + %v6 = zext i1 %v5 to i32 + ret i32 %v6 +} + +; CHECK-LABEL: Prefer_S2_ntstbit_r: +; CHECK: p0 = !tstbit(r0,r1) +define i32 @Prefer_S2_ntstbit_r(i32 %a0, i32 %a1) #0 { +b2: + %v3 = shl i32 1, %a1 + %v4 = and i32 %a0, %v3 + %v5 = icmp eq i32 %v4, 0 + %v6 = zext i1 %v5 to i32 + ret i32 %v6 +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/CodeGen/Hexagon/swp-stages4.ll b/llvm/test/CodeGen/Hexagon/swp-stages4.ll index f58e832..8e8d977 100644 --- a/llvm/test/CodeGen/Hexagon/swp-stages4.ll +++ b/llvm/test/CodeGen/Hexagon/swp-stages4.ll @@ -11,9 +11,9 @@ ; CHECK: loop0(.LBB0_[[LOOP:.]], ; CHECK: .LBB0_[[LOOP]]: ; CHECK: [[REG0]] += add +; CHECK: [[REG2:r[0-9]+]] = and ; CHECK: = and -; CHECK: = and -; CHECK: [[REG0]] = and +; CHECK: [[REG0]] = [[REG2]] ; CHECK: endloop ; Function Attrs: nounwind -- 2.7.4