From 5f7ba9a74c5dca7e89f601ab6968c5e9ada30ade Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 23 Mar 2018 18:43:09 +0000 Subject: [PATCH] [Hexagon] Always generate mux out of predicated transfers if possible HexagonGenMux would collapse pairs of predicated transfers if it assumed that the predicated .new forms cannot be created. Turns out that generating mux is preferable in almost all cases. Introduce an option -hexagon-gen-mux-threshold that controls the minimum distance between the instruction defining the predicate and the later of the two transfers. If the distance is smaller than the threshold, mux will not be generated. Set the threshold to 0 by default. llvm-svn: 328346 --- llvm/lib/Target/Hexagon/HexagonGenMux.cpp | 12 ++++++++++-- llvm/test/CodeGen/Hexagon/cmp_pred.ll | 2 +- llvm/test/CodeGen/Hexagon/cmp_pred_reg.ll | 2 +- llvm/test/CodeGen/Hexagon/cmpb_pred.ll | 2 +- llvm/test/CodeGen/Hexagon/formal-args-i1.ll | 17 +++++++++++++++++ llvm/test/CodeGen/Hexagon/optimize-mux.ll | 20 ++++++++++++++++++++ llvm/test/CodeGen/Hexagon/split-muxii.ll | 24 ++++++++++++++++++++++++ llvm/test/MC/Hexagon/inst_select.ll | 7 +++---- 8 files changed, 77 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/formal-args-i1.ll create mode 100644 llvm/test/CodeGen/Hexagon/optimize-mux.ll create mode 100644 llvm/test/CodeGen/Hexagon/split-muxii.ll diff --git a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp index 5a001d6..7c91412 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp @@ -40,6 +40,7 @@ #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/MathExtras.h" #include #include @@ -56,6 +57,11 @@ namespace llvm { } // end namespace llvm +// Initialize this to 0 to always prefer generating mux by default. 
+static cl::opt<unsigned> MinPredDist("hexagon-gen-mux-threshold", cl::Hidden, + cl::init(0), cl::desc("Minimum distance between predicate definition and " + "farther of the two predicated uses")); + namespace { class HexagonGenMux : public MachineFunctionPass { @@ -269,11 +275,13 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { // There is now a complete definition of DR, i.e. we have the predicate // register, the definition if-true, and definition if-false. - // First, check if both definitions are far enough from the definition + // First, check if the definitions are far enough from the definition // of the predicate register. unsigned MinX = std::min(CI.TrueX, CI.FalseX); unsigned MaxX = std::max(CI.TrueX, CI.FalseX); - unsigned SearchX = (MaxX > 4) ? MaxX-4 : 0; + // Specifically, check if the predicate definition is within a prescribed + // distance from the farther of the two predicated instructions. + unsigned SearchX = (MaxX >= MinPredDist) ? MaxX-MinPredDist : 0; bool NearDef = false; for (unsigned X = SearchX; X < MaxX; ++X) { const DefUseInfo &DU = DUM.lookup(X); diff --git a/llvm/test/CodeGen/Hexagon/cmp_pred.ll b/llvm/test/CodeGen/Hexagon/cmp_pred.ll index ee3f5dd..4835eaf 100644 --- a/llvm/test/CodeGen/Hexagon/cmp_pred.ll +++ b/llvm/test/CodeGen/Hexagon/cmp_pred.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-gen-mux-threshold=4 < %s | FileCheck %s ; Generate various cmpb instruction followed by if (p0) .. if (!p0)... target triple = "hexagon" diff --git a/llvm/test/CodeGen/Hexagon/cmp_pred_reg.ll b/llvm/test/CodeGen/Hexagon/cmp_pred_reg.ll index ee3f5dd..4835eaf 100644 --- a/llvm/test/CodeGen/Hexagon/cmp_pred_reg.ll +++ b/llvm/test/CodeGen/Hexagon/cmp_pred_reg.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-gen-mux-threshold=4 < %s | FileCheck %s ; Generate various cmpb instruction followed by if (p0) .. 
if (!p0)... target triple = "hexagon" diff --git a/llvm/test/CodeGen/Hexagon/cmpb_pred.ll b/llvm/test/CodeGen/Hexagon/cmpb_pred.ll index d5a76ff..b8c9690 100644 --- a/llvm/test/CodeGen/Hexagon/cmpb_pred.ll +++ b/llvm/test/CodeGen/Hexagon/cmpb_pred.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-gen-mux-threshold=4 < %s | FileCheck %s ; Generate various cmpb instruction followed by if (p0) .. if (!p0)... target triple = "hexagon" diff --git a/llvm/test/CodeGen/Hexagon/formal-args-i1.ll b/llvm/test/CodeGen/Hexagon/formal-args-i1.ll new file mode 100644 index 0000000..050b572 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/formal-args-i1.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; This test validates that formal arguments of scalar type i1 +; (passed in a 32-bit register) are converted back to use predicate registers +; CHECK: [[P0:p[0-3]]] = tstbit(r0,#0) +; CHECK: [[R0:r[0-9]+]] = mux([[P0]],#3,r2) +; CHECK: memb(r1+#0) = [[R0]] + +target triple = "hexagon" + +define void @f0(i1 zeroext %a0, i8* nocapture %a1, i8 %a2) local_unnamed_addr #0 { +entry: + %v0 = select i1 %a0, i8 3, i8 %a2 + store i8 %v0, i8* %a1, align 1 + ret void +} + +attributes #0 = { norecurse nounwind optsize "target-cpu"="hexagonv60" } diff --git a/llvm/test/CodeGen/Hexagon/optimize-mux.ll b/llvm/test/CodeGen/Hexagon/optimize-mux.ll new file mode 100644 index 0000000..6a8b4bc --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/optimize-mux.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=hexagon -hexagon-gen-mux-threshold=0 < %s | FileCheck %s --check-prefix=CHECK0 +; RUN: llc -march=hexagon -hexagon-gen-mux-threshold=4 < %s | FileCheck %s --check-prefix=CHECK4 + +; Generate mux with threshold = 0: +; CHECK0: [[R0:r[0-9]+]] = add(r0,#-48) +; CHECK0: [[P0:p[0-3]]] = cmpb.gtu([[R0]],#9) +; CHECK0: r0 = mux([[P0]],#0,#1) + +; No mux for threshold = 4: +; CHECK4-NOT: mux + +define zeroext i8 @f0(i8 zeroext 
%a0) #0 { +b0: + %v0 = add i8 %a0, -48 + %v1 = icmp ult i8 %v0, 10 + %v2 = zext i1 %v1 to i8 + ret i8 %v2 +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/CodeGen/Hexagon/split-muxii.ll b/llvm/test/CodeGen/Hexagon/split-muxii.ll new file mode 100644 index 0000000..77c79f5 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/split-muxii.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=hexagon -O2 -hexagon-expand-condsets=true -hexagon-gen-mux-threshold=4 < %s | FileCheck %s +; CHECK-NOT: mux(p + +target triple = "hexagon" + +define void @f0() #0 { +b0: + %v0 = load i32, i32* null, align 4 + %v1 = icmp slt i32 undef, %v0 + %v2 = zext i1 %v1 to i32 + %v3 = icmp sgt i32 undef, 0 + %v4 = zext i1 %v3 to i32 + %v5 = add nsw i32 %v2, %v4 + store i32 %v5, i32* undef, align 4 + br i1 undef, label %b1, label %b2 + +b1: ; preds = %b0 + br label %b2 + +b2: ; preds = %b1, %b0 + unreachable +} + +attributes #0 = { nounwind } diff --git a/llvm/test/MC/Hexagon/inst_select.ll b/llvm/test/MC/Hexagon/inst_select.ll index a956b2a..ecfa829 100644 --- a/llvm/test/MC/Hexagon/inst_select.ll +++ b/llvm/test/MC/Hexagon/inst_select.ll @@ -7,7 +7,6 @@ define i32 @foo (i1 %a, i32 %b, i32 %c) ret i32 %1 } -; CHECK: 00 40 00 85 85004000 { p0 = tstbit(r0,#0) -; CHECK: 00 40 9f 52 529f4000 jumpr r31 -; CHECK: 00 60 01 74 74016000 if (p0.new) r0 = add(r1,#0) -; CHECK: 00 e0 82 74 7482e000 if (!p0.new) r0 = add(r2,#0) } +; CHECK: 00 c0 00 85 8500c000 { p0 = tstbit(r0,#0) } +; CHECK: 00 42 01 f4 f4014200 { r0 = mux(p0,r1,r2) +; CHECK: 00 c0 9f 52 529fc000 jumpr r31 } -- 2.7.4