From b1b0372337a4d4895ba4f672f7ca4935a5034262 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 18 Aug 2016 14:12:34 +0000 Subject: [PATCH] [Hexagon] Create vcombine in HexagonCopyToCombine llvm-svn: 279067 --- llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp | 74 ++++++++++++++++++------ llvm/test/CodeGen/Hexagon/vassign-to-combine.ll | 56 ++++++++++++++++++ 2 files changed, 112 insertions(+), 18 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/vassign-to-combine.ll diff --git a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 361bba7..29a9e5f 100644 --- a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -11,13 +11,9 @@ // to move them together. If we can move them next to each other we do so and // replace them with a combine instruction. //===----------------------------------------------------------------------===// -#include "llvm/PassSupport.h" -#include "Hexagon.h" #include "HexagonInstrInfo.h" -#include "HexagonMachineFunctionInfo.h" -#include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" -#include "HexagonTargetMachine.h" +#include "llvm/PassSupport.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -64,6 +60,7 @@ namespace { class HexagonCopyToCombine : public MachineFunctionPass { const HexagonInstrInfo *TII; const TargetRegisterInfo *TRI; + const HexagonSubtarget *ST; bool ShouldCombineAggressively; DenseSet PotentiallyNewifiableTFR; @@ -163,6 +160,10 @@ static bool isCombinableInstType(MachineInstr &MI, const HexagonInstrInfo *TII, (ShouldCombineAggressively || NotExt); } + case Hexagon::V6_vassign: + case Hexagon::V6_vassign_128B: + return true; + default: break; } @@ -186,11 +187,22 @@ static bool areCombinableOperations(const TargetRegisterInfo *TRI, MachineInstr &LowRegInst, bool AllowC64) { unsigned HiOpc = HighRegInst.getOpcode(); unsigned LoOpc = LowRegInst.getOpcode(); - (void)HiOpc; // Fix compiler warning - (void)LoOpc; // Fix compiler warning - assert((HiOpc == Hexagon::A2_tfr || HiOpc == Hexagon::A2_tfrsi) && - (LoOpc == Hexagon::A2_tfr || LoOpc == Hexagon::A2_tfrsi) && - "Assume individual instructions are of a combinable type"); + + auto verifyOpc = [](unsigned Opc) -> void { + switch (Opc) { + case Hexagon::A2_tfr: + case Hexagon::A2_tfrsi: + case Hexagon::V6_vassign: + break; + default: + llvm_unreachable("Unexpected opcode"); + } + }; + verifyOpc(HiOpc); + verifyOpc(LoOpc); + + if (HiOpc == Hexagon::V6_vassign || LoOpc == Hexagon::V6_vassign) + return HiOpc == LoOpc; if (!AllowC64) { // There is no combine of two constant extended values. @@ -216,9 +228,13 @@ static bool areCombinableOperations(const TargetRegisterInfo *TRI, } static bool isEvenReg(unsigned Reg) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - Hexagon::IntRegsRegClass.contains(Reg)); - return (Reg - Hexagon::R0) % 2 == 0; + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (Hexagon::IntRegsRegClass.contains(Reg)) + return (Reg - Hexagon::R0) % 2 == 0; + if (Hexagon::VectorRegsRegClass.contains(Reg) || + Hexagon::VectorRegs128BRegClass.contains(Reg)) + return (Reg - Hexagon::V0) % 2 == 0; + llvm_unreachable("Invalid register"); } static void removeKillInfo(MachineInstr &MI, unsigned RegNotKilled) { @@ -446,8 +462,9 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { bool HasChanged = false; // Get target info. - TRI = MF.getSubtarget().getRegisterInfo(); - TII = MF.getSubtarget().getInstrInfo(); + ST = &MF.getSubtarget(); + TRI = ST->getRegisterInfo(); + TII = ST->getInstrInfo(); const Function *F = MF.getFunction(); bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize); @@ -566,10 +583,19 @@ void HexagonCopyToCombine::combine(MachineInstr &I1, MachineInstr &I2, bool IsI1Loreg = (I2DestReg - I1DestReg) == 1; unsigned LoRegDef = IsI1Loreg ? I1DestReg : I2DestReg; + const TargetRegisterClass *SuperRC = nullptr; + if (Hexagon::IntRegsRegClass.contains(LoRegDef)) { + SuperRC = &Hexagon::DoubleRegsRegClass; + } else if (Hexagon::VectorRegsRegClass.contains(LoRegDef)) { + assert(ST->useHVXOps()); + if (ST->useHVXSglOps()) + SuperRC = &Hexagon::VecDblRegsRegClass; + else + SuperRC = &Hexagon::VecDblRegs128BRegClass; + } // Get the double word register. unsigned DoubleRegDest = - TRI->getMatchingSuperReg(LoRegDef, Hexagon::subreg_loreg, - &Hexagon::DoubleRegsRegClass); + TRI->getMatchingSuperReg(LoRegDef, Hexagon::subreg_loreg, SuperRC); assert(DoubleRegDest != 0 && "Expect a valid register"); @@ -838,7 +864,19 @@ void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt, // Insert new combine instruction. // DoubleRegDest = combine HiReg, LoReg - BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combinew), DoubleDestReg) + unsigned NewOpc; + if (Hexagon::DoubleRegsRegClass.contains(DoubleDestReg)) { + NewOpc = Hexagon::A2_combinew; + } else if (Hexagon::VecDblRegsRegClass.contains(DoubleDestReg)) { + assert(ST->useHVXOps()); + if (ST->useHVXSglOps()) + NewOpc = Hexagon::V6_vcombine; + else + NewOpc = Hexagon::V6_vcombine_128B; + } else + llvm_unreachable("Unexpected register"); + + BuildMI(*BB, InsertPt, DL, TII->get(NewOpc), DoubleDestReg) .addReg(HiReg, HiRegKillFlag) .addReg(LoReg, LoRegKillFlag); } diff --git a/llvm/test/CodeGen/Hexagon/vassign-to-combine.ll b/llvm/test/CodeGen/Hexagon/vassign-to-combine.ll new file mode 100644 index 0000000..a9a0d51 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vassign-to-combine.ll @@ -0,0 +1,56 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; This testcase is known to generate an opportunity for creating vcombine +; in HexagonCopyToCombine. + +; CHECK: vcombine + +target triple = "hexagon-unknown--elf" + +declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #0 +declare <32 x i32> @llvm.hexagon.V6.vabsdiffuh.128B(<32 x i32>, <32 x i32>) #0 +declare <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32>, <32 x i32>, i32) #0 +declare <32 x i32> @llvm.hexagon.V6.vsathub.128B(<32 x i32>, <32 x i32>) #0 +declare <64 x i32> @llvm.hexagon.V6.vaddh.dv.128B(<64 x i32>, <64 x i32>) #0 +declare <64 x i32> @llvm.hexagon.V6.vadduhsat.dv.128B(<64 x i32>, <64 x i32>) #0 +declare <64 x i32> @llvm.hexagon.V6.vaddubh.128B(<32 x i32>, <32 x i32>) #0 +declare <64 x i32> @llvm.hexagon.V6.vmpyub.128B(<32 x i32>, i32) #0 + +define void @foo() local_unnamed_addr #1 { +entry: + %0 = load <32 x i32>, <32 x i32>* undef, align 128 + %1 = load <32 x i32>, <32 x i32>* null, align 128 + br i1 undef, label %b2, label %b1 + +b1: ; preds = %entry + %2 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> %0, <32 x i32> %1, i32 1) + %3 = tail call <64 x i32> @llvm.hexagon.V6.vmpyub.128B(<32 x i32> %2, i32 33686018) #1 + %4 = tail call <64 x i32> @llvm.hexagon.V6.vadduhsat.dv.128B(<64 x i32> undef, <64 x i32> %3) #1 + %5 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %4) + %6 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffuh.128B(<32 x i32> %5, <32 x i32> undef) #1 + %7 = tail call <64 x i32> @llvm.hexagon.V6.vaddubh.128B(<32 x i32> %6, <32 x i32> undef) + %8 = tail call <64 x i32> @llvm.hexagon.V6.vaddh.dv.128B(<64 x i32> undef, <64 x i32> %7) #1 + %9 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %8) #1 + %10 = tail call <32 x i32> @llvm.hexagon.V6.vsathub.128B(<32 x i32> %9, <32 x i32> undef) #1 + store <32 x i32> %10, <32 x i32>* undef, align 128 + br label %b2 + +b2: ; preds = %b1, %entry + %c2.host31.sroa.3.2.unr.ph = phi <32 x i32> [ zeroinitializer, %b1 ], [ %0, %entry ] + %c2.host31.sroa.0.2.unr.ph = phi <32 x i32> [ %0, %b1 ], [ %1, %entry ] + %11 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> %c2.host31.sroa.3.2.unr.ph, <32 x i32> %c2.host31.sroa.0.2.unr.ph, i32 1) + %12 = tail call <64 x i32> @llvm.hexagon.V6.vmpyub.128B(<32 x i32> %11, i32 33686018) #1 + %13 = tail call <64 x i32> @llvm.hexagon.V6.vadduhsat.dv.128B(<64 x i32> undef, <64 x i32> %12) #1 + %14 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %13) + %15 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffuh.128B(<32 x i32> %14, <32 x i32> undef) #1 + %16 = tail call <64 x i32> @llvm.hexagon.V6.vaddubh.128B(<32 x i32> %15, <32 x i32> undef) + %17 = tail call <64 x i32> @llvm.hexagon.V6.vaddh.dv.128B(<64 x i32> undef, <64 x i32> %16) #1 + %18 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %17) #1 + %19 = tail call <32 x i32> @llvm.hexagon.V6.vsathub.128B(<32 x i32> %18, <32 x i32> undef) #1 + store <32 x i32> %19, <32 x i32>* undef, align 128 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } + -- 2.7.4