From a513fdec90de6b0719e8dc4f079bbdd78eb9aaf1 Mon Sep 17 00:00:00 2001
From: Amara Emerson <aemerson@apple.com>
Date: Mon, 21 Sep 2020 15:03:29 -0700
Subject: [PATCH] [AArch64][GlobalISel] Add a post-legalize combine for
 lowering vector-immediate G_ASHR/G_LSHR.

In order to select the immediate forms using the imported patterns, we need
to lower them into new G_VASHR/G_VLSHR target generic ops. Add a combine to
do this by matching a build_vector of constant operands. With this, we get
selection for free.
---
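Note for reviewers (this region after the "---" is dropped by git am): the
match step below is easy to model outside of LLVM if you want to poke at the
immediate rule in isolation. The following is a minimal standalone C++
sketch, not LLVM API; getConstantSplat and isValidVShiftRImm are hypothetical
stand-ins for getBuildVectorConstantSplat and the range check inside
isVShiftRImm in the patch.

  // Standalone model of the match step: the shift amount must be a
  // build_vector splat of a single constant, and for a right shift the
  // constant must satisfy 1 <= Imm <= ElementBits.
  #include <cstdint>
  #include <optional>
  #include <vector>

  // Hypothetical stand-in for getBuildVectorConstantSplat: returns the
  // splat value if every element of the constant vector is identical.
  std::optional<int64_t> getConstantSplat(const std::vector<int64_t> &Elts) {
    if (Elts.empty())
      return std::nullopt;
    for (int64_t E : Elts)
      if (E != Elts.front())
        return std::nullopt; // Not a splat; the combine bails out.
    return Elts.front();
  }

  // Hypothetical stand-in for the range check in isVShiftRImm.
  bool isValidVShiftRImm(int64_t Imm, unsigned ElementBits) {
    return Imm >= 1 && Imm <= (int64_t)ElementBits;
  }

  int main() {
    std::vector<int64_t> Splat5{5, 5, 5, 5};   // combines to G_VASHR/G_VLSHR
    std::vector<int64_t> NonSplat{4, 6, 4, 4}; // rejected: not a splat
    auto C = getConstantSplat(Splat5);
    bool OK = C && isValidVShiftRImm(*C, 32) && !getConstantSplat(NonSplat);
    return OK ? 0 : 1;
  }

The same window explains the negative tests: for <4 x s32>, shift amounts 40
and 0 fall outside 1..32, so imm_too_large and imm_zero keep their G_LSHR.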
 llvm/lib/Target/AArch64/AArch64Combine.td          |  11 +-
 llvm/lib/Target/AArch64/AArch64InstrGISel.td       |  14 ++
 .../AArch64/GISel/AArch64PostLegalizerCombiner.cpp |  44 ++++++
 .../postlegalizer-combiner-vashr-vlshr.mir         | 147 +++++++++++++++++++++
 .../AArch64/GlobalISel/select-vector-shift.mir     |  40 ++++++
 5 files changed, 255 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-vashr-vlshr.mir

diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 2187b61..e493c21 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -75,11 +75,20 @@ def ext: GICombineRule <
 // instruction.
 def shuffle_vector_pseudos : GICombineGroup<[dup, rev, ext, zip, uzp, trn]>;
 
+def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">;
+def vashr_vlshr_imm : GICombineRule<
+  (defs root:$root, vashr_vlshr_imm_matchdata:$matchinfo),
+  (match (wip_match_opcode G_ASHR, G_LSHR):$root,
+          [{ return matchVAshrLshrImm(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyVAshrLshrImm(*${root}, MRI, ${matchinfo}); }])
+>;
+
+
 def AArch64PostLegalizerCombinerHelper
     : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
                        [copy_prop, erase_undef_store, combines_for_extload,
                        sext_trunc_sextload, shuffle_vector_pseudos,
                        hoist_logic_op_with_same_opcode_hands,
-                       and_trivial_mask]> {
+                       and_trivial_mask, vashr_vlshr_imm]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }

diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index a0e7c78..79b563e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -111,6 +111,18 @@ def G_EXT: AArch64GenericInstruction {
   let InOperandList = (ins type0:$v1, type0:$v2, untyped_imm_0:$imm);
 }
 
+// Represents a vector G_ASHR with an immediate.
+def G_VASHR : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
+}
+
+// Represents a vector G_LSHR with an immediate.
+def G_VLSHR : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
+}
+
 def : GINodeEquiv<G_REV16, AArch64rev16>;
 def : GINodeEquiv<G_REV32, AArch64rev32>;
 def : GINodeEquiv<G_REV64, AArch64rev64>;
@@ -122,3 +134,5 @@ def : GINodeEquiv<G_TRN2, AArch64trn2>;
 def : GINodeEquiv<G_UZP1, AArch64uzp1>;
 def : GINodeEquiv<G_UZP2, AArch64uzp2>;
 def : GINodeEquiv<G_EXT, AArch64ext>;
+def : GINodeEquiv<G_VASHR, AArch64vashr>;
+def : GINodeEquiv<G_VLSHR, AArch64vlshr>;
\ No newline at end of file

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 15fa5ca..78812d8 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -15,14 +15,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
 
@@ -368,6 +373,45 @@ static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
   return true;
 }
 
+/// isVShiftRImm - Check if this is a valid vector for the immediate
+/// operand of a vector shift right operation. The value must be in the range:
+/// 1 <= Value <= ElementBits for a right shift.
+static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
+                         int64_t &Cnt) {
+  assert(Ty.isVector() && "vector shift count is not a vector type");
+  MachineInstr *MI = MRI.getVRegDef(Reg);
+  auto Cst = getBuildVectorConstantSplat(*MI, MRI);
+  if (!Cst)
+    return false;
+  Cnt = *Cst;
+  int64_t ElementBits = Ty.getScalarSizeInBits();
+  return Cnt >= 1 && Cnt <= ElementBits;
+}
+
+/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
+static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
+                              int64_t &Imm) {
+  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
+         MI.getOpcode() == TargetOpcode::G_LSHR);
+  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
+  if (!Ty.isVector())
+    return false;
+  return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
+}
+
+static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
+                              int64_t &Imm) {
+  unsigned Opc = MI.getOpcode();
+  assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
+  unsigned NewOpc =
+      Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
+  MachineIRBuilder MIB(MI);
+  auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
+  MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
+  MI.eraseFromParent();
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"
 #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-vashr-vlshr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-vashr-vlshr.mir
new file mode 100644
index 0000000..9659419
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-vashr-vlshr.mir
@@ -0,0 +1,147 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: ashr_v4s32
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: ashr_v4s32
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[VASHR:%[0-9]+]]:_(<4 x s32>) = G_VASHR [[COPY]], [[C]](s32)
+    ; CHECK: $q0 = COPY [[VASHR]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(s32) = G_CONSTANT i32 5
+    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
+    %3:_(<4 x s32>) = G_ASHR %0, %2(<4 x s32>)
+    $q0 = COPY %3(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: lshr_v4s32
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: lshr_v4s32
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[VLSHR:%[0-9]+]]:_(<4 x s32>) = G_VLSHR [[COPY]], [[C]](s32)
+    ; CHECK: $q0 = COPY [[VLSHR]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(s32) = G_CONSTANT i32 5
+    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
+    %3:_(<4 x s32>) = G_LSHR %0, %2(<4 x s32>)
+    $q0 = COPY %3(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: lshr_v8s16
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: lshr_v8s16
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[VLSHR:%[0-9]+]]:_(<8 x s16>) = G_VLSHR [[COPY]], [[C]](s32)
+    ; CHECK: $q0 = COPY [[VLSHR]](<8 x s16>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<8 x s16>) = COPY $q0
+    %1:_(s16) = G_CONSTANT i16 5
+    %2:_(<8 x s16>) = G_BUILD_VECTOR %1(s16), %1(s16), %1(s16), %1(s16), %1(s16), %1(s16), %1(s16), %1(s16)
+    %3:_(<8 x s16>) = G_LSHR %0, %2(<8 x s16>)
+    $q0 = COPY %3(<8 x s16>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: imm_too_large
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: imm_too_large
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+    ; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
+    ; CHECK: $q0 = COPY [[LSHR]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(s32) = G_CONSTANT i32 40
+    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
+    %3:_(<4 x s32>) = G_LSHR %0, %2(<4 x s32>)
+    $q0 = COPY %3(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: imm_zero
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: imm_zero
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+    ; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
+    ; CHECK: $q0 = COPY [[LSHR]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
+    %3:_(<4 x s32>) = G_LSHR %0, %2(<4 x s32>)
+    $q0 = COPY %3(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: imm_not_splat
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: imm_not_splat
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C]](s32), [[C]](s32)
+    ; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
+    ; CHECK: $q0 = COPY [[LSHR]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(s32) = G_CONSTANT i32 4
+    %4:_(s32) = G_CONSTANT i32 6
+    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %4(s32), %1(s32), %1(s32)
+    %3:_(<4 x s32>) = G_LSHR %0, %2(<4 x s32>)
+    $q0 = COPY %3(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
index d0717ee..9ded501 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
@@ -372,3 +372,43 @@ body: |
     $d0 = COPY %2(<4 x s16>)
     RET_ReallyLR implicit $d0
 ...
+---
+name: vashr_v4i16_imm
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: vashr_v4i16_imm
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[SSHRv4i16_shift:%[0-9]+]]:fpr64 = SSHRv4i16_shift [[COPY]], 5
+    ; CHECK: $d0 = COPY [[SSHRv4i16_shift]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(<4 x s16>) = COPY $d0
+    %1:gpr(s32) = G_CONSTANT i32 5
+    %2:fpr(<4 x s16>) = G_VASHR %0, %1
+    $d0 = COPY %2(<4 x s16>)
+    RET_ReallyLR implicit $d0
+...
+---
+name: vlshr_v4i16_imm
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: vlshr_v4i16_imm
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[USHRv4i16_shift:%[0-9]+]]:fpr64 = USHRv4i16_shift [[COPY]], 5
+    ; CHECK: $d0 = COPY [[USHRv4i16_shift]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(<4 x s16>) = COPY $d0
+    %1:gpr(s32) = G_CONSTANT i32 5
+    %2:fpr(<4 x s16>) = G_VLSHR %0, %1
+    $d0 = COPY %2(<4 x s16>)
+    RET_ReallyLR implicit $d0
+...
-- 
2.7.4
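
P.S. A companion sketch of the apply step, again standalone C++ under the
same assumptions (the Opcode enum and Shift struct are illustrative, not
LLVM types): once the splat constant is known, the rewrite is a pure opcode
swap plus materializing the immediate as a scalar constant, which is why
selection of SSHR/USHR then comes for free from the imported patterns.

  // Standalone model of the apply step: the variable-shift opcode is
  // rewritten to the immediate-form target pseudo.
  #include <cassert>
  #include <cstdint>

  enum Opcode { G_ASHR, G_LSHR, G_VASHR, G_VLSHR }; // illustrative only

  struct Shift {
    Opcode Opc;
    int64_t Imm; // splat constant recovered by the match step
  };

  // Mirrors applyVAshrLshrImm: G_ASHR -> G_VASHR, G_LSHR -> G_VLSHR.
  Shift lowerToImmediateForm(Opcode Opc, int64_t Imm) {
    assert(Opc == G_ASHR || Opc == G_LSHR);
    return {Opc == G_ASHR ? G_VASHR : G_VLSHR, Imm};
  }

  int main() {
    Shift S = lowerToImmediateForm(G_LSHR, 5);
    assert(S.Opc == G_VLSHR && S.Imm == 5);
    return 0;
  }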