From: Amara Emerson
Date: Tue, 3 Nov 2020 19:17:31 +0000 (-0800)
Subject: [AArch64][GlobalISel] Add combine for G_EXTRACT_VECTOR_ELT to allow selection of...
X-Git-Tag: llvmorg-13-init~7165
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=393b55380afcd8681db03dfbdea2f27ff3517d25;p=platform%2Fupstream%2Fllvm.git

[AArch64][GlobalISel] Add combine for G_EXTRACT_VECTOR_ELT to allow selection of pairwise FADD.

For the <2 x float> case, instead of adding another combine or legalization to
get it into a <4 x float> form, I'm just adding a GISel-specific selection
pattern to cover it.

Differential Revision: https://reviews.llvm.org/D90699
---

diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 1e0be24..560e362 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -94,6 +94,14 @@ def adjust_icmp_imm : GICombineRule <
 
 def icmp_lowering : GICombineGroup<[adjust_icmp_imm]>;
 
+def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">;
+def extractvecelt_pairwise_add : GICombineRule<
+  (defs root:$root, extractvecelt_pairwise_add_matchdata:$matchinfo),
+  (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
+         [{ return matchExtractVecEltPairwiseAdd(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -110,6 +118,7 @@ def AArch64PostLegalizerCombinerHelper
                        [copy_prop, erase_undef_store, combines_for_extload,
                         sext_trunc_sextload,
                         hoist_logic_op_with_same_opcode_hands,
-                        and_trivial_mask, xor_of_and_with_same_reg]> {
+                        and_trivial_mask, xor_of_and_with_same_reg,
+                        extractvecelt_pairwise_add]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 79b563e..1bd9ce2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -135,4 +135,9 @@ def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
-def : GINodeEquiv;
\ No newline at end of file
+def : GINodeEquiv;
+
+// These are patterns that we only use for GlobalISel via the importer.
+def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
+                     (vector_extract (v2f32 FPR64:$Rn), (i64 1)))),
+          (f32 (FADDPv2i32p (v2f32 FPR64:$Rn)))>;
\ No newline at end of file
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 4f39388..17520de 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -24,8 +24,11 @@
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
 
@@ -33,6 +36,74 @@
 
 using namespace llvm;
 
+/// This combine tries to do what performExtractVectorEltCombine does in SDAG.
+/// Rewrite for pairwise fadd pattern
+///   (s32 (g_extract_vector_elt
+///           (g_fadd (vXs32 Other)
+///                   (g_vector_shuffle (vXs32 Other) undef <1,X,...>)) 0))
+/// ->
+///   (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
+///                (g_extract_vector_elt (vXs32 Other) 1)))
+bool matchExtractVecEltPairwiseAdd(
+    MachineInstr &MI, MachineRegisterInfo &MRI,
+    std::tuple<unsigned, LLT, Register> &MatchInfo) {
+  Register Src1 = MI.getOperand(1).getReg();
+  Register Src2 = MI.getOperand(2).getReg();
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+  auto Cst = getConstantVRegValWithLookThrough(Src2, MRI);
+  if (!Cst || Cst->Value != 0)
+    return false;
+  // SDAG also checks for FullFP16, but this looks to be beneficial anyway.
+
+  // Now check for an fadd operation. TODO: expand this for integer add?
+  auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI);
+  if (!FAddMI)
+    return false;
+
+  // If we add support for integer add, we must restrict these types to just s64.
+  unsigned DstSize = DstTy.getSizeInBits();
+  if (DstSize != 16 && DstSize != 32 && DstSize != 64)
+    return false;
+
+  Register Src1Op1 = FAddMI->getOperand(1).getReg();
+  Register Src1Op2 = FAddMI->getOperand(2).getReg();
+  MachineInstr *Shuffle =
+      getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI);
+  MachineInstr *Other = MRI.getVRegDef(Src1Op1);
+  if (!Shuffle) {
+    Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI);
+    Other = MRI.getVRegDef(Src1Op2);
+  }
+
+  // We're looking for a shuffle that moves the second element to index 0.
+  if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 &&
+      Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) {
+    std::get<0>(MatchInfo) = TargetOpcode::G_FADD;
+    std::get<1>(MatchInfo) = DstTy;
+    std::get<2>(MatchInfo) = Other->getOperand(0).getReg();
+    return true;
+  }
+  return false;
+}
+
+bool applyExtractVecEltPairwiseAdd(
+    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+    std::tuple<unsigned, LLT, Register> &MatchInfo) {
+  unsigned Opc = std::get<0>(MatchInfo);
+  assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
+  // We want to generate two extracts of elements 0 and 1, and add them.
+  LLT Ty = std::get<1>(MatchInfo);
+  Register Src = std::get<2>(MatchInfo);
+  LLT s64 = LLT::scalar(64);
+  B.setInstrAndDebugLoc(MI);
+  auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0));
+  auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));
+  B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});
+  MI.eraseFromParent();
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"
 #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-extractvec-faddp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-extractvec-faddp.mir
new file mode 100644
index 0000000..7906345
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-extractvec-faddp.mir
@@ -0,0 +1,188 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: f64_faddp
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+body: |
+  bb.1:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: f64_faddp
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C1]](s64)
+    ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[EVEC]], [[EVEC1]]
+    ; CHECK: $d0 = COPY [[FADD]](s64)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<2 x s64>) = COPY $q0
+    %2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %5:_(s64) = G_CONSTANT i64 0
+    %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(1, undef)
+    %3:_(<2 x s64>) = G_FADD %1, %0
+    %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: f64_faddp_commuted
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+body: |
+  bb.1:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: f64_faddp_commuted
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C1]](s64)
+    ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[EVEC]], [[EVEC1]]
+    ; CHECK: $d0 = COPY [[FADD]](s64)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<2 x s64>) = COPY $q0
+    %2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %5:_(s64) = G_CONSTANT i64 0
+    %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(1, undef)
+    %3:_(<2 x s64>) = G_FADD %0, %1
+    %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: f32_faddp
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$d0' }
+body: |
+  bb.1:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: f32_faddp
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C1]](s64)
+    ; CHECK: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[EVEC]], [[EVEC1]]
+    ; CHECK: $s0 = COPY [[FADD]](s32)
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:_(<2 x s32>) = COPY $d0
+    %2:_(<2 x s32>) = G_IMPLICIT_DEF
+    %5:_(s64) = G_CONSTANT i64 0
+    %1:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %2, shufflemask(1, undef)
+    %3:_(<2 x s32>) = G_FADD %1, %0
+    %4:_(s32) = G_EXTRACT_VECTOR_ELT %3(<2 x s32>), %5(s64)
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name: f32_faddp_commuted
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$d0' }
+body: |
+  bb.1:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: f32_faddp_commuted
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C1]](s64)
+    ; CHECK: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[EVEC]], [[EVEC1]]
+    ; CHECK: $s0 = COPY [[FADD]](s32)
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:_(<2 x s32>) = COPY $d0
+    %2:_(<2 x s32>) = G_IMPLICIT_DEF
+    %5:_(s64) = G_CONSTANT i64 0
+    %1:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %2, shufflemask(1, undef)
+    %3:_(<2 x s32>) = G_FADD %0, %1
+    %4:_(s32) = G_EXTRACT_VECTOR_ELT %3(<2 x s32>), %5(s64)
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name: wrong_extract_idx
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+body: |
+  bb.1:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: wrong_extract_idx
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[DEF]], shufflemask(1, undef)
+    ; CHECK: [[FADD:%[0-9]+]]:_(<2 x s64>) = G_FADD [[SHUF]], [[COPY]]
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[FADD]](<2 x s64>), [[C]](s64)
+    ; CHECK: $d0 = COPY [[EVEC]](s64)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<2 x s64>) = COPY $q0
+    %2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %5:_(s64) = G_CONSTANT i64 1
+    %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(1, undef)
+    %3:_(<2 x s64>) = G_FADD %1, %0
+    %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: wrong_shuffle_mask
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+body: |
+  bb.1:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: wrong_shuffle_mask
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[DEF]], shufflemask(0, undef)
+    ; CHECK: [[FADD:%[0-9]+]]:_(<2 x s64>) = G_FADD [[SHUF]], [[COPY]]
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[FADD]](<2 x s64>), [[C]](s64)
+    ; CHECK: $d0 = COPY [[EVEC]](s64)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<2 x s64>) = COPY $q0
+    %2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %5:_(s64) = G_CONSTANT i64 0
+    %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(0, undef)
+    %3:_(<2 x s64>) = G_FADD %1, %0
+    %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-faddp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-faddp.mir
new file mode 100644
index 0000000..7706308
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-faddp.mir
@@ -0,0 +1,62 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -run-pass=instruction-select %s -o - | FileCheck %s
+---
+name: f64_faddp
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+frameInfo:
+  maxAlignment: 1
+body: |
+  bb.1:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: f64_faddp
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[FADDPv2i64p:%[0-9]+]]:fpr64 = FADDPv2i64p [[COPY]]
+    ; CHECK: $d0 = COPY [[FADDPv2i64p]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(<2 x s64>) = COPY $q0
+    %6:gpr(s64) = G_CONSTANT i64 0
+    %7:fpr(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %6(s64)
+    %8:gpr(s64) = G_CONSTANT i64 1
+    %9:fpr(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %8(s64)
+    %4:fpr(s64) = G_FADD %7, %9
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name: f32_faddp
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$d0' }
+frameInfo:
+  maxAlignment: 1
+body: |
+  bb.1:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: f32_faddp
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[FADDPv2i32p:%[0-9]+]]:fpr32 = FADDPv2i32p [[COPY]]
+    ; CHECK: $s0 = COPY [[FADDPv2i32p]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(<2 x s32>) = COPY $d0
+    %6:gpr(s64) = G_CONSTANT i64 0
+    %7:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %6(s64)
+    %8:gpr(s64) = G_CONSTANT i64 1
+    %9:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %8(s64)
+    %4:fpr(s32) = G_FADD %7, %9
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
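
A note on the end-to-end effect (illustrative only; the function below is a sketch and is not part of the commit or its tests): IR of the following shape legalizes to the G_SHUFFLE_VECTOR + G_FADD + G_EXTRACT_VECTOR_ELT form that matchExtractVecEltPairwiseAdd looks for. The combine rewrites it into two element extracts plus a scalar G_FADD, which the selector can then match to a single FADDP (FADDPv2i64p in this case; the new GISel-only Pat covers the analogous <2 x float> case).

; Hypothetical example: pairwise reduction of a <2 x double> value.
define double @pairwise_fadd_v2f64(<2 x double> %v) {
  ; Move element 1 into lane 0 of a second vector.
  %swapped = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  ; Lane 0 of the vector fadd now holds v[0] + v[1].
  %sum = fadd <2 x double> %v, %swapped
  ; Extracting lane 0 yields the pairwise sum; this extract is the combine's root.
  %res = extractelement <2 x double> %sum, i64 0
  ret double %res
}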