From 59a4ee97288b1297bb98edd7f24fecd5e9c57170 Mon Sep 17 00:00:00 2001
From: Amara Emerson <aemerson@apple.com>
Date: Wed, 26 May 2021 23:28:44 -0700
Subject: [PATCH] [AArch64][GlobalISel] Legalize oversize G_EXTRACT_VECTOR_ELT
 sources.

Also changes the fewerElements helper to use the lookthrough constant helper
instead of m_ICst, since m_ICst doesn't look through extends.

On AArch64, oversize sources are handled by clamping the source vector type
to at most 128 bits (2 x s64, 4 x s32, or 8 x s16), after which the existing
fewerElements lowering breaks the extract into legal pieces.

Differential Revision: https://reviews.llvm.org/D103227
---
 llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp    |  6 +-
 .../Target/AArch64/GISel/AArch64LegalizerInfo.cpp  |  5 +-
 .../CodeGen/AArch64/GlobalISel/arm64-fallback.ll   |  2 +-
 .../GlobalISel/legalize-extract-vector-elt.mir     | 98 ++++++++++++++++++++++
 4 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 8dca8bf..4240f7f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3807,7 +3807,11 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
   // If the index is a constant, we can really break this down as you would
   // expect, and index into the target size pieces.
   int64_t IdxVal;
-  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
+  auto MaybeCst =
+      getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true,
+                                        /*HandleFConstants*/ false);
+  if (MaybeCst) {
+    IdxVal = MaybeCst->Value.getSExtValue();
     // Avoid out of bounds indexing the pieces.
     if (IdxVal >= VecTy.getNumElements()) {
       MIRBuilder.buildUndef(DstReg);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index b54140e..f4058fc 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -639,7 +639,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
             return Query.Types[1].getNumElements() <= 16;
           },
           0, s8)
-      .minScalarOrElt(0, s8); // Worst case, we need at least s8.
+      .minScalarOrElt(0, s8) // Worst case, we need at least s8.
+      .clampMaxNumElements(1, s64, 2)
+      .clampMaxNumElements(1, s32, 4)
+      .clampMaxNumElements(1, s16, 8);
 
   getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
       .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64}));
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index ecd156f..fc5481c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -116,7 +116,7 @@ define void @nonpow2_load_narrowing() {
 ; Currently can't handle vector lengths that aren't an exact multiple of
 ; natively supported vector lengths. Test that the fall-back works for those.
 ; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: <unknown>:0:0: unable to legalize instruction: %1:_(<7 x s64>) = G_ADD %0, %0 (in function: nonpow2_vector_add_fewerelements
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s64) = G_EXTRACT_VECTOR_ELT %{{[0-9]+}}:_(<7 x s64>), %{{[0-9]+}}:_(s64) (in function: nonpow2_vector_add_fewerelements)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %47:_(<14 x s64>) = G_CONCAT_VECTORS %41:_(<2 x s64>), %42:_(<2 x s64>), %43:_(<2 x s64>), %44:_(<2 x s64>), %29:_(<2 x s64>), %29:_(<2 x s64>), %29:_(<2 x s64>) (in function: nonpow2_vector_add_fewerelements)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_vector_add_fewerelements
 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_vector_add_fewerelements:
 define void @nonpow2_vector_add_fewerelements() {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
index b1f218d..1c8c45b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
@@ -147,3 +147,101 @@
     $x0 = COPY %3(p0)
     RET_ReallyLR
 ...
+---
+name: test_eve_v4s64
+body: |
+  bb.0:
+    liveins: $q0, $q1, $x0
+    ; CHECK-LABEL: name: test_eve_v4s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+    ; CHECK: %idx:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
+    ; CHECK: $x0 = COPY [[EVEC]](s64)
+    ; CHECK: RET_ReallyLR
+    %0:_(<2 x s64>) = COPY $q0
+    %1:_(<2 x s64>) = COPY $q1
+    %concat:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>)
+    %idx:_(s32) = G_CONSTANT i32 1
+    %idxprom:_(s64) = G_SEXT %idx(s32)
+    %3:_(s64) = G_EXTRACT_VECTOR_ELT %concat:_(<4 x s64>), %idxprom:_(s64)
+    $x0 = COPY %3(s64)
+    RET_ReallyLR
+...
+---
+name: test_eve_v4s64_unknown_idx
+body: |
+  bb.0:
+    liveins: $q0, $q1, $x0
+    ; CHECK-LABEL: name: test_eve_v4s64_unknown_idx
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+    ; CHECK: %idx:_(s64) = COPY $x0
+    ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+    ; CHECK: G_STORE [[COPY]](<2 x s64>), [[FRAME_INDEX]](p0) :: (store 16 into %stack.0, align 32)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
+    ; CHECK: G_STORE [[COPY1]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 into %stack.0 + 16, basealign 32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND %idx, [[C1]]
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[C2]]
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load 8)
+    ; CHECK: $x0 = COPY [[LOAD]](s64)
+    ; CHECK: RET_ReallyLR
+    %0:_(<2 x s64>) = COPY $q0
+    %1:_(<2 x s64>) = COPY $q1
+    %concat:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>)
+    %idx:_(s64) = COPY $x0
+    %3:_(s64) = G_EXTRACT_VECTOR_ELT %concat:_(<4 x s64>), %idx:_(s64)
+    $x0 = COPY %3(s64)
+    RET_ReallyLR
+...
+---
+name: test_eve_v8s32
+body: |
+  bb.0:
+    liveins: $q0, $q1, $x0
+    ; CHECK-LABEL: name: test_eve_v8s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: %idx:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+    ; CHECK: $w0 = COPY [[EVEC]](s32)
+    ; CHECK: RET_ReallyLR
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %concat:_(<8 x s32>) = G_CONCAT_VECTORS %0(<4 x s32>), %1(<4 x s32>)
+    %idx:_(s32) = G_CONSTANT i32 1
+    %idxprom:_(s64) = G_SEXT %idx(s32)
+    %3:_(s32) = G_EXTRACT_VECTOR_ELT %concat:_(<8 x s32>), %idxprom:_(s64)
+    $w0 = COPY %3(s32)
+    RET_ReallyLR
+...
+---
+name: test_eve_v16s16
+body: |
+  bb.0:
+    liveins: $q0, $q1, $x0
+    ; CHECK-LABEL: name: test_eve_v16s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
+    ; CHECK: %idx:_(s32) = G_CONSTANT i32 9
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C]](s64)
+    ; CHECK: %ext:_(s32) = G_ANYEXT [[EVEC]](s16)
+    ; CHECK: $w0 = COPY %ext(s32)
+    ; CHECK: RET_ReallyLR
+    %0:_(<8 x s16>) = COPY $q0
+    %1:_(<8 x s16>) = COPY $q1
+    %concat:_(<16 x s16>) = G_CONCAT_VECTORS %0(<8 x s16>), %1(<8 x s16>)
+    %idx:_(s32) = G_CONSTANT i32 9
+    %idxprom:_(s64) = G_SEXT %idx(s32)
+    %3:_(s16) = G_EXTRACT_VECTOR_ELT %concat:_(<16 x s16>), %idxprom:_(s64)
+    %ext:_(s32) = G_ANYEXT %3
+    $w0 = COPY %ext(s32)
+    RET_ReallyLR
+...
-- 
2.7.4
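
The m_ICst change above is easiest to see on the new .mir tests: the index
reaches G_EXTRACT_VECTOR_ELT as %idxprom = G_SEXT of a G_CONSTANT, and
m_ICst rejects it because it only inspects the immediate defining
instruction. Below is a minimal sketch of the contrast, using the GlobalISel
APIs as they exist at this revision; the wrapper name getExtractIdx is
hypothetical, for illustration only, and is not part of the applied diff.

// Sketch only. Assumes an index register defined as:
//   %c:_(s32) = G_CONSTANT i32 1
//   %idx:_(s64) = G_SEXT %c(s32)
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;
using namespace llvm::MIPatternMatch;

static bool getExtractIdx(Register Idx, const MachineRegisterInfo &MRI,
                          int64_t &IdxVal) {
  // m_ICst succeeds only when the defining MI of Idx is itself a
  // G_CONSTANT, so it fails on %idx above: its def is G_SEXT.
  if (mi_match(Idx, MRI, m_ICst(IdxVal)))
    return true;
  // The lookthrough helper walks through extends, truncs, and copies to
  // the underlying G_CONSTANT, so it recovers the value 1 for %idx above.
  auto MaybeCst = getConstantVRegValWithLookThrough(
      Idx, MRI, /*LookThroughInstrs*/ true, /*HandleFConstants*/ false);
  if (!MaybeCst)
    return false;
  IdxVal = MaybeCst->Value.getSExtValue();
  return true;
}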
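
When the index is not known at compile time, as in
test_eve_v4s64_unknown_idx, the CHECK lines show the generic lowering: spill
both 128-bit halves to one 32-byte stack slot, clamp the index with G_AND
(NumElts - 1 = 3), scale it by the 8-byte element size, and load the element
back. The following plain C++ analogue of that sequence uses a hypothetical
function name and is for illustration only, not part of the applied diff.

#include <cstdint>
#include <cstring>

// Mirrors the CHECKs of test_eve_v4s64_unknown_idx: two 16-byte stores
// materialize the <4 x s64> value in a stack slot, then one element is
// loaded back at a clamped, scaled offset.
uint64_t extractEltUnknownIdx(const uint64_t (&Lo)[2],
                              const uint64_t (&Hi)[2], uint64_t Idx) {
  alignas(32) uint64_t Slot[4];   // G_FRAME_INDEX %stack.0, align 32
  std::memcpy(&Slot[0], Lo, 16);  // G_STORE (store 16 into %stack.0)
  std::memcpy(&Slot[2], Hi, 16);  // G_STORE (store 16 into %stack.0 + 16)
  uint64_t Clamped = Idx & 3;     // G_AND: clamp index to NumElts - 1
  uint64_t Offset = Clamped * 8;  // G_MUL: scale by the s64 element size
  uint64_t Elt;
  std::memcpy(&Elt, reinterpret_cast<const char *>(Slot) + Offset,
              8);                 // G_LOAD (load 8)
  return Elt;
}

The G_AND clamp is what keeps an out-of-range index memory-safe: a wild
index wraps to some element of the slot instead of reading past the frame
object.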