From 1137b7207d707c83a0fa99c60f0411bfec98db06 Mon Sep 17 00:00:00 2001 From: Caroline Concatto Date: Wed, 20 Oct 2021 22:38:02 +0100 Subject: [PATCH] [SelectionDAG] Widening the result of INSERT_SUBVECTOR. Widens the result and first input vector because they have the same size. The subvector to be inserted is widened in the operand widen function. Differential Revision: https://reviews.llvm.org/D112187 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 13 ++++ llvm/test/CodeGen/AArch64/sve-insert-vector.ll | 88 ++++++++++++++++++++++ 3 files changed, 102 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index cacd0e9..4742b13 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -908,6 +908,7 @@ private: SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N); SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N); SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); + SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 08c4fa9..1afa219 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3038,6 +3038,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; + case ISD::INSERT_SUBVECTOR: + Res = WidenVecRes_INSERT_SUBVECTOR(N); + break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = 
WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; @@ -4059,6 +4062,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { return DAG.getBuildVector(WidenVT, dl, Ops); } +SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) { + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WidenVT, InOp1, InOp2, Idx); +} + SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll index ce90e0c..1122fc2 100644 --- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -399,3 +399,91 @@ declare @llvm.experimental.vector.insert.nxv6i16.nxv1i16( @llvm.experimental.vector.insert.nxv8i16.nxv2i16(, , i64) declare @llvm.experimental.vector.insert.nxv8f16.nxv2f16(, , i64) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Unpacked types that need result widening +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define @insert_nxv3i32_nxv2i32( %sv0) { +; CHECK-LABEL: insert_nxv3i32_nxv2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z1.d, z0.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %v0 = call @llvm.experimental.vector.insert.nxv3i32.nxv2i32( undef, %sv0, i64 0) + ret %v0 +} + +;; Check that the Subvector is not widened so it does not crash. 
+define @insert_nxv3i32_nxv2i32_2( %sv0, %sv1) { +; CHECK-LABEL: insert_nxv3i32_nxv2i32_2: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s +; CHECK-NEXT: ret + %v0 = call @llvm.experimental.vector.insert.nxv3i32.nxv2i32( %sv0, %sv1, i64 0) + ret %v0 +} + +define @insert_nxv3f32_nxv2f32( %sv0) nounwind { +; CHECK-LABEL: insert_nxv3f32_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: st1w { z0.d }, p0, [sp] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %v0 = call @llvm.experimental.vector.insert.nxv3f32.nxv2f32( undef, %sv0, i64 0) + ret %v0 +} + +define @insert_nxv6i32_nxv2i32( %sv0, %sv1) nounwind { +; CHECK-LABEL: insert_nxv6i32_nxv2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: st1w { z2.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1w { z0.s }, p1, [sp] +; CHECK-NEXT: ld1w { z1.s }, p1/z, [x8, #1, mul vl] +; CHECK-NEXT: ld1w { z0.s }, p1/z, [sp] +; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %v0 = call @llvm.experimental.vector.insert.nxv6i32.nxv2i32( undef, %sv0, i64 0) + %v1 = call @llvm.experimental.vector.insert.nxv6i32.nxv2i32( %v0, %sv1, i64 2) + ret %v1 +} + +;; This only works because the input vector is undef and index is zero +define @insert_nxv6i32_nxv3i32( %sv0) { +; CHECK-LABEL: insert_nxv6i32_nxv3i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %v0 = call @llvm.experimental.vector.insert.nxv6i32.nxv3i32( undef, %sv0, i64 0) + ret %v0 +} + +define @insert_nxv12i32_nxv4i32( %sv0, 
%sv1, %sv2) { +; CHECK-LABEL: insert_nxv12i32_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %v0 = call @llvm.experimental.vector.insert.nxv4i32.nxv12i32( undef, %sv0, i64 0) + %v1 = call @llvm.experimental.vector.insert.nxv4i32.nxv12i32( %v0, %sv1, i64 4) + %v2 = call @llvm.experimental.vector.insert.nxv4i32.nxv12i32( %v1, %sv2, i64 8) + ret %v2 +} + +declare @llvm.experimental.vector.insert.nxv3i32.nxv2i32(, , i64) +declare @llvm.experimental.vector.insert.nxv3f32.nxv2f32(, , i64) +declare @llvm.experimental.vector.insert.nxv6i32.nxv2i32(, , i64) +declare @llvm.experimental.vector.insert.nxv6i32.nxv3i32(, , i64) +declare @llvm.experimental.vector.insert.nxv4i32.nxv12i32(, , i64) -- 2.7.4