From: Fraser Cormack
Date: Wed, 6 Oct 2021 17:14:06 +0000 (+0100)
Subject: [SelectionDAG] Widen scalable-vector loads/stores via VP_LOAD/VP_STORE
X-Git-Tag: upstream/15.0.7~26196
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=332318ffb613907a268aa909552e3eb16bc1af6b;p=platform%2Fupstream%2Fllvm.git

[SelectionDAG] Widen scalable-vector loads/stores via VP_LOAD/VP_STORE

This patch fixes a compiler crash when widening scalable-vector loads
and stores which end up breaking down into element-wise operations,
something that is not possible for scalable vectors.

It does so by providing a way for targets with support for
vector-predicated loads and stores to use those instead. By widening
the operation but maintaining the original effective operation length
via the EVL, only the intended vector elements are loaded or stored.

In theory this method should also be possible, and even preferred, for
fixed-length vector types, but all fixed-length types can be broken
down into their elements, and regardless I have observed regressions in
the generated code when doing so. I believe this is simply due to
VP_LOAD/VP_STORE not being up to par with LOAD/STORE in terms of
optimization. It does improve performance on smaller self-contained
examples, however, so the potential is there.

While the only target that benefits from this is RISCV, the
legalization is generic and so was placed centrally.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D111248
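To illustrate the idea (only a sketch for exposition, not code produced
by this patch; the intrinsic manglings assume the typed-pointer form of
llvm.vp.load): a load of the illegal type <vscale x 3 x i8> is widened
to the legal type <vscale x 4 x i8>, and an EVL of 3 * vscale keeps the
access within the original element count. The widened load is then
morally equivalent to:

    declare i32 @llvm.vscale.i32()
    declare <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>*, <vscale x 4 x i1>, i32)

    define <vscale x 4 x i8> @widened_load_sketch(<vscale x 4 x i8>* %ptr) {
      ; Build an all-ones mask: every lane below the EVL is active.
      %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
      %mask = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
      ; EVL = 3 * vscale, the element count of the original <vscale x 3 x i8>.
      %vscale = call i32 @llvm.vscale.i32()
      %evl = mul i32 %vscale, 3
      %v = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* %ptr, <vscale x 4 x i1> %mask, i32 %evl)
      ret <vscale x 4 x i8> %v
    }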
---

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f42f566..b5ff6b5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4178,23 +4178,48 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
   else
     Result = GenWidenVectorLoads(LdChain, LD);

-  if (!Result)
-    report_fatal_error("Unable to widen vector load");
+  if (Result) {
+    // If we generate a single load, we can use that for the chain. Otherwise,
+    // build a factor node to remember the multiple loads are independent and
+    // chain to that.
+    SDValue NewChain;
+    if (LdChain.size() == 1)
+      NewChain = LdChain[0];
+    else
+      NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);

-  // If we generate a single load, we can use that for the chain. Otherwise,
-  // build a factor node to remember the multiple loads are independent and
-  // chain to that.
-  SDValue NewChain;
-  if (LdChain.size() == 1)
-    NewChain = LdChain[0];
-  else
-    NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
+    // Modified the chain - switch anything that used the old chain to use
+    // the new one.
+    ReplaceValueWith(SDValue(N, 1), NewChain);

-  // Modified the chain - switch anything that used the old chain to use
-  // the new one.
-  ReplaceValueWith(SDValue(N, 1), NewChain);
+    return Result;
+  }

-  return Result;
+  // Generate a vector-predicated load if it is custom/legal on the target. To
+  // avoid possible recursion, only do this if the widened mask type is legal.
+  // FIXME: Not all targets may support EVL in VP_LOAD. These will have been
+  // removed from the IR by the ExpandVectorPredication pass but we're
+  // reintroducing them here.
+  EVT LdVT = LD->getMemoryVT();
+  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT);
+  EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+                                    WideVT.getVectorElementCount());
+  if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() &&
+      TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) &&
+      TLI.isTypeLegal(WideMaskVT)) {
+    SDLoc DL(N);
+    SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
+    MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
+    unsigned NumVTElts = LdVT.getVectorMinNumElements();
+    SDValue EVL =
+        DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+    const auto *MMO = LD->getMemOperand();
+    return DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask,
+                         EVL, MMO->getPointerInfo(), MMO->getAlign(),
+                         MMO->getFlags(), MMO->getAAInfo());
+  }
+
+  report_fatal_error("Unable to widen vector load");
 }

 SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
@@ -5035,13 +5060,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
     return TLI.scalarizeVectorStore(ST, DAG);

   SmallVector<SDValue, 16> StChain;
-  if (!GenWidenVectorStores(StChain, ST))
-    report_fatal_error("Unable to widen vector store");
+  if (GenWidenVectorStores(StChain, ST)) {
+    if (StChain.size() == 1)
+      return StChain[0];
+
+    return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
+  }

-  if (StChain.size() == 1)
-    return StChain[0];
+  // Generate a vector-predicated store if it is custom/legal on the target.
+  // To avoid possible recursion, only do this if the widened mask type is
+  // legal.
+  // FIXME: Not all targets may support EVL in VP_STORE. These will have been
+  // removed from the IR by the ExpandVectorPredication pass but we're
+  // reintroducing them here.
+  SDValue StVal = ST->getValue();
+  EVT StVT = StVal.getValueType();
+  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT);
+  EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+                                    WideVT.getVectorElementCount());
+  if (WideVT.isScalableVector() &&
+      TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
+      TLI.isTypeLegal(WideMaskVT)) {
+    // Widen the value.
+    SDLoc DL(N);
+    StVal = GetWidenedVector(StVal);
+    SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
+    MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
+    unsigned NumVTElts = StVT.getVectorMinNumElements();
+    SDValue EVL =
+        DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+    const auto *MMO = ST->getMemOperand();
+    return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), Mask,
+                          EVL, MMO->getPointerInfo(), MMO->getAlign(),
+                          MMO->getFlags(), MMO->getAAInfo());
+  }

-  return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
+  report_fatal_error("Unable to widen vector store");
 }

 SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
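The store path is the mirror image, with one extra step: the value
operand is first widened via GetWidenedVector, and the EVL-limited
store then writes back only the original elements, so the padding lanes
introduced by widening never reach memory. Again only as a sketch with
assumed manglings, the widened store of a <vscale x 3 x i8> value is
morally equivalent to:

    declare i32 @llvm.vscale.i32()
    declare void @llvm.vp.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>*, <vscale x 4 x i1>, i32)

    define void @widened_store_sketch(<vscale x 4 x i8> %wideval, <vscale x 4 x i8>* %ptr) {
      ; All-ones mask, as in the load case.
      %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
      %mask = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
      ; EVL = 3 * vscale: only the low 3 * vscale lanes of %wideval are
      ; written to memory.
      %vscale = call i32 @llvm.vscale.i32()
      %evl = mul i32 %vscale, 3
      call void @llvm.vp.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> %wideval, <vscale x 4 x i8>* %ptr, <vscale x 4 x i1> %mask, i32 %evl)
      ret void
    }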
diff --git a/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll
index 7676b29..aedf2b8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll
@@ -1,16 +1,33 @@
-; RUN: not --crash llc -mtriple=riscv32 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s
-; RUN: not --crash llc -mtriple=riscv64 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s

 ; Check that we are able to legalize scalable-vector loads that require widening.

-; FIXME: LLVM can't yet widen scalable-vector loads.
-
 define <vscale x 3 x i8> @load_nxv3i8(<vscale x 3 x i8>* %ptr) {
+; CHECK-LABEL: load_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a1, a1, 3
+; CHECK-NEXT:    slli a2, a1, 1
+; CHECK-NEXT:    add a1, a2, a1
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
   %v = load <vscale x 3 x i8>, <vscale x 3 x i8>* %ptr
   ret <vscale x 3 x i8> %v
 }

 define <vscale x 5 x half> @load_nxv5f16(<vscale x 5 x half>* %ptr) {
+; CHECK-LABEL: load_nxv5f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a1, a1, 3
+; CHECK-NEXT:    slli a2, a1, 2
+; CHECK-NEXT:    add a1, a2, a1
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
   %v = load <vscale x 5 x half>, <vscale x 5 x half>* %ptr
   ret <vscale x 5 x half> %v
 }
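A note on the CHECK lines above: csrr a1, vlenb reads the vector
register length in bytes (VLEN/8), and on RISC-V vscale is defined as
VLEN/64, so the following srli recovers vscale itself. The shift/add
pair then forms the EVL as a multiple of vscale; for load_nxv3i8:

    vscale = vlenb >> 3                              (srli a1, a1, 3)
    evl    = (vscale << 1) + vscale = 3 * vscale     (slli + add)

which vsetvli installs as the active vector length before the vle8.v,
so only the original 3 * vscale elements are loaded.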
diff --git a/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll
index 2c52e1b..bd7993c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll
@@ -1,16 +1,33 @@
-; RUN: not --crash llc -mtriple=riscv32 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s
-; RUN: not --crash llc -mtriple=riscv64 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s

 ; Check that we are able to legalize scalable-vector stores that require widening.

-; FIXME: LLVM can't yet widen scalable-vector stores.
-
 define void @store_nxv3i8(<vscale x 3 x i8> %val, <vscale x 3 x i8>* %ptr) {
+; CHECK-LABEL: store_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a1, a1, 3
+; CHECK-NEXT:    slli a2, a1, 1
+; CHECK-NEXT:    add a1, a2, a1
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   store <vscale x 3 x i8> %val, <vscale x 3 x i8>* %ptr
   ret void
 }

 define void @store_nxv7f64(<vscale x 7 x double> %val, <vscale x 7 x double>* %ptr) {
+; CHECK-LABEL: store_nxv7f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a1, a1, 3
+; CHECK-NEXT:    slli a2, a1, 3
+; CHECK-NEXT:    sub a1, a2, a1
+; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
   store <vscale x 7 x double> %val, <vscale x 7 x double>* %ptr
   ret void
 }
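As a usage note (a hypothetical example; the file name and the exact
codegen are illustrative): any scalable vector type whose minimum
element count is not a power of two should exercise the new path, since
such types are legalized by widening. For instance, <vscale x 6 x i16>
should widen to <vscale x 8 x i16> with an EVL of 6 * vscale:

    ; widen-vp-example.ll (hypothetical)
    ; Run with: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < widen-vp-example.ll
    define <vscale x 6 x i16> @load_nxv6i16(<vscale x 6 x i16>* %ptr) {
      %v = load <vscale x 6 x i16>, <vscale x 6 x i16>* %ptr
      ret <vscale x 6 x i16> %v
    }

Before this patch, inputs like this crashed llc with "Unable to widen
vector load"; with it, they should compile to a single EVL-limited
vle16.v.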