From 842cda631238e295f0e2e78893459a3d42de1067 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 13 Sep 2018 12:14:23 +0000
Subject: [PATCH] DAG: Fix expansion of unaligned FP loads and stores

This was trying to scalarize a scalar FP type, resulting in an assert.

Fixes unaligned f64 stack stores for AMDGPU.

llvm-svn: 342132
---
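For context, a minimal IR reproducer in the same shape as the new tests
below; the function and value names (@store_f64_align1, %p, %v) are
illustrative, not part of the patch. Per the commit message, expanding this
under-aligned private (addrspace(5)) f64 store previously reached
scalarizeVectorStore() on a scalar type and asserted; with this change, the
scalar FP case falls through to the integer expansion path instead.

  ; Hypothetical reproducer: an f64 store with alignment below the natural 8.
  define void @store_f64_align1(double addrspace(5)* %p, double %v) {
    store double %v, double addrspace(5)* %p, align 1  ; under-aligned scalar FP store
    ret void
  }
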
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 10 ++--
 llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll | 64 ++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index eb44560..d7d4bef 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4153,7 +4153,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
   if (VT.isFloatingPoint() || VT.isVector()) {
     EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
     if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
-      if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) {
+      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
+          LoadedVT.isVector()) {
         // Scalarize the load and let the individual components be handled.
         SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
         if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
@@ -4303,13 +4304,14 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
   EVT VT = Val.getValueType();
   int Alignment = ST->getAlignment();
   auto &MF = DAG.getMachineFunction();
+  EVT MemVT = ST->getMemoryVT();
 
   SDLoc dl(ST);
-  if (ST->getMemoryVT().isFloatingPoint() ||
-      ST->getMemoryVT().isVector()) {
+  if (MemVT.isFloatingPoint() || MemVT.isVector()) {
     EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
     if (isTypeLegal(intVT)) {
-      if (!isOperationLegalOrCustom(ISD::STORE, intVT)) {
+      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
+          MemVT.isVector()) {
         // Scalarize the store and let the individual components be handled.
         SDValue Result = scalarizeVectorStore(ST, DAG);
 
diff --git a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
index d30aeb5..9bcf35e 100644
--- a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
@@ -601,4 +601,68 @@ define amdgpu_kernel void @local_store_align1_v16i8(<16 x i8> addrspace(3)* %out
   ret void
 }
 
+; SI-LABEL: {{^}}private_load_align1_f64:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+define double @private_load_align1_f64(double addrspace(5)* %in) {
+  %x = load double, double addrspace(5)* %in, align 1
+  ret double %x
+}
+
+; SI-LABEL: {{^}}private_store_align1_f64:
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+define void @private_store_align1_f64(double addrspace(5)* %out, double %x) #0 {
+  store double %x, double addrspace(5)* %out, align 1
+  ret void
+}
+
+; SI-LABEL: {{^}}private_load_align4_f64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+define double @private_load_align4_f64(double addrspace(5)* %in) {
+  %x = load double, double addrspace(5)* %in, align 4
+  ret double %x
+}
+
+; SI-LABEL: {{^}}private_store_align4_f64:
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+define void @private_store_align4_f64(double addrspace(5)* %out, double %x) #0 {
+  store double %x, double addrspace(5)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}private_load_align2_f64:
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+define double @private_load_align2_f64(double addrspace(5)* %in) {
+  %x = load double, double addrspace(5)* %in, align 2
+  ret double %x
+}
+
+; SI-LABEL: {{^}}private_store_align2_f64:
+; SI: buffer_store_short
+; SI: buffer_store_short
+; SI: buffer_store_short
+; SI: buffer_store_short
+define void @private_store_align2_f64(double addrspace(5)* %out, double %x) #0 {
+  store double %x, double addrspace(5)* %out, align 2
+  ret void
+}
+
 attributes #0 = { nounwind }
-- 
2.7.4