DAG: Fix expansion of unaligned FP loads and stores

author Matt Arsenault <Matthew.Arsenault@amd.com>

Thu, 13 Sep 2018 12:14:23 +0000 (12:14 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Thu, 13 Sep 2018 12:14:23 +0000 (12:14 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 13 Sep 2018 12:14:23 +0000 (12:14 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 13 Sep 2018 12:14:23 +0000 (12:14 +0000)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index eb44560..d7d4bef 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4153,7 +4153,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
    if (VT.isFloatingPoint() || VT.isVector()) {
      EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
      if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
-      if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) {
+      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
+          LoadedVT.isVector()) {
          // Scalarize the load and let the individual components be handled.
          SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
          if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
@@ -4303,13 +4304,14 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
    EVT VT = Val.getValueType();
    int Alignment = ST->getAlignment();
    auto &MF = DAG.getMachineFunction();
+  EVT MemVT = ST->getMemoryVT();
  
    SDLoc dl(ST);
-  if (ST->getMemoryVT().isFloatingPoint() ||
-      ST->getMemoryVT().isVector()) {
+  if (MemVT.isFloatingPoint() || MemVT.isVector()) {
      EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
      if (isTypeLegal(intVT)) {
-      if (!isOperationLegalOrCustom(ISD::STORE, intVT)) {
+      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
+          MemVT.isVector()) {
          // Scalarize the store and let the individual components be handled.
          SDValue Result = scalarizeVectorStore(ST, DAG);
  
diff --git a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll

index d30aeb5..9bcf35e 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
@@ -601,4 +601,68 @@ define amdgpu_kernel void @local_store_align1_v16i8(<16 x i8> addrspace(3)* %out
    ret void
  }
  
+; SI-LABEL: {{^}}private_load_align1_f64:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+define double @private_load_align1_f64(double addrspace(5)* %in) {
+  %x = load double, double addrspace(5)* %in, align 1
+  ret double %x
+}
+
+; SI-LABEL: {{^}}private_store_align1_f64:
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+define void @private_store_align1_f64(double addrspace(5)* %out, double %x) #0 {
+  store double %x, double addrspace(5)* %out, align 1
+  ret void
+}
+
+; SI-LABEL: {{^}}private_load_align4_f64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+define double @private_load_align4_f64(double addrspace(5)* %in) {
+  %x = load double, double addrspace(5)* %in, align 4
+  ret double %x
+}
+
+; SI-LABEL: {{^}}private_store_align4_f64:
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+define void @private_store_align4_f64(double addrspace(5)* %out, double %x) #0 {
+  store double %x, double addrspace(5)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}private_load_align2_f64:
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+define double @private_load_align2_f64(double addrspace(5)* %in) {
+  %x = load double, double addrspace(5)* %in, align 2
+  ret double %x
+}
+
+; SI-LABEL: {{^}}private_store_align2_f64:
+; SI: buffer_store_short
+; SI: buffer_store_short
+; SI: buffer_store_short
+; SI: buffer_store_short
+define void @private_store_align2_f64(double addrspace(5)* %out, double %x) #0 {
+  store double %x, double addrspace(5)* %out, align 2
+  ret void
+}
+
  attributes #0 = { nounwind }
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 13 Sep 2018 12:14:23 +0000 (12:14 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 13 Sep 2018 12:14:23 +0000 (12:14 +0000)
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch | blob | history
llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll		patch | blob | history