[AMDGPU] Fix for negative offsets in buffer/tbuffer intrinsics

author Tim Renouf <tpr.llvm@botech.co.uk>

Wed, 3 Oct 2018 10:29:43 +0000 (10:29 +0000)

committer Tim Renouf <tpr.llvm@botech.co.uk>

Wed, 3 Oct 2018 10:29:43 +0000 (10:29 +0000)
author Tim Renouf <tpr.llvm@botech.co.uk>
Wed, 3 Oct 2018 10:29:43 +0000 (10:29 +0000)
committer Tim Renouf <tpr.llvm@botech.co.uk>
Wed, 3 Oct 2018 10:29:43 +0000 (10:29 +0000)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp

index 1bc430b..7f1467c 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5983,11 +5983,18 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
    if (C1) {
      unsigned ImmOffset = C1->getZExtValue();
      // If the immediate value is too big for the immoffset field, put the value
-    // mod 4096 into the immoffset field so that the value that is copied/added
+    // and 4095 into the immoffset field so that the value that is copied/added
      // for the voffset field is a multiple of 4096, and it stands more chance
      // of being CSEd with the copy/add for another similar load/store.
+    // However, do not do that rounding down to a multiple of 4096 if that is a
+    // negative number, as it appears to be illegal to have a negative offset
+    // in the vgpr, even if adding the immediate offset makes it positive.
      unsigned Overflow = ImmOffset & ~MaxImm;
      ImmOffset -= Overflow;
+    if ((int32_t)Overflow < 0) {
+      Overflow += ImmOffset;
+      ImmOffset = 0;
+    }
      C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
      if (Overflow) {
        auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll

index 7669507..dcae08d 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
@@ -74,8 +74,8 @@ main_body:
  }
  
  ;CHECK-LABEL: {{^}}buffer_load_negative_offset:
-;CHECK: v_add_{{[iu]}}32_e32 [[VOFS:v[0-9]+]], vcc, 0xfffff000, v0
-;CHECK: buffer_load_dwordx4 v[0:3], [[VOFS]], s[0:3], 0 offen offset:4080
+;CHECK: v_add_{{[iu]}}32_e32 [[VOFS:v[0-9]+]], vcc, -16, v0
+;CHECK: buffer_load_dwordx4 v[0:3], [[VOFS]], s[0:3], 0 offen
  define amdgpu_ps <4 x float> @buffer_load_negative_offset(<4 x i32> inreg, i32 %ofs) {
  main_body:
    %ofs.1 = add i32 %ofs, -16
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll

index 71fbdaa..5484c8b 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
@@ -102,8 +102,8 @@ main_body:
  }
  
  ;CHECK-LABEL: {{^}}buffer_load_negative_offset:
-;CHECK: v_add_{{[iu]}}32_e32 {{v[0-9]+}}, vcc, 0xfffff000, v0
-;CHECK: buffer_load_dwordx4 v[0:3], {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 idxen offen offset:4080
+;CHECK: v_add_{{[iu]}}32_e32 {{v[0-9]+}}, vcc, -16, v0
+;CHECK: buffer_load_dwordx4 v[0:3], {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 idxen offen
  define amdgpu_ps <4 x float> @buffer_load_negative_offset(<4 x i32> inreg, i32 %ofs) {
  main_body:
    %ofs.1 = add i32 %ofs, -16
author	Tim Renouf <tpr.llvm@botech.co.uk>
	Wed, 3 Oct 2018 10:29:43 +0000 (10:29 +0000)
committer	Tim Renouf <tpr.llvm@botech.co.uk>
	Wed, 3 Oct 2018 10:29:43 +0000 (10:29 +0000)
llvm/lib/Target/AMDGPU/SIISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll		patch \| blob \| history