[VectorCombine] Insert addrspacecast when crossing address space boundaries

author Fraser Cormack <fraser@codeplay.com>

Wed, 16 Mar 2022 10:14:07 +0000 (10:14 +0000)

committer Fraser Cormack <fraser@codeplay.com>

Thu, 24 Mar 2022 19:08:08 +0000 (19:08 +0000)
author Fraser Cormack <fraser@codeplay.com>
Wed, 16 Mar 2022 10:14:07 +0000 (10:14 +0000)
committer Fraser Cormack <fraser@codeplay.com>
Thu, 24 Mar 2022 19:08:08 +0000 (19:08 +0000)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp

index e938ca6..59a926b 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -152,12 +152,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
    Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
    assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
  
-  // If original AS != Load's AS, we can't bitcast the original pointer and have
-  // to use Load's operand instead. Ideally we would want to strip pointer casts
-  // without changing AS, but there's no API to do that ATM.
    unsigned AS = Load->getPointerAddressSpace();
-  if (AS != SrcPtr->getType()->getPointerAddressSpace())
-    SrcPtr = Load->getPointerOperand();
  
    // We are potentially transforming byte-sized (8-bit) memory accesses, so make
    // sure we have all of our type-based constraints in place for this target.
@@ -245,7 +240,8 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
    // It is safe and potentially profitable to load a vector directly:
    // inselt undef, load Scalar, 0 --> load VecPtr
    IRBuilder<> Builder(Load);
-  Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
+  Value *CastedPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
+      SrcPtr, MinVecTy->getPointerTo(AS));
    Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
    VecLd = Builder.CreateShuffleVector(VecLd, Mask);
  
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll

index 688e3c4..b493c2a 100644 (file)
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll
@@ -11,9 +11,7 @@ define protected amdgpu_kernel void @load_from_other_as(<4 x float>* nocapture n
  ; CHECK-LABEL: @load_from_other_as(
  ; CHECK-NEXT:  bb:
  ; CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5)
-; CHECK-NEXT:    [[B:%.*]] = addrspacecast [[STRUCT_HOGE]] addrspace(5)* [[A]] to %struct.hoge*
-; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_HOGE]], %struct.hoge* [[B]], i64 0, i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[C]] to <1 x float>*
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast [[STRUCT_HOGE]] addrspace(5)* [[A]] to <1 x float>*
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x float>, <1 x float>* [[TMP0]], align 4
  ; CHECK-NEXT:    [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    store <4 x float> [[E]], <4 x float>* [[RESULTPTR:%.*]], align 16
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll

index 3468cb5..8e36856 100644 (file)
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
@@ -11,9 +11,7 @@ define protected amdgpu_kernel void @load_from_other_as(<4 x float>* nocapture n
  ; CHECK-LABEL: @load_from_other_as(
  ; CHECK-NEXT:  bb:
  ; CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5)
-; CHECK-NEXT:    [[B:%.*]] = addrspacecast [[STRUCT_HOGE]] addrspace(5)* [[A]] to %struct.hoge*
-; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_HOGE]], %struct.hoge* [[B]], i64 0, i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[C]] to <1 x float>*
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast [[STRUCT_HOGE]] addrspace(5)* [[A]] to <1 x float>*
  ; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x float>, <1 x float>* [[TMP0]], align 4
  ; CHECK-NEXT:    [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  ; CHECK-NEXT:    store <4 x float> [[E]], <4 x float>* [[RESULTPTR:%.*]], align 16
diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll

index cf5a6c0..e4ea44a 100644 (file)
--- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
@@ -253,6 +253,23 @@ define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(
    ret <4 x float> %r
  }
  
+; Should work with addrspace even when peeking past unsafe loads through geps
+
+define <4 x i32> @unsafe_load_i32_insert_v4i32_addrspace(i32* align 16 dereferenceable(16) %v3) {
+; CHECK-LABEL: @unsafe_load_i32_insert_v4i32_addrspace(
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast i32* [[V3:%.*]] to <4 x i32> addrspace(42)*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(42)* [[TMP1]], align 16
+; CHECK-NEXT:    [[INSELT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    ret <4 x i32> [[INSELT]]
+;
+  %t0 = getelementptr inbounds i32, i32* %v3, i32 1
+  %t1 = addrspacecast i32* %t0 to i32 addrspace(42)*
+  %t2 = getelementptr inbounds i32, i32 addrspace(42)* %t1, i64 1
+  %val = load i32, i32 addrspace(42)* %t2, align 4
+  %inselt = insertelement <4 x i32> poison, i32 %val, i32 0
+  ret <4 x i32> %inselt
+}
+
  ; If there are enough dereferenceable bytes, we can offset the vector load.
  
  define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(18) %p) nofree nosync {
author	Fraser Cormack <fraser@codeplay.com>
	Wed, 16 Mar 2022 10:14:07 +0000 (10:14 +0000)
committer	Fraser Cormack <fraser@codeplay.com>
	Thu, 24 Mar 2022 19:08:08 +0000 (19:08 +0000)
llvm/lib/Transforms/Vectorize/VectorCombine.cpp		patch \| blob \| history
llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll		patch \| blob \| history
llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll		patch \| blob \| history
llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll		patch \| blob \| history