From: Paul Walker <paul.walker@arm.com>
Date: Thu, 3 Dec 2020 12:26:29 +0000 (+0000)
Subject: [CodeGenPrepare] Update optimizeGatherScatterInst for scalable vectors.
X-Git-Tag: llvmorg-13-init~3375
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6d35bd1d48e9fdde38483e6b22a900daa7e3d46a;p=platform%2Fupstream%2Fllvm.git

[CodeGenPrepare] Update optimizeGatherScatterInst for scalable vectors.

optimizeGatherScatterInst does nothing specific to fixed length vectors
but uses FixedVectorType to extract the number of elements. This patch
simply updates the code to use VectorType and getElementCount instead.

For testing I just copied Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll
replacing `<4 x ` with `<vscale x 4 x `.

Differential Revision: https://reviews.llvm.org/D92572
---

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5332,9 +5332,5 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
 bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
                                                Value *Ptr) {
-  // FIXME: Support scalable vectors.
-  if (isa<ScalableVectorType>(Ptr->getType()))
-    return false;
-
   Value *NewAddr;
 
   if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
@@ -5370,7 +5366,7 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
     if (!RewriteGEP && Ops.size() == 2)
       return false;
 
-    unsigned NumElts = cast<FixedVectorType>(Ptr->getType())->getNumElements();
+    auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
 
     IRBuilder<> Builder(MemoryInst);
 
@@ -5380,7 +5376,7 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
     // and a vector GEP with all zeroes final index.
     if (!Ops[FinalIndex]->getType()->isVectorTy()) {
       NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front());
-      auto *IndexTy = FixedVectorType::get(ScalarIndexTy, NumElts);
+      auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
       NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy));
     } else {
       Value *Base = Ops[0];
@@ -5403,13 +5399,13 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
     if (!V)
       return false;
 
-    unsigned NumElts = cast<FixedVectorType>(Ptr->getType())->getNumElements();
+    auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
 
     IRBuilder<> Builder(MemoryInst);
 
     // Emit a vector GEP with a scalar pointer and all 0s vector index.
     Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
-    auto *IndexTy = FixedVectorType::get(ScalarIndexTy, NumElts);
+    auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
     NewAddr = Builder.CreateGEP(V, Constant::getNullValue(IndexTy));
   } else {
     // Constant, SelectionDAGBuilder knows to check if its a splat.
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll
new file mode 100644
index 0000000..08011b6
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+%struct.a = type { i32, i32 }
+@c = external dso_local global %struct.a, align 4
+@glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16
+
+define <vscale x 4 x i32> @splat_base(i32* %base, <vscale x 4 x i64> %index, <vscale x 4 x i1> %mask) #0 {
+; CHECK-LABEL: @splat_base(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <vscale x 4 x i64> [[INDEX:%.*]]
+; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[RES]]
+;
+  %broadcast.splatinsert = insertelement <vscale x 4 x i32*> undef, i32* %base, i32 0
+  %broadcast.splat = shufflevector <vscale x 4 x i32*> %broadcast.splatinsert, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer
+  %gep = getelementptr i32, <vscale x 4 x i32*> %broadcast.splat, <vscale x 4 x i64> %index
+  %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @splat_struct(%struct.a* %base, <vscale x 4 x i1> %mask) #0 {
+; CHECK-LABEL: @splat_struct(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], %struct.a* [[BASE:%.*]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <vscale x 4 x i64> zeroinitializer
+; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP2]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[RES]]
+;
+  %gep = getelementptr %struct.a, %struct.a* %base, <vscale x 4 x i64> zeroinitializer, i32 1
+  %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @scalar_index(i32* %base, i64 %index, <vscale x 4 x i1> %mask) #0 {
+; CHECK-LABEL: @scalar_index(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <vscale x 4 x i64> zeroinitializer
+; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP2]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[RES]]
+;
+  %broadcast.splatinsert = insertelement <vscale x 4 x i32*> undef, i32* %base, i32 0
+  %broadcast.splat = shufflevector <vscale x 4 x i32*> %broadcast.splatinsert, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer
+  %gep = getelementptr i32, <vscale x 4 x i32*> %broadcast.splat, i64 %index
+  %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @splat_index(i32* %base, i64 %index, <vscale x 4 x i1> %mask) #0 {
+; CHECK-LABEL: @splat_index(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <vscale x 4 x i64> zeroinitializer
+; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP2]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[RES]]
+;
+  %broadcast.splatinsert = insertelement <vscale x 4 x i64> undef, i64 %index, i32 0
+  %broadcast.splat = shufflevector <vscale x 4 x i64> %broadcast.splatinsert, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+  %gep = getelementptr i32, i32* %base, <vscale x 4 x i64> %broadcast.splat
+  %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @test_global_array(<vscale x 4 x i64> %indxs, <vscale x 4 x i1> %mask) #0 {
+; CHECK-LABEL: @test_global_array(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @glob_array, i64 0, i64 0), <vscale x 4 x i64> [[INDXS:%.*]]
+; CHECK-NEXT:    [[G:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[G]]
+;
+  %p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <vscale x 4 x i64> %indxs
+  %g = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %p, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
+  ret <vscale x 4 x i32> %g
+}
+
+define <vscale x 4 x i32> @global_struct_splat(<vscale x 4 x i1> %mask) #0 {
+; CHECK-LABEL: @global_struct_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> shufflevector (<vscale x 4 x i32*> insertelement (<vscale x 4 x i32*> undef, i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32 0), <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer), i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+;
+  %1 = insertelement <vscale x 4 x %struct.a*> undef, %struct.a* @c, i32 0
+  %2 = shufflevector <vscale x 4 x %struct.a*> %1, <vscale x 4 x %struct.a*> undef, <vscale x 4 x i32> zeroinitializer
+  %3 = getelementptr %struct.a, <vscale x 4 x %struct.a*> %2, <vscale x 4 x i64> zeroinitializer, i32 1
+  %4 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %3, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
+  ret <vscale x 4 x i32> %4
+}
+
+define <vscale x 4 x i32> @splat_ptr_gather(i32* %ptr, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) #0 {
+; CHECK-LABEL: @splat_ptr_gather(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <vscale x 4 x i64> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> [[PASSTHRU:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = insertelement <vscale x 4 x i32*> undef, i32* %ptr, i32 0
+  %2 = shufflevector <vscale x 4 x i32*> %1, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer
+  %3 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %2, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru)
+  ret <vscale x 4 x i32> %3
+}
+
+define void @splat_ptr_scatter(i32* %ptr, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %val) #0 {
+; CHECK-LABEL: @splat_ptr_scatter(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <vscale x 4 x i64> zeroinitializer
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> [[VAL:%.*]], <vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]])
+; CHECK-NEXT:    ret void
+;
+  %1 = insertelement <vscale x 4 x i32*> undef, i32* %ptr, i32 0
+  %2 = shufflevector <vscale x 4 x i32*> %1, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer
+  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %2, i32 4, <vscale x 4 x i1> %mask)
+  ret void
+}
+
+declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)
+
+attributes #0 = { "target-features"="+sve" }