From 44096e6904e10bb313fef2f6aaff25c25d1325f7 Mon Sep 17 00:00:00 2001
From: CaprYang
Date: Wed, 17 May 2023 21:39:36 +0100
Subject: [PATCH] [InferAddressSpaces] Handle vector of pointers type & Support
 intrinsic masked gather/scatter

---
 llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp |  79 +++++++++-----
 .../Transforms/InferAddressSpaces/AMDGPU/icmp.ll  |   3 +-
 .../InferAddressSpaces/masked-gather-scatter.ll   |  36 +++++++
 .../InferAddressSpaces/vector-of-pointers.ll      | 115 +++++++++++++++++++++
 4 files changed, 204 insertions(+), 29 deletions(-)
 create mode 100644 llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll
 create mode 100644 llvm/test/Transforms/InferAddressSpaces/vector-of-pointers.ll

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index b671373..a82be5b 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -256,6 +256,12 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                     false, false)
 
+static Type *getPtrOrVecOfPtrsWithNewAS(Type *Ty, unsigned NewAddrSpace) {
+  assert(Ty->isPtrOrPtrVectorTy());
+  PointerType *NPT = PointerType::get(Ty->getContext(), NewAddrSpace);
+  return Ty->getWithNewType(NPT);
+}
+
 // Check whether that's no-op pointer bicast using a pair of
 // `ptrtoint`/`inttoptr` due to the missing no-op pointer bitcast over
 // different address spaces.
@@ -301,14 +307,14 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
 
   switch (Op->getOpcode()) {
   case Instruction::PHI:
-    assert(Op->getType()->isPointerTy());
+    assert(Op->getType()->isPtrOrPtrVectorTy());
     return true;
   case Instruction::BitCast:
   case Instruction::AddrSpaceCast:
   case Instruction::GetElementPtr:
     return true;
   case Instruction::Select:
-    return Op->getType()->isPointerTy();
+    return Op->getType()->isPtrOrPtrVectorTy();
   case Instruction::Call: {
     const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&V);
     return II && II->getIntrinsicID() == Intrinsic::ptrmask;
@@ -373,6 +379,24 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
   case Intrinsic::ptrmask:
     // This is handled as an address expression, not as a use memory operation.
     return false;
+  case Intrinsic::masked_gather: {
+    Type *RetTy = II->getType();
+    Type *NewPtrTy = NewV->getType();
+    Function *NewDecl =
+        Intrinsic::getDeclaration(M, II->getIntrinsicID(), {RetTy, NewPtrTy});
+    II->setArgOperand(0, NewV);
+    II->setCalledFunction(NewDecl);
+    return true;
+  }
+  case Intrinsic::masked_scatter: {
+    Type *ValueTy = II->getOperand(0)->getType();
+    Type *NewPtrTy = NewV->getType();
+    Function *NewDecl =
+        Intrinsic::getDeclaration(M, II->getIntrinsicID(), {ValueTy, NewPtrTy});
+    II->setArgOperand(1, NewV);
+    II->setCalledFunction(NewDecl);
+    return true;
+  }
   default: {
     Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
     if (!Rewrite)
@@ -394,6 +418,14 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
     appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
                                                  PostorderStack, Visited);
     break;
+  case Intrinsic::masked_gather:
+    appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
+                                                 PostorderStack, Visited);
+    break;
+  case Intrinsic::masked_scatter:
+    appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(1),
+                                                 PostorderStack, Visited);
+    break;
   default:
     SmallVector<int, 2> OpIndexes;
     if (TTI->collectFlatAddressOperands(OpIndexes, IID)) {
@@ -412,7 +444,7 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
 void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
     Value *V, PostorderStackTy &PostorderStack,
     DenseSet<Value *> &Visited) const {
-  assert(V->getType()->isPointerTy());
+  assert(V->getType()->isPtrOrPtrVectorTy());
 
   // Generic addressing expressions may be hidden in nested constant
   // expressions.
@@ -460,8 +492,7 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
   // addressing calculations may also be faster.
   for (Instruction &I : instructions(F)) {
     if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
-      if (!GEP->getType()->isVectorTy())
-        PushPtrOperand(GEP->getPointerOperand());
+      PushPtrOperand(GEP->getPointerOperand());
     } else if (auto *LI = dyn_cast<LoadInst>(&I))
       PushPtrOperand(LI->getPointerOperand());
     else if (auto *SI = dyn_cast<StoreInst>(&I))
@@ -480,14 +511,12 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
     } else if (auto *II = dyn_cast<IntrinsicInst>(&I))
       collectRewritableIntrinsicOperands(II, PostorderStack, Visited);
     else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(&I)) {
-      // FIXME: Handle vectors of pointers
-      if (Cmp->getOperand(0)->getType()->isPointerTy()) {
+      if (Cmp->getOperand(0)->getType()->isPtrOrPtrVectorTy()) {
         PushPtrOperand(Cmp->getOperand(0));
         PushPtrOperand(Cmp->getOperand(1));
       }
     } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
-      if (!ASC->getType()->isVectorTy())
-        PushPtrOperand(ASC->getPointerOperand());
+      PushPtrOperand(ASC->getPointerOperand());
     } else if (auto *I2P = dyn_cast<IntToPtrInst>(&I)) {
       if (isNoopPtrIntCastPair(cast<Operator>(I2P), *DL, TTI))
         PushPtrOperand(
@@ -529,8 +558,7 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
     SmallVectorImpl<const Use *> *UndefUsesToFix) {
   Value *Operand = OperandUse.get();
 
-  Type *NewPtrTy = PointerType::getWithSamePointeeType(
-      cast<PointerType>(Operand->getType()), NewAddrSpace);
+  Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Operand->getType(), NewAddrSpace);
 
   if (Constant *C = dyn_cast<Constant>(Operand))
     return ConstantExpr::getAddrSpaceCast(C, NewPtrTy);
@@ -543,8 +571,7 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
   if (I != PredicatedAS.end()) {
     // Insert an addrspacecast on that operand before the user.
     unsigned NewAS = I->second;
-    Type *NewPtrTy = PointerType::getWithSamePointeeType(
-        cast<PointerType>(Operand->getType()), NewAS);
+    Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Operand->getType(), NewAS);
     auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy);
     NewI->insertBefore(Inst);
     NewI->setDebugLoc(Inst->getDebugLoc());
@@ -572,8 +599,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
     const ValueToValueMapTy &ValueWithNewAddrSpace,
     const PredicatedAddrSpaceMapTy &PredicatedAS,
     SmallVectorImpl<const Use *> *UndefUsesToFix) const {
-  Type *NewPtrType = PointerType::getWithSamePointeeType(
-      cast<PointerType>(I->getType()), NewAddrSpace);
+  Type *NewPtrType = getPtrOrVecOfPtrsWithNewAS(I->getType(), NewAddrSpace);
 
   if (I->getOpcode() == Instruction::AddrSpaceCast) {
     Value *Src = I->getOperand(0);
@@ -607,8 +633,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
     if (AS != UninitializedAddressSpace) {
       // For the assumed address space, insert an `addrspacecast` to make that
       // explicit.
-      Type *NewPtrTy = PointerType::getWithSamePointeeType(
-          cast<PointerType>(I->getType()), AS);
+      Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(I->getType(), AS);
       auto *NewI = new AddrSpaceCastInst(I, NewPtrTy);
       NewI->insertAfter(I);
       return NewI;
@@ -617,7 +642,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
   // Computes the converted pointer operands.
   SmallVector<Value *, 4> NewPointerOperands;
   for (const Use &OperandUse : I->operands()) {
-    if (!OperandUse.get()->getType()->isPointerTy())
+    if (!OperandUse.get()->getType()->isPtrOrPtrVectorTy())
      NewPointerOperands.push_back(nullptr);
     else
       NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
@@ -629,7 +654,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
   case Instruction::BitCast:
     return new BitCastInst(NewPointerOperands[0], NewPtrType);
   case Instruction::PHI: {
-    assert(I->getType()->isPointerTy());
+    assert(I->getType()->isPtrOrPtrVectorTy());
     PHINode *PHI = cast<PHINode>(I);
     PHINode *NewPHI = PHINode::Create(NewPtrType, PHI->getNumIncomingValues());
     for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
@@ -648,7 +673,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
     return NewGEP;
   }
   case Instruction::Select:
-    assert(I->getType()->isPointerTy());
+    assert(I->getType()->isPtrOrPtrVectorTy());
     return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
                               NewPointerOperands[2], "", nullptr, I);
   case Instruction::IntToPtr: {
@@ -674,10 +699,10 @@ static Value *cloneConstantExprWithNewAddressSpace(
     ConstantExpr *CE, unsigned NewAddrSpace,
     const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
     const TargetTransformInfo *TTI) {
-  Type *TargetType = CE->getType()->isPointerTy()
-                         ? PointerType::getWithSamePointeeType(
-                               cast<PointerType>(CE->getType()), NewAddrSpace)
-                         : CE->getType();
+  Type *TargetType =
+      CE->getType()->isPtrOrPtrVectorTy()
+          ? getPtrOrVecOfPtrsWithNewAS(CE->getType(), NewAddrSpace)
+          : CE->getType();
 
   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
     // Because CE is flat, the source address space must be specific.
@@ -1226,9 +1251,9 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
       if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
         unsigned NewAS = NewV->getType()->getPointerAddressSpace();
         if (ASC->getDestAddressSpace() == NewAS) {
-          if (!cast<PointerType>(ASC->getType())
-                   ->hasSameElementTypeAs(
-                       cast<PointerType>(NewV->getType()))) {
+          if (!cast<PointerType>(ASC->getType()->getScalarType())
+                   ->hasSameElementTypeAs(
+                       cast<PointerType>(NewV->getType()->getScalarType()))) {
             BasicBlock::iterator InsertPos;
             if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
               InsertPos = std::next(NewVInst->getIterator());
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
index b9e0c13..0c8d7a2 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
@@ -147,9 +147,8 @@ define i1 @icmp_mismatch_flat_group_private_cmp_undef_swap(ptr addrspace(3) %gro
   ret i1 %cmp
 }
 
-; TODO: Should be handled
 ; CHECK-LABEL: @icmp_flat_flat_from_group_vector(
-; CHECK: %cmp = icmp eq <2 x ptr> %cast0, %cast1
+; CHECK: %cmp = icmp eq <2 x ptr addrspace(3)> %group.ptr.0, %group.ptr.1
 define <2 x i1> @icmp_flat_flat_from_group_vector(<2 x ptr addrspace(3)> %group.ptr.0, <2 x ptr addrspace(3)> %group.ptr.1) #0 {
   %cast0 = addrspacecast <2 x ptr addrspace(3)> %group.ptr.0 to <2 x ptr>
   %cast1 = addrspacecast <2 x ptr addrspace(3)> %group.ptr.1 to <2 x ptr>
diff --git a/llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll b/llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll
new file mode 100644
index 0000000..9e051ae
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=infer-address-spaces -assume-default-is-flat-addrspace %s | FileCheck %s
+
+define <4 x i32> @masked_gather_inferas(ptr addrspace(1) %out, <4 x i64> %index) {
+; CHECK-LABEL: define <4 x i32> @masked_gather_inferas
+; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], <4 x i64> [[INDEX:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT]], <4 x i64> [[INDEX]]
+; CHECK-NEXT: [[VALUE:%.*]] = tail call <4 x i32> @llvm.masked.gather.v4i32.v4p1(<4 x ptr addrspace(1)> [[PTRS]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+; CHECK-NEXT: ret <4 x i32> [[VALUE]]
+;
+entry:
+  %out.1 = addrspacecast ptr addrspace(1) %out to ptr
+  %ptrs = getelementptr inbounds i32, ptr %out.1, <4 x i64> %index
+  %value = tail call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+  ret <4 x i32> %value
+}
+
+define void @masked_scatter_inferas(ptr addrspace(1) %out, <4 x i64> %index, <4 x i32> %value) {
+; CHECK-LABEL: define void @masked_scatter_inferas
+; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], <4 x i64> [[INDEX:%.*]], <4 x i32> [[VALUE:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT]], <4 x i64> [[INDEX]]
+; CHECK-NEXT: tail call void @llvm.masked.scatter.v4i32.v4p1(<4 x i32> [[VALUE]], <4 x ptr addrspace(1)> [[PTRS]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: ret void
+;
+entry:
+  %out.1 = addrspacecast ptr addrspace(1) %out to ptr
+  %ptrs = getelementptr inbounds i32, ptr %out.1, <4 x i64> %index
+  tail call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %value, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+  ret void
+}
+
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x i32>)
+
+declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32 immarg, <4 x i1>)
diff --git a/llvm/test/Transforms/InferAddressSpaces/vector-of-pointers.ll b/llvm/test/Transforms/InferAddressSpaces/vector-of-pointers.ll
new file mode 100644
index 0000000..f489bbe
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/vector-of-pointers.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=infer-address-spaces -assume-default-is-flat-addrspace %s | FileCheck %s
+
+define void @double_ascast(<4 x ptr addrspace(3)> %input) {
+; CHECK-LABEL: define void @double_ascast
+; CHECK-SAME: (<4 x ptr addrspace(3)> [[INPUT:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @use(<4 x ptr addrspace(3)> [[INPUT]])
+; CHECK-NEXT: ret void
+;
+entry:
+  %tmp0 = addrspacecast <4 x ptr addrspace(3)> %input to <4 x ptr>
+  %tmp1 = addrspacecast <4 x ptr> %tmp0 to <4 x ptr addrspace(3)>
+  call void @use(<4 x ptr addrspace(3)> %tmp1)
+  ret void
+}
+
+define void @double_gep(ptr addrspace(3) %input, <4 x i64> %i, i64 %j) {
+; CHECK-LABEL: define void @double_gep
+; CHECK-SAME: (ptr addrspace(3) [[INPUT:%.*]], <4 x i64> [[I:%.*]], i64 [[J:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr addrspace(3) [[INPUT]], <4 x i64> [[I]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, <4 x ptr addrspace(3)> [[TMP1]], i64 [[J]]
+; CHECK-NEXT: call void @use(<4 x ptr addrspace(3)> [[TMP2]])
+; CHECK-NEXT: ret void
+;
+entry:
+  %tmp0 = addrspacecast ptr addrspace(3) %input to ptr
+  %tmp1 = getelementptr float, ptr %tmp0, <4 x i64> %i
+  %tmp2 = getelementptr float, <4 x ptr> %tmp1, i64 %j
+  %tmp3 = addrspacecast <4 x ptr> %tmp2 to <4 x ptr addrspace(3)>
+  call void @use(<4 x ptr addrspace(3)> %tmp3)
+  ret void
+}
+
+define void @inferas_phi(<4 x ptr addrspace(3)> %input, i1 %cond) {
+; CHECK-LABEL: define void @inferas_phi
+; CHECK-SAME: (<4 x ptr addrspace(3)> [[INPUT:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND]], label [[INC:%.*]], label [[END:%.*]]
+; CHECK: inc:
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, <4 x ptr addrspace(3)> [[INPUT]], i64 1
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x ptr addrspace(3)> [ [[INPUT]], [[ENTRY:%.*]] ], [ [[TMP1]], [[INC]] ]
+; CHECK-NEXT: call void @use(<4 x ptr addrspace(3)> [[TMP2]])
+; CHECK-NEXT: ret void
+;
+entry:
+  %tmp0 = addrspacecast <4 x ptr addrspace(3)> %input to <4 x ptr>
+  br i1 %cond, label %inc, label %end
+
+inc:
+  %tmp1 = getelementptr float, <4 x ptr> %tmp0, i64 1
+  br label %end
+
+end:
+  %tmp2 = phi <4 x ptr> [ %tmp0, %entry ], [ %tmp1, %inc ]
+  %tmp3 = addrspacecast <4 x ptr> %tmp2 to <4 x ptr addrspace(3)>
+  call void @use(<4 x ptr addrspace(3)> %tmp3)
+  ret void
+}
+
+define void @inferas_ptr2int2ptr(<4 x ptr addrspace(3)> %input) {
+; CHECK-LABEL: define void @inferas_ptr2int2ptr
+; CHECK-SAME: (<4 x ptr addrspace(3)> [[INPUT:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @use(<4 x ptr addrspace(3)> [[INPUT]])
+; CHECK-NEXT: ret void
+;
+entry:
+  %tmp0 = addrspacecast <4 x ptr addrspace(3)> %input to <4 x ptr>
+  %tmp1 = ptrtoint <4 x ptr> %tmp0 to <4 x i64>
+  %tmp2 = inttoptr <4 x i64> %tmp1 to <4 x ptr>
+  %tmp3 = addrspacecast <4 x ptr> %tmp2 to <4 x ptr addrspace(3)>
+  call void @use(<4 x ptr addrspace(3)> %tmp3)
+  ret void
+}
+
+define void @inferas_loop(<4 x ptr addrspace(3)> %begin, <4 x ptr addrspace(3)> %end) {
+; CHECK-LABEL: define void @inferas_loop
+; CHECK-SAME: (<4 x ptr addrspace(3)> [[BEGIN:%.*]], <4 x ptr addrspace(3)> [[END:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[NOW:%.*]] = phi <4 x ptr addrspace(3)> [ [[BEGIN]], [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: call void @use(<4 x ptr addrspace(3)> [[NOW]])
+; CHECK-NEXT: [[NEXT]] = getelementptr float, <4 x ptr addrspace(3)> [[NOW]], i64 1
+; CHECK-NEXT: [[VEQ:%.*]] = icmp eq <4 x ptr addrspace(3)> [[NEXT]], [[END]]
+; CHECK-NEXT: [[MASK:%.*]] = bitcast <4 x i1> [[VEQ]] to i4
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i4 [[MASK]], 0
+; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  %begin0 = addrspacecast <4 x ptr addrspace(3)> %begin to <4 x ptr>
+  %end0 = addrspacecast <4 x ptr addrspace(3)> %end to <4 x ptr>
+  br label %loop
+
+loop:
+  %now = phi <4 x ptr> [ %begin0, %entry ], [ %next, %loop ]
+  %now3 = addrspacecast <4 x ptr> %now to <4 x ptr addrspace(3)>
+  call void @use(<4 x ptr addrspace(3)> %now3)
+  %next = getelementptr float, <4 x ptr> %now, i64 1
+  %veq = icmp eq <4 x ptr> %next, %end0
+  %mask = bitcast <4 x i1> %veq to i4
+  %cond = icmp eq i4 %mask, 0
+  br i1 %cond, label %loop, label %exit

exit:
  ret void
}
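
Note for readers (not part of the patch itself): the gather/scatter handling in
rewriteIntrinsicOperands works by re-requesting the intrinsic declaration with
the overload types that result from swapping in the new pointer operand;
Intrinsic::getDeclaration returns a declaration mangled for exactly the types
it is given. A minimal standalone sketch of that API use, with a hypothetical
helper name and address space 1 chosen only for illustration:

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  // Hypothetical helper: fetch the llvm.masked.gather declaration specialized
  // to a <4 x i32> result gathered through <4 x ptr addrspace(1)>, mirroring
  // what the patched rewriteIntrinsicOperands does after replacing the
  // pointer argument.
  static Function *getGatherDecl(Module &M) {
    LLVMContext &C = M.getContext();
    Type *RetTy = FixedVectorType::get(Type::getInt32Ty(C), 4);
    Type *PtrTy = FixedVectorType::get(PointerType::get(C, /*AddrSpace=*/1), 4);
    return Intrinsic::getDeclaration(&M, Intrinsic::masked_gather,
                                     {RetTy, PtrTy});
  }

The same call with {ValueTy, PtrTy} yields the scatter declaration, which is
why the masked_scatter case in the patch passes the stored value's type first.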