From 9d966a2002deedc3809dff9ba51309d712620d9d Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Mon, 15 Oct 2012 10:24:40 +0000
Subject: [PATCH] Follow-up fix to r165928: handle memset rewriting for
 widened integers, and generally clean up the memset handling.

It had rotted a bit as the other rewriting logic got polished more.

llvm-svn: 165930
---
 llvm/lib/Transforms/Scalar/SROA.cpp    | 55 ++++++++++++++++------------------
 llvm/test/Transforms/SROA/basictest.ll | 13 ++++++++
 2 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index a2267d0..419756d 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2591,10 +2591,11 @@ private:
 
     // If this doesn't map cleanly onto the alloca type, and that type isn't
     // a single value type, just emit a memset.
-    if (!VecTy && (BeginOffset != NewAllocaBeginOffset ||
-                   EndOffset != NewAllocaEndOffset ||
-                   !AllocaTy->isSingleValueType() ||
-                   !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
+    if (!VecTy && !IntTy &&
+        (BeginOffset != NewAllocaBeginOffset ||
+         EndOffset != NewAllocaEndOffset ||
+         !AllocaTy->isSingleValueType() ||
+         !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
       Type *SizeTy = II.getLength()->getType();
       Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
       CallInst *New
@@ -2612,32 +2613,24 @@ private:
     // a sensible representation for the alloca type. This is essentially
     // splatting the byte to a sufficiently wide integer, bitcasting to the
     // desired scalar type, and splatting it across any desired vector type.
+    uint64_t Size = EndOffset - BeginOffset;
     Value *V = II.getValue();
     IntegerType *VTy = cast<IntegerType>(V->getType());
-    Type *IntTy = Type::getIntNTy(VTy->getContext(),
-                                  TD.getTypeSizeInBits(ScalarTy));
-    if (TD.getTypeSizeInBits(ScalarTy) > VTy->getBitWidth())
-      V = IRB.CreateMul(IRB.CreateZExt(V, IntTy, getName(".zext")),
+    Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
+    if (Size*8 > VTy->getBitWidth())
+      V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
                         ConstantExpr::getUDiv(
-                          Constant::getAllOnesValue(IntTy),
+                          Constant::getAllOnesValue(SplatIntTy),
                           ConstantExpr::getZExt(
                             Constant::getAllOnesValue(V->getType()),
-                            IntTy)),
+                            SplatIntTy)),
                         getName(".isplat"));
-    if (V->getType() != ScalarTy) {
-      if (ScalarTy->isPointerTy())
-        V = IRB.CreateIntToPtr(V, ScalarTy);
-      else if (ScalarTy->isPrimitiveType() || ScalarTy->isVectorTy())
-        V = IRB.CreateBitCast(V, ScalarTy);
-      else if (ScalarTy->isIntegerTy())
-        llvm_unreachable("Computed different integer types with equal widths");
-      else
-        llvm_unreachable("Invalid scalar type");
-    }
 
     // If this is an element-wide memset of a vectorizable alloca, insert it.
     if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
                   EndOffset < NewAllocaEndOffset)) {
+      if (V->getType() != ScalarTy)
+        V = convertValue(TD, IRB, V, ScalarTy);
       StoreInst *Store = IRB.CreateAlignedStore(
         IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
                                                       NewAI.getAlignment(),
@@ -2650,18 +2643,20 @@ private:
       return true;
     }
 
-    // Splat to a vector if needed.
-    if (VectorType *VecTy = dyn_cast<VectorType>(AllocaTy)) {
-      VectorType *SplatSourceTy = VectorType::get(V->getType(), 1);
-      V = IRB.CreateShuffleVector(
-        IRB.CreateInsertElement(UndefValue::get(SplatSourceTy), V,
-                                IRB.getInt32(0), getName(".vsplat.insert")),
-        UndefValue::get(SplatSourceTy),
-        ConstantVector::getSplat(VecTy->getNumElements(), IRB.getInt32(0)),
-        getName(".vsplat.shuffle"));
-      assert(V->getType() == VecTy);
+    // If this is a memset on an alloca where we can widen stores, insert the
+    // set integer.
+    if (IntTy && (BeginOffset > NewAllocaBeginOffset ||
+                  EndOffset < NewAllocaEndOffset)) {
+      assert(!II.isVolatile());
+      StoreInst *Store = insertInteger(IRB, V, BeginOffset);
+      (void)Store;
+      DEBUG(dbgs() << "          to: " << *Store << "\n");
+      return true;
     }
 
+    if (V->getType() != AllocaTy)
+      V = convertValue(TD, IRB, V, AllocaTy);
+
     Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
                                         II.isVolatile());
     (void)New;
diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll
index 4a87d91..b33ffa6 100644
--- a/llvm/test/Transforms/SROA/basictest.ll
+++ b/llvm/test/Transforms/SROA/basictest.ll
@@ -1034,11 +1034,24 @@ entry:
   %X.sroa.0.i = alloca double, align 8
   %0 = bitcast double* %X.sroa.0.i to i8*
   call void @llvm.lifetime.start(i64 -1, i8* %0)
+
+  ; Store to the low 32-bits...
   %X.sroa.0.0.cast2.i = bitcast double* %X.sroa.0.i to i32*
   store i32 0, i32* %X.sroa.0.0.cast2.i, align 8
+
+  ; Also use a memset to the middle 32-bits for fun.
+  %X.sroa.0.2.raw_idx2.i = getelementptr inbounds i8* %0, i32 2
+  call void @llvm.memset.p0i8.i64(i8* %X.sroa.0.2.raw_idx2.i, i8 0, i64 4, i32 1, i1 false)
+
+  ; Or a memset of the whole thing.
+  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 1, i1 false)
+
+  ; Store to the high 32-bits...
   %X.sroa.0.4.raw_idx4.i = getelementptr inbounds i8* %0, i32 4
   %X.sroa.0.4.cast5.i = bitcast i8* %X.sroa.0.4.raw_idx4.i to i32*
   store i32 1072693248, i32* %X.sroa.0.4.cast5.i, align 4
+
+  ; Do the actual math...
   %X.sroa.0.0.load1.i = load double* %X.sroa.0.i, align 8
   %accum.real.i = load double* %d, align 8
   %add.r.i = fadd double %accum.real.i, %X.sroa.0.0.load1.i
-- 
2.7.4
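Aside (not part of the patch): the ".isplat" sequence above widens the memset byte by multiplying its zero-extension with the constant udiv of all-ones(SplatIntTy) by zext(all-ones(i8)), i.e. the repeating pattern 0x0101...01. A minimal standalone C++ sketch of that arithmetic follows; the names (splatByte, SizeInBytes) are hypothetical and only illustrate the math, not the SROA API.

// Hypothetical standalone demo of the byte-splat math used by the rewrite:
// zext(Byte) * (all-ones / 0xFF) repeats Byte across every byte position.
#include <cstdint>
#include <cstdio>

static uint64_t splatByte(uint8_t Byte, unsigned SizeInBytes) {
  // All-ones value of the splat integer type (SizeInBytes in 1..8).
  uint64_t AllOnes =
      SizeInBytes >= 8 ? ~0ULL : ((1ULL << (SizeInBytes * 8)) - 1);
  uint64_t Multiplier = AllOnes / 0xFF; // 0x01 repeated SizeInBytes times.
  return (uint64_t)Byte * Multiplier;   // e.g. 0xAB -> 0xABAB...AB
}

int main() {
  std::printf("%llx\n", (unsigned long long)splatByte(0xAB, 8)); // abababababababab
  std::printf("%llx\n", (unsigned long long)splatByte(0xCD, 4)); // cdcdcdcd
  return 0;
}

In the patch, the resulting wide integer is then handed to convertValue (or stored via insertInteger for sub-alloca memsets) rather than the old IntToPtr/BitCast special cases.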