From 95d2eb95c303f859adb9d3b19ecce056e539f61d Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 6 Mar 2013 05:44:53 +0000 Subject: [PATCH] InstCombine: Don't shrink allocas when combining with a bitcast. When considering folding a bitcast of an alloca into the alloca itself, make sure we don't shrink the amount of memory being allocated, or things rapidly go sideways. rdar://13324424 llvm-svn: 176547 --- .../Transforms/InstCombine/InstCombineCasts.cpp | 6 +++ llvm/test/Transforms/Inline/devirtualize.ll | 2 +- .../2013-03-05-Combine-BitcastTy-Into-Alloca.ll | 45 ++++++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index a960ab2..d162223 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -104,6 +104,12 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); if (CastElTySize == 0 || AllocElTySize == 0) return 0; + // If the allocation has multiple uses, only promote it if we're not + // shrinking the amount of memory being allocated. + uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy); + uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy); + if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0; + // See if we can satisfy the modulus by pulling a scale out of the array // size argument. unsigned ArraySizeScale; diff --git a/llvm/test/Transforms/Inline/devirtualize.ll b/llvm/test/Transforms/Inline/devirtualize.ll index 18bbf7a..d46154e 100644 --- a/llvm/test/Transforms/Inline/devirtualize.ll +++ b/llvm/test/Transforms/Inline/devirtualize.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -basicaa -inline -scalarrepl -instcombine -simplifycfg -instcombine -gvn -globaldce < %s | FileCheck %s +; RUN: opt -S -Os < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" diff --git a/llvm/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll b/llvm/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll new file mode 100644 index 0000000..b20c3a0 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +%struct._my_struct = type <{ [12 x i8], [4 x i8] }> + +@initval = common global %struct._my_struct zeroinitializer, align 1 + +; InstCombine will try to change the %struct._my_struct alloca into an +; allocation of an i96 because of the bitcast to create %2. That's not valid, +; as the other 32 bits of the structure still feed into the return value +define { i64, i64 } @function(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK: @function +; CHECK-NEXT: entry: +; CHECK-NEXT: %retval = alloca %struct._my_struct, align 8 +; CHECK-NOT: bitcast i96* %retval to %struct._my_struct* +entry: + %retval = alloca %struct._my_struct, align 8 + %k.sroa.0.0.copyload = load i96* bitcast (%struct._my_struct* @initval to i96*), align 1 + %k.sroa.1.12.copyload = load i32* bitcast ([4 x i8]* getelementptr inbounds (%struct._my_struct* @initval, i64 0, i32 1) to i32*), align 1 + %0 = zext i32 %x to i96 + %bf.value = shl nuw nsw i96 %0, 6 + %bf.clear = and i96 %k.sroa.0.0.copyload, -288230376151711744 + %1 = zext i32 %y to i96 + %bf.value2 = shl nuw nsw i96 %1, 32 + %bf.shl3 = and i96 %bf.value2, 288230371856744448 + %bf.value.masked = and i96 %bf.value, 4294967232 + %2 = zext i32 %z to i96 + %bf.value8 = and i96 %2, 63 + %bf.clear4 = or i96 %bf.shl3, %bf.value.masked + %bf.set5 = or i96 %bf.clear4, %bf.value8 + %bf.set10 = or i96 %bf.set5, %bf.clear + %retval.0.cast7 = bitcast %struct._my_struct* %retval to i96* + store i96 %bf.set10, i96* %retval.0.cast7, align 8 + %retval.12.idx8 = getelementptr inbounds %struct._my_struct* %retval, i64 0, i32 1 + %retval.12.cast9 = bitcast [4 x i8]* %retval.12.idx8 to i32* + store i32 %k.sroa.1.12.copyload, i32* %retval.12.cast9, align 4 + %trunc = trunc i96 %bf.set10 to i64 + %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %trunc, 0 + %retval.8.idx12 = getelementptr inbounds %struct._my_struct* %retval, i64 0, i32 0, i64 8 + %retval.8.cast13 = bitcast i8* %retval.8.idx12 to i64* + %retval.8.load14 = load i64* %retval.8.cast13, align 8 + %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.8.load14, 1 + ret { i64, i64 } %.fca.1.insert +} -- 2.7.4