From ff55d01a8e1b60c137cd8cab7d9eeef14284fd7a Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 16 Apr 2021 11:37:36 -0700 Subject: [PATCH] [nofree] Restrict semantics to memory visible to caller This patch clarifies the semantics of the nofree function attribute to make clear that it provides an "as if" semantic. That is, a nofree function is guaranteed not to free memory which existed before the call, but might allocate and then deallocate that same memory within the lifetime of the callee. This is the result of the discussion on llvm-dev under the thread "Ambiguity in the nofree function attribute". The most important part of this change is the LangRef wording. The rest is minor comment changes to emphasize the new semantics where code was accidentally consistent, and fix one place which wasn't consistent. That one place is currently narrowly used as it is primarily part of the ongoing (and not yet enabled) deref-at-point semantics work. Differential Revision: https://reviews.llvm.org/D100141 --- llvm/docs/LangRef.rst | 21 ++++++++++---- llvm/lib/IR/Value.cpp | 17 +++++++----- .../InstCombine/InstructionCombining.cpp | 6 ++-- llvm/test/Transforms/LICM/hoist-alloc.ll | 4 +-- .../LoopVectorize/X86/load-deref-pred.ll | 32 +++++++++++----------- 5 files changed, 47 insertions(+), 33 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 628f23e..f06e95f 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1598,12 +1598,21 @@ example: call is dead after inlining. ``nofree`` This function attribute indicates that the function does not, directly or - indirectly, call a memory-deallocation function (free, for example). 
As a - result, uncaptured pointers that are known to be dereferenceable prior to a - call to a function with the ``nofree`` attribute are still known to be - dereferenceable after the call (the capturing condition is necessary in - environments where the function might communicate the pointer to another thread - which then deallocates the memory). + transitively, call a memory-deallocation function (``free``, for example) + on a memory allocation which existed before the call. + + As a result, uncaptured pointers that are known to be dereferenceable + prior to a call to a function with the ``nofree`` attribute are still + known to be dereferenceable after the call. The capturing condition is + necessary in environments where the function might communicate the + pointer to another thread which then deallocates the memory. Alternatively, + ``nosync`` would ensure such communication cannot happen and even captured + pointers cannot be freed by the function. + + A ``nofree`` function is explicitly allowed to free memory which it + allocated or (if not ``nosync``) arrange for another thread to free + memory on its behalf. As a result, perhaps surprisingly, a ``nofree`` + function can return a pointer to a previously deallocated memory object. ``noimplicitfloat`` This attributes disables implicit floating-point instructions. ``noinline`` diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp index 544f4d6..7003508 100644 --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -736,9 +736,18 @@ bool Value::canBeFreed() const { // Handle byval/byref/sret/inalloca/preallocated arguments. The storage // lifetime is guaranteed to be longer than the callee's lifetime. - if (auto *A = dyn_cast<Argument>(this)) + if (auto *A = dyn_cast<Argument>(this)) { if (A->hasPointeeInMemoryValueAttr()) return false; + // A pointer to an object in a function which neither frees, nor can arrange + // for another thread to free on its behalf, can not be freed in the scope + // of the function. 
Note that this logic is restricted to memory + // allocations in existence before the call; a nofree function *is* allowed + // to free memory it allocated. + const Function *F = A->getParent(); + if (F->doesNotFreeMemory() && F->hasNoSync()) + return false; + } const Function *F = nullptr; if (auto *I = dyn_cast<Instruction>(this)) @@ -749,12 +758,6 @@ bool Value::canBeFreed() const { if (!F) return true; - // A pointer to an object in a function which neither frees, nor can arrange - // for another thread to free on its behalf, can not be freed in the scope - // of the function. - if (F->doesNotFreeMemory() && F->hasNoSync()) - return false; - // With garbage collection, deallocation typically occurs solely at or after // safepoints. If we're compiling for a collector which uses the // gc.statepoint infrastructure, safepoints aren't explicitly present diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 050f871..919c8621 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2802,8 +2802,10 @@ Instruction *InstCombinerImpl::visitFree(CallInst &FI) { // If we free a pointer we've been explicitly told won't be freed, this // would be full UB and thus we can conclude this is unreachable. 
Cases: // 1) freeing a pointer which is explicitly nofree - // 2) calling free from a call site marked nofree - // 3) calling free in a function scope marked nofree + // 2) calling free from a call site marked nofree (TODO: can generalize + // for non-arguments) + // 3) calling free in a function scope marked nofree (when we can prove + // the allocation existed before the start of the function scope) if (auto *A = dyn_cast(Op->stripPointerCasts())) if (A->hasAttribute(Attribute::NoFree) || FI.hasFnAttr(Attribute::NoFree) || diff --git a/llvm/test/Transforms/LICM/hoist-alloc.ll b/llvm/test/Transforms/LICM/hoist-alloc.ll index 04992c9..cb0d18a 100644 --- a/llvm/test/Transforms/LICM/hoist-alloc.ll +++ b/llvm/test/Transforms/LICM/hoist-alloc.ll @@ -178,11 +178,11 @@ define i8 @test_hoist_malloc_leak() nofree nosync { ; CHECK-NEXT: [[A_RAW:%.*]] = call nonnull i8* @malloc(i64 32) ; CHECK-NEXT: call void @init(i8* [[A_RAW]]) ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i8, i8* [[A_RAW]], i32 31 -; CHECK-NEXT: [[RES:%.*]] = load i8, i8* [[ADDR]], align 1 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[RES:%.*]] = load i8, i8* [[ADDR]], align 1 ; CHECK-NEXT: call void @use(i8 [[RES]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 200 @@ -372,11 +372,11 @@ define i8 @test_hoist_allocsize_leak() nofree nosync { ; CHECK-NEXT: [[A_RAW:%.*]] = call nonnull i8* @my_alloc(i64 32) ; CHECK-NEXT: call void @init(i8* [[A_RAW]]) ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i8, i8* [[A_RAW]], i32 31 -; CHECK-NEXT: [[RES:%.*]] = load i8, i8* [[ADDR]], align 1 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[RES:%.*]] = load 
i8, i8* [[ADDR]], align 1 ; CHECK-NEXT: call void @use(i8 [[RES]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 200 diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index be86591..362f407 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -2299,24 +2299,24 @@ define i32 @test_allocsize(i64 %len, i1* %test_base) nofree nosync { ; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 0 ; CHECK-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 4 ; CHECK-NEXT: [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP71]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 8 ; CHECK-NEXT: [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP73]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison) ; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 12 ; CHECK-NEXT: [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 
x i32>, <4 x i32>* [[TMP75]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison) ; CHECK-NEXT: [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], ; CHECK-NEXT: [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], ; CHECK-NEXT: [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], ; CHECK-NEXT: [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_LOAD4]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_LOAD5]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_LOAD6]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]] ; CHECK-NEXT: [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]] @@ -2467,24 +2467,24 @@ define i32 @test_allocsize_array(i64 %len, i1* %test_base) nofree nosync { ; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 0 ; CHECK-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call 
<4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 4 ; CHECK-NEXT: [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP71]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 8 ; CHECK-NEXT: [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP73]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison) ; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 12 ; CHECK-NEXT: [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP75]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison) ; CHECK-NEXT: [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], ; CHECK-NEXT: [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], ; CHECK-NEXT: [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], ; CHECK-NEXT: [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_LOAD4]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_LOAD5]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_LOAD6]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x 
i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]] ; CHECK-NEXT: [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]] -- 2.7.4