From 1bd2d335b649f2e09d7e4bdd0b92c78489ded022 Mon Sep 17 00:00:00 2001 From: Ella Ma Date: Thu, 8 Jun 2023 20:22:58 +0800 Subject: [PATCH] [analyzer][CStringChecker] Adjust the invalidation operation on the super region of the destination buffer during string copy Fixing GitHub issue: https://github.com/llvm/llvm-project/issues/55019 Following the previous fix https://reviews.llvm.org/D12571 on issue https://github.com/llvm/llvm-project/issues/23328 The two issues report false memory leaks after calling string-copy APIs with a buffer field in an object as the destination. The buffer invalidation incorrectly drops the assignment to a heap memory block when no overflow problems happen. And the pointer of the dropped assignment is declared in the same object of the destination buffer. The previous fix only considers the `memcpy` functions whose copy length is available from arguments. In this issue, the copy length is inferable from the buffer declaration and string literals being copied. Therefore, I have adjusted the previous fix to reuse the copy length computed before. Besides, for APIs that never overflow (strsep) or we never know whether they can overflow (std::copy), new invalidation operations have been introduced to inform CStringChecker::InvalidateBuffer whether or not to invalidate the super region that encompasses the destination buffer. Reviewed By: steakhal Differential Revision: https://reviews.llvm.org/D152435 --- .../lib/StaticAnalyzer/Checkers/CStringChecker.cpp | 210 ++++++++++++++------- .../test/Analysis/Inputs/system-header-simulator.h | 2 + clang/test/Analysis/issue-55019.cpp | 89 +++++++++ clang/test/Analysis/pr22954.c | 5 +- 4 files changed, 231 insertions(+), 75 deletions(-) create mode 100644 clang/test/Analysis/issue-55019.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 01a3550..af9cf14 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -260,11 +260,34 @@ public: const Expr *expr, SVal val) const; - static ProgramStateRef InvalidateBuffer(CheckerContext &C, - ProgramStateRef state, - const Expr *Ex, SVal V, - bool IsSourceBuffer, - const Expr *Size); + /// Invalidate the destination buffer determined by characters copied. + static ProgramStateRef + invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S, + const Expr *BufE, SVal BufV, SVal SizeV, + QualType SizeTy); + + /// Operation never overflows, do not invalidate the super region. + static ProgramStateRef invalidateDestinationBufferNeverOverflows( + CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV); + + /// We do not know whether the operation can overflow (e.g. size is unknown), + /// invalidate the super region and escape related pointers. + static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion( + CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV); + + /// Invalidate the source buffer for escaping pointers. + static ProgramStateRef invalidateSourceBuffer(CheckerContext &C, + ProgramStateRef S, + const Expr *BufE, SVal BufV); + + /// @param InvalidationTraitOperations Determine how to invlidate the + /// MemRegion by setting the invalidation traits. Return true to cause pointer + /// escape, or false otherwise. + static ProgramStateRef invalidateBufferAux( + CheckerContext &C, ProgramStateRef State, const Expr *Ex, SVal V, + llvm::function_ref + InvalidationTraitOperations); static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx, const MemRegion *MR); @@ -310,10 +333,9 @@ public: // Return true if the destination buffer of the copy function may be in bound. // Expects SVal of Size to be positive and unsigned. // Expects SVal of FirstBuf to be a FieldRegion. - static bool IsFirstBufInBound(CheckerContext &C, - ProgramStateRef state, - const Expr *FirstBuf, - const Expr *Size); + static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State, + SVal BufVal, QualType BufTy, SVal LengthVal, + QualType LengthTy); }; } //end anonymous namespace @@ -967,43 +989,40 @@ const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, return strRegion->getStringLiteral(); } -bool CStringChecker::IsFirstBufInBound(CheckerContext &C, - ProgramStateRef state, - const Expr *FirstBuf, - const Expr *Size) { +bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State, + SVal BufVal, QualType BufTy, + SVal LengthVal, QualType LengthTy) { // If we do not know that the buffer is long enough we return 'true'. // Otherwise the parent region of this field region would also get // invalidated, which would lead to warnings based on an unknown state. + if (LengthVal.isUnknown()) + return false; + // Originally copied from CheckBufferAccess and CheckLocation. - SValBuilder &svalBuilder = C.getSValBuilder(); - ASTContext &Ctx = svalBuilder.getContext(); - const LocationContext *LCtx = C.getLocationContext(); + SValBuilder &SB = C.getSValBuilder(); + ASTContext &Ctx = C.getASTContext(); - QualType sizeTy = Size->getType(); QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); - SVal BufVal = state->getSVal(FirstBuf, LCtx); - SVal LengthVal = state->getSVal(Size, LCtx); std::optional Length = LengthVal.getAs(); if (!Length) return true; // cf top comment. // Compute the offset of the last element to be accessed: size-1. - NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs(); - SVal Offset = svalBuilder.evalBinOpNN(state, BO_Sub, *Length, One, sizeTy); + NonLoc One = SB.makeIntVal(1, LengthTy).castAs(); + SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy); if (Offset.isUnknown()) return true; // cf top comment NonLoc LastOffset = Offset.castAs(); // Check that the first buffer is sufficiently long. - SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); + SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy); std::optional BufLoc = BufStart.getAs(); if (!BufLoc) return true; // cf top comment. - SVal BufEnd = - svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, LastOffset, PtrTy); + SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy); // Check for out of bound array element access. const MemRegion *R = BufEnd.getAsRegion(); @@ -1017,28 +1036,90 @@ bool CStringChecker::IsFirstBufInBound(CheckerContext &C, // FIXME: Does this crash when a non-standard definition // of a library function is encountered? assert(ER->getValueType() == C.getASTContext().CharTy && - "IsFirstBufInBound should only be called with char* ElementRegions"); + "isFirstBufInBound should only be called with char* ElementRegions"); // Get the size of the array. const SubRegion *superReg = cast(ER->getSuperRegion()); - DefinedOrUnknownSVal SizeDV = getDynamicExtent(state, superReg, svalBuilder); + DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB); // Get the index of the accessed element. DefinedOrUnknownSVal Idx = ER->getIndex().castAs(); - ProgramStateRef StInBound = state->assumeInBound(Idx, SizeDV, true); + ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true); return static_cast(StInBound); } -ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C, - ProgramStateRef state, - const Expr *E, SVal V, - bool IsSourceBuffer, - const Expr *Size) { +ProgramStateRef CStringChecker::invalidateDestinationBufferBySize( + CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV, + SVal SizeV, QualType SizeTy) { + auto InvalidationTraitOperations = + [&C, S, BufTy = BufE->getType(), BufV, SizeV, + SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { + // If destination buffer is a field region and access is in bound, do + // not invalidate its super region. + if (MemRegion::FieldRegionKind == R->getKind() && + isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) { + ITraits.setTrait( + R, + RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); + } + return false; + }; + + return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); +} + +ProgramStateRef +CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion( + CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) { + auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &, + const MemRegion *R) { + return isa(R); + }; + + return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); +} + +ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows( + CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) { + auto InvalidationTraitOperations = + [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { + if (MemRegion::FieldRegionKind == R->getKind()) + ITraits.setTrait( + R, + RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); + return false; + }; + + return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); +} + +ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C, + ProgramStateRef S, + const Expr *BufE, + SVal BufV) { + auto InvalidationTraitOperations = + [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { + ITraits.setTrait( + R->getBaseRegion(), + RegionAndSymbolInvalidationTraits::TK_PreserveContents); + ITraits.setTrait(R, + RegionAndSymbolInvalidationTraits::TK_SuppressEscape); + return true; + }; + + return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); +} + +ProgramStateRef CStringChecker::invalidateBufferAux( + CheckerContext &C, ProgramStateRef State, const Expr *E, SVal V, + llvm::function_ref + InvalidationTraitOperations) { std::optional L = V.getAs(); if (!L) - return state; + return State; // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes // some assumptions about the value that CFRefCount can't. Even so, it should @@ -1055,29 +1136,10 @@ ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C, // Invalidate this region. const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); - - bool CausesPointerEscape = false; RegionAndSymbolInvalidationTraits ITraits; - // Invalidate and escape only indirect regions accessible through the source - // buffer. - if (IsSourceBuffer) { - ITraits.setTrait(R->getBaseRegion(), - RegionAndSymbolInvalidationTraits::TK_PreserveContents); - ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape); - CausesPointerEscape = true; - } else { - const MemRegion::Kind& K = R->getKind(); - if (K == MemRegion::FieldRegionKind) - if (Size && IsFirstBufInBound(C, state, E, Size)) { - // If destination buffer is a field region and access is in bound, - // do not invalidate its super region. - ITraits.setTrait( - R, - RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); - } - } + bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R); - return state->invalidateRegions(R, E, C.blockCount(), LCtx, + return State->invalidateRegions(R, E, C.blockCount(), LCtx, CausesPointerEscape, nullptr, nullptr, &ITraits); } @@ -1085,7 +1147,7 @@ ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C, // If we have a non-region value by chance, just remove the binding. // FIXME: is this necessary or correct? This handles the non-Region // cases. Is it ever valid to store to these? - return state->killBinding(*L); + return State->killBinding(*L); } bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, @@ -1182,8 +1244,8 @@ bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal, } else { // If the destination buffer's extent is not equal to the value of // third argument, just invalidate buffer. - State = InvalidateBuffer(C, State, DstBuffer, MemVal, - /*IsSourceBuffer*/ false, Size); + State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal, + SizeVal, Size->getType()); } if (StateNullChar && !StateNonNullChar) { @@ -1208,8 +1270,8 @@ bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal, } else { // If the offset is not zero and char value is not concrete, we can do // nothing but invalidate the buffer. - State = InvalidateBuffer(C, State, DstBuffer, MemVal, - /*IsSourceBuffer*/ false, Size); + State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal, + SizeVal, Size->getType()); } return true; } @@ -1305,15 +1367,14 @@ void CStringChecker::evalCopyCommon(CheckerContext &C, const CallExpr *CE, // can use LazyCompoundVals to copy the source values into the destination. // This would probably remove any existing bindings past the end of the // copied region, but that's still an improvement over blank invalidation. - state = - InvalidateBuffer(C, state, Dest.Expression, C.getSVal(Dest.Expression), - /*IsSourceBuffer*/ false, Size.Expression); + state = invalidateDestinationBufferBySize( + C, state, Dest.Expression, C.getSVal(Dest.Expression), sizeVal, + Size.Expression->getType()); // Invalidate the source (const-invalidation without const-pointer-escaping // the address of the top-level region). - state = InvalidateBuffer(C, state, Source.Expression, - C.getSVal(Source.Expression), - /*IsSourceBuffer*/ true, nullptr); + state = invalidateSourceBuffer(C, state, Source.Expression, + C.getSVal(Source.Expression)); C.addTransition(state); } @@ -1985,13 +2046,13 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, // can use LazyCompoundVals to copy the source values into the destination. // This would probably remove any existing bindings past the end of the // string, but that's still an improvement over blank invalidation. - state = InvalidateBuffer(C, state, Dst.Expression, *dstRegVal, - /*IsSourceBuffer*/ false, nullptr); + state = invalidateDestinationBufferBySize(C, state, Dst.Expression, + *dstRegVal, amountCopied, + C.getASTContext().getSizeType()); // Invalidate the source (const-invalidation without const-pointer-escaping // the address of the top-level region). - state = InvalidateBuffer(C, state, srcExpr.Expression, srcVal, - /*IsSourceBuffer*/ true, nullptr); + state = invalidateSourceBuffer(C, state, srcExpr.Expression, srcVal); // Set the C string length of the destination, if we know it. if (IsBounded && (appendK == ConcatFnKind::none)) { @@ -2206,8 +2267,9 @@ void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const { // Invalidate the search string, representing the change of one delimiter // character to NUL. - State = InvalidateBuffer(C, State, SearchStrPtr.Expression, Result, - /*IsSourceBuffer*/ false, nullptr); + // As the replacement never overflows, do not invalidate its super region. + State = invalidateDestinationBufferNeverOverflows( + C, State, SearchStrPtr.Expression, Result); // Overwrite the search string pointer. The new value is either an address // further along in the same string, or NULL if there are no more tokens. @@ -2256,8 +2318,10 @@ void CStringChecker::evalStdCopyCommon(CheckerContext &C, // Invalidate the destination buffer const Expr *Dst = CE->getArg(2); SVal DstVal = State->getSVal(Dst, LCtx); - State = InvalidateBuffer(C, State, Dst, DstVal, /*IsSource=*/false, - /*Size=*/nullptr); + // FIXME: As we do not know how many items are copied, we also invalidate the + // super region containing the target location. + State = + invalidateDestinationBufferAlwaysEscapeSuperRegion(C, State, Dst, DstVal); SValBuilder &SVB = C.getSValBuilder(); diff --git a/clang/test/Analysis/Inputs/system-header-simulator.h b/clang/test/Analysis/Inputs/system-header-simulator.h index fb86c2f..8924103 100644 --- a/clang/test/Analysis/Inputs/system-header-simulator.h +++ b/clang/test/Analysis/Inputs/system-header-simulator.h @@ -63,7 +63,9 @@ size_t strlen(const char *); char *strcpy(char *restrict, const char *restrict); char *strncpy(char *dst, const char *src, size_t n); +char *strsep(char **stringp, const char *delim); void *memcpy(void *dst, const void *src, size_t n); +void *memset(void *s, int c, size_t n); typedef unsigned long __darwin_pthread_key_t; typedef __darwin_pthread_key_t pthread_key_t; diff --git a/clang/test/Analysis/issue-55019.cpp b/clang/test/Analysis/issue-55019.cpp new file mode 100644 index 0000000..dfeb00a --- /dev/null +++ b/clang/test/Analysis/issue-55019.cpp @@ -0,0 +1,89 @@ +// Refer issue 55019 for more details. +// A supplemental test case of pr22954.c for other functions modeled in +// the CStringChecker. + +// RUN: %clang_analyze_cc1 %s -verify \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=unix \ +// RUN: -analyzer-checker=debug.ExprInspection + +#include "Inputs/system-header-simulator.h" +#include "Inputs/system-header-simulator-cxx.h" + +void *malloc(size_t); +void free(void *); + +struct mystruct { + void *ptr; + char arr[4]; +}; + +void clang_analyzer_dump(const void *); + +// CStringChecker::memsetAux +void fmemset() { + mystruct x; + x.ptr = malloc(1); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + memset(x.arr, 0, sizeof(x.arr)); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + free(x.ptr); // no-leak-warning +} + +// CStringChecker::evalCopyCommon +void fmemcpy() { + mystruct x; + x.ptr = malloc(1); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + memcpy(x.arr, "hi", 2); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + free(x.ptr); // no-leak-warning +} + +// CStringChecker::evalStrcpyCommon +void fstrcpy() { + mystruct x; + x.ptr = malloc(1); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + strcpy(x.arr, "hi"); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + free(x.ptr); // no-leak-warning +} + +void fstrncpy() { + mystruct x; + x.ptr = malloc(1); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + strncpy(x.arr, "hi", sizeof(x.arr)); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + free(x.ptr); // no-leak-warning +} + +// CStringChecker::evalStrsep +void fstrsep() { + mystruct x; + x.ptr = malloc(1); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + char *p = x.arr; + (void)strsep(&p, "x"); + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + free(x.ptr); // no-leak-warning +} + +// CStringChecker::evalStdCopyCommon +void fstdcopy() { + mystruct x; + x.ptr = new char; + clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}} + + const char *p = "x"; + std::copy(p, p + 1, x.arr); + + // FIXME: As we currently cannot know whether the copy overflows, the checker + // invalidates the entire `x` object. When the copy size through iterators + // can be correctly modeled, we can then update the verify direction from + // SymRegion to HeapSymRegion as this std::copy call never overflows and + // hence the pointer `x.ptr` shall not be invalidated. + clang_analyzer_dump(x.ptr); // expected-warning {{SymRegion}} + delete static_cast(x.ptr); // no-leak-warning +} diff --git a/clang/test/Analysis/pr22954.c b/clang/test/Analysis/pr22954.c index e9b8884..3d1cac1 100644 --- a/clang/test/Analysis/pr22954.c +++ b/clang/test/Analysis/pr22954.c @@ -556,12 +556,13 @@ int f263(int n, char * len) { x263.s2 = strdup("hello"); char input[] = {'a', 'b', 'c', 'd'}; memcpy(x263.s1, input, *(len + n)); - clang_analyzer_eval(x263.s1[0] == 0); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(x263.s1[0] == 0); // expected-warning{{UNKNOWN}}\ + expected-warning{{Potential leak of memory pointed to by 'x263.s2'}} clang_analyzer_eval(x263.s1[1] == 0); // expected-warning{{UNKNOWN}} clang_analyzer_eval(x263.s1[2] == 0); // expected-warning{{UNKNOWN}} clang_analyzer_eval(x263.s1[3] == 0); // expected-warning{{UNKNOWN}} clang_analyzer_eval(x263.s2 == 0); // expected-warning{{UNKNOWN}} - return 0; // expected-warning{{Potential leak of memory pointed to by 'x263.s2'}} + return 0; } -- 2.7.4