From 539e4a77bbabbc19f22b2bd24e04af2e432e599d Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Sat, 23 Feb 2013 02:53:19 +0000 Subject: [PATCH] ubsan: Emit bounds checks for array indexing, vector indexing, and (in really simple cases) pointer arithmetic. This augments the existing bounds checking with language-level array bounds information. llvm-svn: 175949 --- clang/lib/CodeGen/CGExpr.cpp | 97 +++++++++++++++++++++++++- clang/lib/CodeGen/CGExprScalar.cpp | 11 ++- clang/lib/CodeGen/CodeGenFunction.h | 9 ++- clang/lib/Driver/SanitizerArgs.h | 2 +- clang/test/CodeGenCXX/catch-undef-behavior.cpp | 91 +++++++++++++++++++++++- clang/test/Driver/darwin-sanitizer-ld.c | 6 +- clang/test/Driver/ubsan-ld.c | 8 --- 7 files changed, 207 insertions(+), 17 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index ba400b8..a688dbf 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -630,6 +630,83 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, } } +/// Determine whether this expression refers to a flexible array member in a +/// struct. We disable array bounds checks for such members. +static bool isFlexibleArrayMemberExpr(const Expr *E) { + // For compatibility with existing code, we treat arrays of length 0 or + // 1 as flexible array members. + const ArrayType *AT = E->getType()->castAsArrayTypeUnsafe(); + if (const ConstantArrayType *CAT = dyn_cast(AT)) { + if (CAT->getSize().ugt(1)) + return false; + } else if (!isa(AT)) + return false; + + E = E->IgnoreParens(); + + // A flexible array member must be the last member in the class. + if (const MemberExpr *ME = dyn_cast(E)) { + // FIXME: If the base type of the member expr is not FD->getParent(), + // this should not be treated as a flexible array member access. + if (const FieldDecl *FD = dyn_cast(ME->getMemberDecl())) { + RecordDecl::field_iterator FI( + DeclContext::decl_iterator(const_cast(FD))); + return ++FI == FD->getParent()->field_end(); + } + } + + return false; +} + +/// If Base is known to point to the start of an array, return the length of +/// that array. Return 0 if the length cannot be determined. +llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF, const Expr *Base, + QualType &IndexedType) { + // For the vector indexing extension, the bound is the number of elements. + if (const VectorType *VT = Base->getType()->getAs()) { + IndexedType = Base->getType(); + return CGF.Builder.getInt32(VT->getNumElements()); + } + + Base = Base->IgnoreParens(); + + if (const CastExpr *CE = dyn_cast(Base)) { + if (CE->getCastKind() == CK_ArrayToPointerDecay && + !isFlexibleArrayMemberExpr(CE->getSubExpr())) { + IndexedType = CE->getSubExpr()->getType(); + const ArrayType *AT = IndexedType->castAsArrayTypeUnsafe(); + if (const ConstantArrayType *CAT = dyn_cast(AT)) + return CGF.Builder.getInt(CAT->getSize()); + else if (const VariableArrayType *VAT = cast(AT)) + return CGF.getVLASize(VAT).first; + } + } + + return 0; +} + +void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base, + llvm::Value *Index, QualType IndexType, + bool Accessed) { + QualType IndexedType; + llvm::Value *Bound = getArrayIndexingBound(*this, Base, IndexedType); + if (!Bound) + return; + + bool IndexSigned = IndexType->isSignedIntegerOrEnumerationType(); + llvm::Value *IndexVal = Builder.CreateIntCast(Index, SizeTy, IndexSigned); + llvm::Value *BoundVal = Builder.CreateIntCast(Bound, SizeTy, false); + + llvm::Constant *StaticData[] = { + EmitCheckSourceLocation(E->getExprLoc()), + EmitCheckTypeDescriptor(IndexedType), + EmitCheckTypeDescriptor(IndexType) + }; + llvm::Value *Check = Accessed ? Builder.CreateICmpULT(IndexVal, BoundVal) + : Builder.CreateICmpULE(IndexVal, BoundVal); + EmitCheck(Check, "out_of_bounds", StaticData, Index, CRK_Recoverable); +} + CodeGenFunction::ComplexPairTy CodeGenFunction:: EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, @@ -705,7 +782,11 @@ LValue CodeGenFunction::EmitUnsupportedLValue(const Expr *E, } LValue CodeGenFunction::EmitCheckedLValue(const Expr *E, TypeCheckKind TCK) { - LValue LV = EmitLValue(E); + LValue LV; + if (SanOpts->Bounds && isa(E)) + LV = EmitArraySubscriptExpr(cast(E), /*Accessed*/true); + else + LV = EmitLValue(E); if (!isa(E) && !LV.isBitField() && LV.isSimple()) EmitTypeCheck(TCK, E->getExprLoc(), LV.getAddress(), E->getType(), LV.getAlignment()); @@ -2121,12 +2202,16 @@ static const Expr *isSimpleArrayDecayOperand(const Expr *E) { return SubExpr; } -LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E) { +LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, + bool Accessed) { // The index must always be an integer, which is not an aggregate. Emit it. llvm::Value *Idx = EmitScalarExpr(E->getIdx()); QualType IdxTy = E->getIdx()->getType(); bool IdxSigned = IdxTy->isSignedIntegerOrEnumerationType(); + if (SanOpts->Bounds) + EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, Accessed); + // If the base is a vector type, then we are forming a vector element lvalue // with this subscript. if (E->getBase()->getType()->isVectorType()) { @@ -2187,7 +2272,13 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E) { // "gep x, i" here. Emit one "gep A, 0, i". assert(Array->getType()->isArrayType() && "Array to pointer decay must have array source type!"); - LValue ArrayLV = EmitLValue(Array); + LValue ArrayLV; + // For simple multidimensional array indexing, set the 'accessed' flag for + // better bounds-checking of the base expression. + if (const ArraySubscriptExpr *ASE = dyn_cast(Array)) + ArrayLV = EmitArraySubscriptExpr(ASE, /*Accessed*/ true); + else + ArrayLV = EmitLValue(Array); llvm::Value *ArrayPtr = ArrayLV.getAddress(); llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0); llvm::Value *Args[] = { Zero, Idx }; diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 69aa7e8..d76cad2 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -986,7 +986,12 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) { // integer value. Value *Base = Visit(E->getBase()); Value *Idx = Visit(E->getIdx()); - bool IdxSigned = E->getIdx()->getType()->isSignedIntegerOrEnumerationType(); + QualType IdxTy = E->getIdx()->getType(); + + if (CGF.SanOpts->Bounds) + CGF.EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, /*Accessed*/true); + + bool IdxSigned = IdxTy->isSignedIntegerOrEnumerationType(); Idx = Builder.CreateIntCast(Idx, CGF.Int32Ty, IdxSigned, "vecidxcast"); return Builder.CreateExtractElement(Base, Idx, "vecext"); } @@ -2134,6 +2139,10 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, if (isSubtraction) index = CGF.Builder.CreateNeg(index, "idx.neg"); + if (CGF.SanOpts->Bounds) + CGF.EmitBoundsCheck(op.E, pointerOperand, index, indexOperand->getType(), + /*Accessed*/ false); + const PointerType *pointerType = pointerOperand->getType()->getAs(); if (!pointerType) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index fce5c49..880b82e 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1910,6 +1910,12 @@ public: void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *V, QualType Type, CharUnits Alignment = CharUnits::Zero()); + /// \brief Emit a check that \p Base points into an array object, which + /// we can access at index \p Index. \p Accessed should be \c false if we + /// this expression is used as an lvalue, for instance in "&Arr[Idx]". + void EmitBoundsCheck(const Expr *E, const Expr *Base, llvm::Value *Index, + QualType IndexType, bool Accessed); + llvm::Value *EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, bool isInc, bool isPre); ComplexPairTy EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, @@ -2187,7 +2193,8 @@ public: LValue EmitObjCEncodeExprLValue(const ObjCEncodeExpr *E); LValue EmitPredefinedLValue(const PredefinedExpr *E); LValue EmitUnaryOpLValue(const UnaryOperator *E); - LValue EmitArraySubscriptExpr(const ArraySubscriptExpr *E); + LValue EmitArraySubscriptExpr(const ArraySubscriptExpr *E, + bool Accessed = false); LValue EmitExtVectorElementExpr(const ExtVectorElementExpr *E); LValue EmitMemberExpr(const MemberExpr *E); LValue EmitObjCIsaExpr(const ObjCIsaExpr *E); diff --git a/clang/lib/Driver/SanitizerArgs.h b/clang/lib/Driver/SanitizerArgs.h index 8e3f946..c3d84f9 100644 --- a/clang/lib/Driver/SanitizerArgs.h +++ b/clang/lib/Driver/SanitizerArgs.h @@ -37,7 +37,7 @@ class SanitizerArgs { NeedsAsanRt = Address, NeedsTsanRt = Thread, NeedsMsanRt = Memory, - NeedsUbsanRt = (Undefined & ~Bounds) | Integer, + NeedsUbsanRt = Undefined | Integer, NotAllowedWithTrap = Vptr }; unsigned Kind; diff --git a/clang/test/CodeGenCXX/catch-undef-behavior.cpp b/clang/test/CodeGenCXX/catch-undef-behavior.cpp index b2d6fce..044b92b 100644 --- a/clang/test/CodeGenCXX/catch-undef-behavior.cpp +++ b/clang/test/CodeGenCXX/catch-undef-behavior.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsanitize=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift,unreachable,return,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s +// RUN: %clang_cc1 -fsanitize=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift,unreachable,return,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,bounds -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s struct S { double d; @@ -220,4 +220,93 @@ void bad_downcast_reference(S &p) { (void) static_cast(p); } +// CHECK: @_Z11array_index +int array_index(const int (&a)[4], int n) { + // CHECK: %[[K1_OK:.*]] = icmp ult i64 %{{.*}}, 4 + // CHECK: br i1 %[[K1_OK]] + // CHECK: call void @__ubsan_handle_out_of_bounds( + int k1 = a[n]; + + // CHECK: %[[R1_OK:.*]] = icmp ule i64 %{{.*}}, 4 + // CHECK: br i1 %[[R1_OK]] + // CHECK: call void @__ubsan_handle_out_of_bounds( + const int *r1 = &a[n]; + + // CHECK: %[[K2_OK:.*]] = icmp ult i64 %{{.*}}, 8 + // CHECK: br i1 %[[K2_OK]] + // CHECK: call void @__ubsan_handle_out_of_bounds( + int k2 = ((const int(&)[8])a)[n]; + + // CHECK: %[[K3_OK:.*]] = icmp ult i64 %{{.*}}, 4 + // CHECK: br i1 %[[K3_OK]] + // CHECK: call void @__ubsan_handle_out_of_bounds( + int k3 = n[a]; + + return k1 + *r1 + k2; +} + +// CHECK: @_Z17multi_array_index +int multi_array_index(int n, int m) { + int arr[4][6]; + + // CHECK: %[[IDX2_OK:.*]] = icmp ult i64 %{{.*}}, 6 + // CHECK: br i1 %[[IDX2_OK]] + // CHECK: call void @__ubsan_handle_out_of_bounds( + + // CHECK: %[[IDX1_OK:.*]] = icmp ult i64 %{{.*}}, 4 + // CHECK: br i1 %[[IDX1_OK]] + // CHECK: call void @__ubsan_handle_out_of_bounds( + return arr[n][m]; +} + +// CHECK: @_Z11array_arith +int array_arith(const int (&a)[4], int n) { + // CHECK: %[[K1_OK:.*]] = icmp ule i64 %{{.*}}, 4 + // CHECK: br i1 %[[K1_OK]] + // CHECK: call void @__ubsan_handle_out_of_bounds( + const int *k1 = a + n; + + // CHECK: %[[K2_OK:.*]] = icmp ule i64 %{{.*}}, 8 + // CHECK: br i1 %[[K2_OK]] + // CHECK: call void @__ubsan_handle_out_of_bounds( + const int *k2 = (const int(&)[8])a + n; + + return *k1 + *k2; +} + +struct ArrayMembers { + int a1[5]; + int a2[1]; +}; +// CHECK: @_Z18struct_array_index +int struct_array_index(ArrayMembers *p, int n) { + // CHECK: %[[IDX_OK:.*]] = icmp ult i64 %{{.*}}, 5 + // CHECK: br i1 %[[IDX_OK]] + // CHECK: call void @__ubsan_handle_out_of_bounds( + return p->a1[n]; +} + +// CHECK: @_Z16flex_array_index +int flex_array_index(ArrayMembers *p, int n) { + // CHECK-NOT: call void @__ubsan_handle_out_of_bounds( + return p->a2[n]; +} + +typedef __attribute__((ext_vector_type(4))) int V4I; +// CHECK: @_Z12vector_index +int vector_index(V4I v, int n) { + // CHECK: %[[IDX_OK:.*]] = icmp ult i64 %{{.*}}, 4 + // CHECK: br i1 %[[IDX_OK]] + // CHECK: call void @__ubsan_handle_out_of_bounds( + return v[n]; +} + +// CHECK: @_Z12string_index +char string_index(int n) { + // CHECK: %[[IDX_OK:.*]] = icmp ult i64 %{{.*}}, 6 + // CHECK: br i1 %[[IDX_OK]] + // CHECK: call void @__ubsan_handle_out_of_bounds( + return "Hello"[n]; +} + // CHECK: attributes [[NR_NUW]] = { noreturn nounwind } diff --git a/clang/test/Driver/darwin-sanitizer-ld.c b/clang/test/Driver/darwin-sanitizer-ld.c index e4134c9..98b37e9 100644 --- a/clang/test/Driver/darwin-sanitizer-ld.c +++ b/clang/test/Driver/darwin-sanitizer-ld.c @@ -28,7 +28,8 @@ // CHECK-UBSAN: stdc++ // RUN: %clang -no-canonical-prefixes -### -target x86_64-darwin \ -// RUN: -fsanitize=bounds %s -o %t.o 2>&1 \ +// RUN: -fsanitize=bounds -fsanitize-undefined-trap-on-error \ +// RUN: %s -o %t.o 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-BOUNDS %s // CHECK-BOUNDS: "{{.*}}ld{{(.exe)?}}" @@ -43,7 +44,8 @@ // CHECK-DYN-UBSAN: libclang_rt.ubsan_osx.a // RUN: %clang -no-canonical-prefixes -### -target x86_64-darwin \ -// RUN: -fPIC -shared -fsanitize=bounds %s -o %t.so 2>&1 \ +// RUN: -fsanitize=bounds -fsanitize-undefined-trap-on-error \ +// RUN: %s -o %t.so -fPIC -shared 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-DYN-BOUNDS %s // CHECK-DYN-BOUNDS: "{{.*}}ld{{(.exe)?}}" diff --git a/clang/test/Driver/ubsan-ld.c b/clang/test/Driver/ubsan-ld.c index 133d7a3..7f601ab 100644 --- a/clang/test/Driver/ubsan-ld.c +++ b/clang/test/Driver/ubsan-ld.c @@ -18,11 +18,3 @@ // CHECK-LINUX-SHARED-NOT: "-lc" // CHECK-LINUX-SHARED: libclang_rt.ubsan-i386.a" // CHECK-LINUX-SHARED: "-lpthread" - -// RUN: %clang -fsanitize=bounds %s -### -o %t.o 2>&1 \ -// RUN: -target i386-unknown-linux \ -// RUN: --sysroot=%S/Inputs/basic_linux_tree \ -// RUN: | FileCheck --check-prefix=CHECK-LINUX1 %s -// CHECK-LINUX1: "{{.*}}ld{{(.exe)?}}" -// CHECK-LINUX1-NOT: libclang_rt.ubsan-i386.a" -// CHECK-LINUX1-NOT: "-lpthread" -- 2.7.4