[dfsan] Add a flag about whether to propagate offset labels at gep
author Jianzhou Zhao <jianzhouzh@google.com>
Wed, 26 May 2021 22:51:54 +0000 (22:51 +0000)
committer Jianzhou Zhao <jianzhouzh@google.com>
Fri, 28 May 2021 00:06:19 +0000 (00:06 +0000)
DFSan has flags to control flows between pointers and objects referred
by pointers. For example,

a = *p;
L(a) = L(*p)        when -dfsan-combine-pointer-labels-on-load = false
L(a) = L(*p) + L(p) when -dfsan-combine-pointer-labels-on-load = true

*p = b;
L(*p) = L(b)        when -dfsan-combine-pointer-labels-on-store = false
L(*p) = L(b) + L(p) when -dfsan-combine-pointer-labels-on-store = true
The question is what to do with p += c.

In practice, we found that propagating labels from c to p produces many
confusing flows, so a new flag controls this behavior:

p += c;
L(p) = L(p)        when -dfsan-combine-offset-labels-on-gep = false
L(p) = L(p) + L(c) when -dfsan-combine-offset-labels-on-gep = true

Reviewed-by: gbalats
Differential Revision: https://reviews.llvm.org/D103176

compiler-rt/test/dfsan/gep.c [new file with mode: 0644]
llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
llvm/test/Instrumentation/DataFlowSanitizer/dont_combine_offset_labels_on_gep.ll [new file with mode: 0644]

diff --git a/compiler-rt/test/dfsan/gep.c b/compiler-rt/test/dfsan/gep.c
new file mode 100644 (file)
index 0000000..7f358cb
--- /dev/null
@@ -0,0 +1,28 @@
+// RUN: %clang_dfsan %s -mllvm -dfsan-combine-offset-labels-on-gep=false -o %t && %run %t
+// RUN: %clang_dfsan %s -DPROP_OFFSET_LABELS -o %t && %run %t
+//
+// REQUIRES: x86_64-target-arch
+
+// Tests that labels are propagated through GEP.
+
+#include <sanitizer/dfsan_interface.h>
+#include <assert.h>
+
+int main(void) {
+  int i = 1;
+  int *p = &i;
+  int j = 2;
+  // The offset's label reaches p only when offset labels are combined on GEP.
+  dfsan_set_label(1, &i, sizeof(i));
+  p += i;
+#ifdef PROP_OFFSET_LABELS
+  assert(dfsan_get_label(p) == 1);
+#else
+  assert(dfsan_get_label(p) == 0);
+#endif
+  // Non-pointer arithmetic combines labels regardless of the flag.
+  dfsan_set_label(2, &j, sizeof(j));
+  j += i;
+  assert(dfsan_get_label(j) == 3);
+  return 0;
+}
index 2c468f8..a461813 100644 (file)
@@ -201,6 +201,14 @@ static cl::opt<bool> ClCombinePointerLabelsOnStore(
              "storing in memory."),
     cl::Hidden, cl::init(false));
 
+// Whether GEP results also take the labels of their offsets (on by default).
+static cl::opt<bool> ClCombineOffsetLabelsOnGEP(
+    "dfsan-combine-offset-labels-on-gep",
+    cl::desc(
+        "Combine the label of the offset with the label of the pointer when "
+        "doing pointer arithmetic."),
+    cl::Hidden, cl::init(true));
+
 static cl::opt<bool> ClDebugNonzeroLabels(
     "dfsan-debug-nonzero-labels",
     cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
@@ -2778,7 +2786,17 @@ void DFSanVisitor::visitCmpInst(CmpInst &CI) {
 }
 
 void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
-  visitInstOperands(GEPI);
+  if (ClCombineOffsetLabelsOnGEP) {
+    visitInstOperands(GEPI);
+    return;
+  }
+
+  // Offset labels are not combined: the result takes the shadow (and, when
+  // origins are tracked, the origin) of the base pointer operand only.
+  Value *BasePointer = GEPI.getPointerOperand();
+  DFSF.setShadow(&GEPI, DFSF.getShadow(BasePointer));
+  if (DFSF.DFS.shouldTrackOrigins())
+    DFSF.setOrigin(&GEPI, DFSF.getOrigin(BasePointer));
 }
 
 void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/dont_combine_offset_labels_on_gep.ll b/llvm/test/Instrumentation/DataFlowSanitizer/dont_combine_offset_labels_on_gep.ll
new file mode 100644 (file)
index 0000000..5fe6353
--- /dev/null
@@ -0,0 +1,21 @@
+; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-combine-offset-labels-on-gep=false -dfsan-fast-8-labels=true -S | FileCheck %s --check-prefixes=CHECK,CHECK_ORIGIN
+; RUN: opt < %s -dfsan -dfsan-combine-offset-labels-on-gep=false -dfsan-fast-8-labels=true -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [[TLS_ARR:\[100 x i64\]]]
+; CHECK: @__dfsan_retval_tls = external thread_local(initialexec) global [[TLS_ARR]]
+; CHECK: @__dfsan_shadow_width_bits = weak_odr constant i32 [[#SBITS:]]
+
+define i32* @gepop([10 x [20 x i32]]* %p, i32 %a, i32 %b, i32 %c) {
+  ; CHECK: @"dfs$gepop"
+  ; CHECK_ORIGIN: %[[#PO:]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align [[ALIGN_O:4]]
+  ; CHECK: %[[#PS:]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([[TLS_ARR]]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[ALIGN_S:2]]
+  ; CHECK: %e = getelementptr [10 x [20 x i32]], [10 x [20 x i32]]* %p, i32 %a, i32 %b, i32 %c
+  ; CHECK: store i[[#SBITS]] %[[#PS]], i[[#SBITS]]* bitcast ([[TLS_ARR]]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[ALIGN_S]]
+  ; CHECK_ORIGIN: store i32 %[[#PO]], i32* @__dfsan_retval_origin_tls, align [[ALIGN_O]]
+
+  %e = getelementptr [10 x [20 x i32]], [10 x [20 x i32]]* %p, i32 %a, i32 %b, i32 %c
+  ret i32* %e
+}
+