From 1ca2d9679ba3493f017e5131451b3f68a708591b Mon Sep 17 00:00:00 2001
From: Faisal Vali <faisalv@yahoo.com>
Date: Mon, 15 May 2017 01:49:19 +0000
Subject: [PATCH] Fix PR32933: crash on lambda capture of VLA

https://bugs.llvm.org/show_bug.cgi?id=32933

Turns out clang wasn't really handling vla's (*) in C++11's for-range entirely correctly.

For e.g. This would lead to generation of buggy IR:

  void foo(int b) {
    int vla[b];
    b = -1;  // This store would affect the '__end = vla + b'
    for (int &c : vla)
      c = 0;
  }

Additionally, code-gen would get confused when VLA's were reference-captured by lambdas, and then used in a for-range, which would result in an attempt to generate IR for '__end = vla + b' within the lambda's body - without any capture of 'b' - hence the assertion.

This patch modifies clang, so that for VLA's it translates the end pointer approximately into:
  __end = __begin + sizeof(vla)/sizeof(vla->getElementType())

As opposed to the __end = __begin + b;

I considered passing a magic value into codegen - or having codegen special case the '__end' variable when it referred to a variably-modified type, but I decided against that approach, because it smelled like I would be increasing a complicated form of coupling, that I think would be even harder to maintain than the above approach (which can easily be optimized (-O1) to refer to the run-time bound that was calculated upon array's creation or copied into the lambda's closure object).


(*) why oh why gcc would you enable this by default?! ;)

llvm-svn: 303026
---
 clang/lib/Sema/SemaStmt.cpp               | 54 ++++++++++++++++++--
 clang/test/CodeGenCXX/vla.cpp             | 85 ++++++++++++++++++++++++++++++-
 clang/test/SemaCXX/for-range-examples.cpp | 34 +++++++++++++
 3 files changed, 169 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 5d7eada..33a8f9c 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -2268,9 +2268,57 @@ Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation CoawaitLoc,
         BoundExpr = IntegerLiteral::Create(
             Context, CAT->getSize(), Context.getPointerDiffType(), RangeLoc);
       else if (const VariableArrayType *VAT =
-               dyn_cast<VariableArrayType>(UnqAT))
-        BoundExpr = VAT->getSizeExpr();
-      else {
+               dyn_cast<VariableArrayType>(UnqAT)) {
+        // For a variably modified type we can't just use the expression within
+        // the array bounds, since we don't want that to be re-evaluated here.
+        // Rather, we need to determine what it was when the array was first
+        // created - so we resort to using sizeof(vla)/sizeof(element).
+        // For e.g.
+        //  void f(int b) { 
+        //    int vla[b];
+        //    b = -1;   <-- This should not affect the num of iterations below
+        //    for (int &c : vla) { .. }
+        //  }
+
+        // FIXME: This results in codegen generating IR that recalculates the
+        // run-time number of elements (as opposed to just using the IR Value
+        // that corresponds to the run-time value of each bound that was
+        // generated when the array was created.) If this proves too embarassing
+        // even for unoptimized IR, consider passing a magic-value/cookie to
+        // codegen that then knows to simply use that initial llvm::Value (that
+        // corresponds to the bound at time of array creation) within
+        // getelementptr.  But be prepared to pay the price of increasing a
+        // customized form of coupling between the two components - which  could
+        // be hard to maintain as the codebase evolves.
+
+        ExprResult SizeOfVLAExprR = ActOnUnaryExprOrTypeTraitExpr(
+            EndVar->getLocation(), UETT_SizeOf,
+            /*isType=*/true,
+            CreateParsedType(VAT->desugar(), Context.getTrivialTypeSourceInfo(
+                                                 VAT->desugar(), RangeLoc))
+                .getAsOpaquePtr(),
+            EndVar->getSourceRange());
+        if (SizeOfVLAExprR.isInvalid())
+          return StmtError();
+        
+        ExprResult SizeOfEachElementExprR = ActOnUnaryExprOrTypeTraitExpr(
+            EndVar->getLocation(), UETT_SizeOf,
+            /*isType=*/true,
+            CreateParsedType(VAT->desugar(),
+                             Context.getTrivialTypeSourceInfo(
+                                 VAT->getElementType(), RangeLoc))
+                .getAsOpaquePtr(),
+            EndVar->getSourceRange());
+        if (SizeOfEachElementExprR.isInvalid())
+          return StmtError();
+
+        BoundExpr =
+            ActOnBinOp(S, EndVar->getLocation(), tok::slash,
+                       SizeOfVLAExprR.get(), SizeOfEachElementExprR.get());
+        if (BoundExpr.isInvalid())
+          return StmtError();
+        
+      } else {
         // Can't be a DependentSizedArrayType or an IncompleteArrayType since
         // UnqAT is not incomplete and Range is not type-dependent.
         llvm_unreachable("Unexpected array type in for-range");
diff --git a/clang/test/CodeGenCXX/vla.cpp b/clang/test/CodeGenCXX/vla.cpp
index 4e22bba..798a5ae 100644
--- a/clang/test/CodeGenCXX/vla.cpp
+++ b/clang/test/CodeGenCXX/vla.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-apple-darwin %s -emit-llvm -o - | FileCheck %s
 
 template<typename T>
 struct S {
@@ -54,3 +54,86 @@ void test0(void *array, int n) {
 
   // CHECK-NEXT: ret void
 }
+
+
+void test2(int b) {
+  // CHECK-LABEL: define void {{.*}}test2{{.*}}(i32 %b)
+  int varr[b];
+  // get the address of %b by checking the first store that stores it 
+  //CHECK: store i32 %b, i32* [[PTR_B:%.*]]
+
+  // get the size of the VLA by getting the first load of the PTR_B
+  //CHECK: [[VLA_NUM_ELEMENTS_PREZEXT:%.*]] = load i32, i32* [[PTR_B]]
+  //CHECK-NEXT: [[VLA_NUM_ELEMENTS_PRE:%.*]] = zext i32 [[VLA_NUM_ELEMENTS_PREZEXT]]
+  
+  b = 15;
+  //CHECK: store i32 15, i32* [[PTR_B]]
+  
+  // Now get the sizeof, and then divide by the element size
+  
+  
+  //CHECK: [[VLA_SIZEOF:%.*]] = mul nuw i64 4, [[VLA_NUM_ELEMENTS_PRE]]
+  //CHECK-NEXT: [[VLA_NUM_ELEMENTS_POST:%.*]] = udiv i64 [[VLA_SIZEOF]], 4
+  //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds i32, i32* {{%.*}}, i64 [[VLA_NUM_ELEMENTS_POST]]
+  //CHECK-NEXT: store i32* [[VLA_END_PTR]], i32** %__end
+  for (int d : varr) 0;
+}
+
+#if 0
+  %0 = load i32, i32* %b.addr, align 4
+  %1 = zext i32 %0 to i64
+  %2 = load i32, i32* %c.addr, align 4
+  %3 = zext i32 %2 to i64
+  %4 = call i8* @llvm.stacksave()
+  store i8* %4, i8** %saved_stack, align 8
+  %5 = mul nuw i64 %1, %3
+  %vla = alloca i32, i64 %5, align 16
+  store i32 15, i32* %c.addr, align 4
+  store i32 15, i32* %b.addr, align 4
+  store i32* %vla, i32** %__range, align 8
+
+
+  %6 = load i32*, i32** %__range, align 8
+  store i32* %6, i32** %__begin, align 8
+  %7 = load i32*, i32** %__range, align 8
+  %8 = mul nuw i64 %1, %3
+  %9 = mul nuw i64 4, %8
+  %10 = mul nuw i64 4, %3
+  %div = udiv i64 %9, %10
+  %vla.index = mul nsw i64 %div, %3
+  %add.ptr = getelementptr inbounds i32, i32* %7, i64 %vla.index
+  store i32* %add.ptr, i32** %__end, align 8
+#endif
+
+void test3(int b, int c) {
+  // CHECK-LABEL: define void {{.*}}test3{{.*}}(i32 %b, i32 %c)
+  int varr[b][c];
+  // get the address of %b by checking the first store that stores it 
+  //CHECK: store i32 %b, i32* [[PTR_B:%.*]]
+  //CHECK-NEXT: store i32 %c, i32* [[PTR_C:%.*]]
+  
+  // get the size of the VLA by getting the first load of the PTR_B
+  //CHECK: [[VLA_DIM1_PREZEXT:%.*]] = load i32, i32* [[PTR_B]]
+  //CHECK-NEXT: [[VLA_DIM1_PRE:%.*]] = zext i32 [[VLA_DIM1_PREZEXT]]
+  //CHECK: [[VLA_DIM2_PREZEXT:%.*]] = load i32, i32* [[PTR_C]]
+  //CHECK-NEXT: [[VLA_DIM2_PRE:%.*]] = zext i32 [[VLA_DIM2_PREZEXT]]
+  
+  b = 15;
+  c = 15;
+  //CHECK: store i32 15, i32* [[PTR_B]]
+  //CHECK: store i32 15, i32* [[PTR_C]]
+  // Now get the sizeof, and then divide by the element size
+  
+  // multiply the two dimensions, then by the element type and then divide by the sizeof dim2
+  //CHECK: [[VLA_DIM1_X_DIM2:%.*]] = mul nuw i64 [[VLA_DIM1_PRE]], [[VLA_DIM2_PRE]]
+  //CHECK-NEXT: [[VLA_SIZEOF:%.*]] = mul nuw i64 4, [[VLA_DIM1_X_DIM2]]
+  //CHECK-NEXT: [[VLA_SIZEOF_DIM2:%.*]] = mul nuw i64 4, [[VLA_DIM2_PRE]]
+  //CHECK-NEXT: [[VLA_NUM_ELEMENTS:%.*]] = udiv i64 [[VLA_SIZEOF]], [[VLA_SIZEOF_DIM2]]
+  //CHECK-NEXT: [[VLA_END_INDEX:%.*]] = mul nsw i64 [[VLA_NUM_ELEMENTS]], [[VLA_DIM2_PRE]]
+  //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds i32, i32* %7, i64 [[VLA_END_INDEX]]
+  //CHECK-NEXT: store i32* [[VLA_END_PTR]], i32** %__end
+ 
+  for (auto &d : varr) 0;
+}
+
+
diff --git a/clang/test/SemaCXX/for-range-examples.cpp b/clang/test/SemaCXX/for-range-examples.cpp
index 08a9982..d6b527f 100644
--- a/clang/test/SemaCXX/for-range-examples.cpp
+++ b/clang/test/SemaCXX/for-range-examples.cpp
@@ -241,3 +241,37 @@ namespace pr18587 {
     }
   }
 }
+
+namespace PR32933 {
+// https://bugs.llvm.org/show_bug.cgi?id=32933
+void foo ()
+{ 
+  int b = 1, a[b];
+  a[0] = 0;
+  [&] { for (int c : a) 0; } ();
+}
+
+
+int foo(int b) {
+  int varr[b][(b+=8)];
+  b = 15; 
+  [&] {
+    int i = 0;
+    for (auto &c : varr) 
+    {
+      c[0] = ++b;
+    }
+    [&] {
+      int i = 0;
+      for (auto &c : varr) {
+        int j = 0;
+        for(auto &c2 : c) {
+          ++j;
+        }
+        ++i;
+      }
+    }();
+  }();
+  return b;
+}
+}
\ No newline at end of file
-- 
2.7.4