From 5857b701a3bf45baa5fa5ba99fef725615719f6b Mon Sep 17 00:00:00 2001
From: Tobias Grosser <tobias@grosser.es>
Date: Mon, 12 Sep 2016 06:06:31 +0000
Subject: [PATCH] GPGPU: Bail out gracefully in case of invalid IR

Instead of aborting, we now bail out gracefully in case the kernel IR we
generate is invalid. This can currently happen in case the SCoP stores
pointer values, which we model as arrays, as data values into other arrays. In
this case, the original pointer value is not available on the device and can
consequently not be stored. As detecting this ahead of time is not so easy, we
detect these situations after the invalid IR has been generated and bail out.

llvm-svn: 281193
---
 polly/lib/CodeGen/PPCGCodeGeneration.cpp | 17 ++++--
 polly/test/GPGPU/invalid-kernel.ll       | 78 ++++++++++++++++++++++++
 2 files changed, 91 insertions(+), 4 deletions(-)
 create mode 100644 polly/test/GPGPU/invalid-kernel.ll

diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index 66ffa730accb..783d256a32cc 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -152,6 +152,12 @@ public:
   /// Finalize the generated scop.
   virtual void finalize();
 
+  /// Track if the full build process was successful.
+  ///
+  /// This value is set to false, if throughout the build process an error
+  /// occurred which prevents us from generating valid GPU code.
+  bool BuildSuccessful = true;
+
 private:
   /// A vector of array base pointers for which a new ScopArrayInfo was created.
   ///
@@ -1409,10 +1415,10 @@ std::string GPUNodeBuilder::createKernelASM() {
 }
 
 std::string GPUNodeBuilder::finalizeKernelFunction() {
-  // Verify module.
-  llvm::legacy::PassManager Passes;
-  Passes.add(createVerifierPass());
-  Passes.run(*GPUModule);
+  if (verifyModule(*GPUModule)) {
+    BuildSuccessful = false;
+    return "";
+  }
 
   if (DumpKernelIR)
     outs() << *GPUModule << "\n";
@@ -2139,6 +2145,9 @@ public:
     NodeBuilder.initializeAfterRTH();
     NodeBuilder.create(Root);
     NodeBuilder.finalize();
+
+    if (!NodeBuilder.BuildSuccessful)
+      SplitBlock->getTerminator()->setOperand(0, Builder.getFalse());
   }
 
   bool runOnScop(Scop &CurrentScop) override {
diff --git a/polly/test/GPGPU/invalid-kernel.ll b/polly/test/GPGPU/invalid-kernel.ll
new file mode 100644
index 000000000000..298f6a589a6f
--- /dev/null
+++ b/polly/test/GPGPU/invalid-kernel.ll
@@ -0,0 +1,78 @@
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
+; RUN: -disable-output < %s | \
+; RUN: FileCheck -check-prefix=CODE %s
+
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
+; RUN: -disable-output < %s | \
+; RUN: not FileCheck %s -check-prefix=KERNEL-IR
+
+; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
+; RUN: FileCheck %s -check-prefix=IR
+
+; REQUIRES: pollyacc
+;
+;    void foo(long A[1024], long B[1024]) {
+;      for (long i = 0; i < 1024; i++)
+;        A[i] += (B[i] + (long)&B[i]);
+;    }
+
+; This kernel loads/stores a pointer address we model. This is a rare case,
+; were we still lack proper code-generation support. We check here that we
+; detect the invalid IR and bail out gracefully.
+
+; CODE: Code
+; CODE-NEXT: ====
+; CODE-NEXT: # host
+; CODE-NEXT: {
+; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (1024) * sizeof(i64), cudaMemcpyHostToDevice));
+; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * sizeof(i64), cudaMemcpyHostToDevice));
+; CODE-NEXT:   {
+; CODE-NEXT:     dim3 k0_dimBlock(32);
+; CODE-NEXT:     dim3 k0_dimGrid(32);
+; CODE-NEXT:     kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_B, dev_MemRef_A);
+; CODE-NEXT:     cudaCheckKernel();
+; CODE-NEXT:   }
+
+; CODE:   cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * sizeof(i64), cudaMemcpyDeviceToHost));
+; CODE-NEXT: }
+
+; CODE: # kernel0
+; CODE-NEXT: Stmt_bb2(32 * b0 + t0);
+
+; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
+; RUN: FileCheck %s -check-prefix=IR
+
+; KERNEL-IR: kernel
+
+; IR: br i1 false, label %polly.start, label %bb1
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo(i64* %A, i64* %B) {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb10, %bb
+  %i.0 = phi i64 [ 0, %bb ], [ %tmp11, %bb10 ]
+  %exitcond = icmp ne i64 %i.0, 1024
+  br i1 %exitcond, label %bb2, label %bb12
+
+bb2:                                              ; preds = %bb1
+  %tmp = getelementptr inbounds i64, i64* %B, i64 %i.0
+  %tmp3 = load i64, i64* %tmp, align 8
+  %tmp4 = getelementptr inbounds i64, i64* %B, i64 %i.0
+  %tmp5 = ptrtoint i64* %tmp4 to i64
+  %tmp6 = add nsw i64 %tmp3, %tmp5
+  %tmp7 = getelementptr inbounds i64, i64* %A, i64 %i.0
+  %tmp8 = load i64, i64* %tmp7, align 8
+  %tmp9 = add nsw i64 %tmp8, %tmp6
+  store i64 %tmp9, i64* %tmp7, align 8
+  br label %bb10
+
+bb10:                                             ; preds = %bb2
+  %tmp11 = add nuw nsw i64 %i.0, 1
+  br label %bb1
+
+bb12:                                             ; preds = %bb1
+  ret void
+}
-- 
2.34.1