From: bsegovia
Date: Mon, 13 Aug 2012 16:33:09 +0000 (+0000)
Subject: Fixed a bug with LLVM alloca
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=50af9ec38b1e7589e87fa4d488e962690ad2fb8e;p=contrib%2Fbeignet.git

Fixed a bug with LLVM alloca
Fixed CFG linearization
Fixed a bug with function stack
---

diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index 0083e46..79cbd54 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -409,12 +409,6 @@ namespace gbe
     });
   }

-  // The idea is that foward branches can by-pass the target of previous
-  // forward branches. Since we run in SIMD mode, we must be sure that we are
-  // not skipping some computations. The idea is therefore to put JOIN points at
-  // the head of each block and to restrict the distance where to jump when
-  // taking a forward branch. We traverse the blocks top to bottom and use a
-  // O(n^2) stupid algorithm to track down which branches we can by-pass
   void Context::buildJIPs(void) {
     using namespace ir;

@@ -427,7 +421,6 @@ namespace gbe
     // If some blocks are unused we mark them as such by setting their own label
     // as "invalid" (== noTarget)
     for (auto &bb : braTargets) bb = std::make_pair(noTarget, noTarget);
-
     fn.foreachBlock([&](const BasicBlock &bb) {
       const LabelIndex ownLabel = bb.getLabelIndex();
       const Instruction *last = bb.getLastInstruction();
@@ -439,71 +432,42 @@ namespace gbe
       }
     });

-    // For each block, we also figure out if the JOIN point (at the label
-    // instruction location) needs a branch to bypass useless computations
-    vector<LabelIndex> joinTargets;
-    joinTargets.resize(fn.labelNum());
-    for (auto &bb : joinTargets) bb = noTarget;
-
-    // We store here the labels bypassed by the current branch
-    vector<LabelIndex> bypassedLabels;
-    bypassedLabels.resize(blockNum);
+    // Stores the current forward targets
+    set<LabelIndex> fwdTargets;

     // Now retraverse the blocks and figure out all JIPs
     for (int32_t blockID = 0; blockID < blockNum; ++blockID) {
       const LabelIndex ownLabel = braTargets[blockID].first;
       const LabelIndex target = braTargets[blockID].second;
       const BasicBlock &bb = fn.getBlock(ownLabel);
-      const Instruction *insn = bb.getLastInstruction();
+      const Instruction *label = bb.getFirstInstruction();
+      const Instruction *bra = bb.getLastInstruction();
+
+      // Expires the branches that point to us (if any)
+      auto it = fwdTargets.find(ownLabel);
+      if (it != fwdTargets.end()) fwdTargets.erase(it);
+
+      // If there is an outstanding forward branch, compute a JIP for the label
+      auto lower = fwdTargets.lower_bound(LabelIndex(0));
+      GBE_ASSERT(label->isMemberOf<LabelInstruction>() == true);
+      if (lower != fwdTargets.end())
+        JIPs.insert(std::make_pair(label, *lower));
+
+      // Handle special cases and backward branches first
       if (ownLabel == noTarget) continue; // unused block
       if (target == noTarget) continue; // no branch at all
-      GBE_ASSERT(insn->isMemberOf<BranchInstruction>() == true);
+      GBE_ASSERT(bra->isMemberOf<BranchInstruction>() == true);
       if (target <= ownLabel) { // bwd branch: we always jump
-        JIPs.insert(std::make_pair(insn, LabelIndex(target)));
+        JIPs.insert(std::make_pair(bra, LabelIndex(target)));
         continue;
       }

-      // Traverse all previous blocks and see if we bypass their target
-      uint32_t bypassedNum = 0;
-      uint32_t JIP = target;
-      for (int32_t prevID = blockID-1; prevID >= 0; --prevID) {
-        const LabelIndex prevTarget = braTargets[prevID].second;
-        if (prevTarget == noTarget) continue; // no branch
-        if (prevTarget >= target) continue; // complete bypass
-        if (prevTarget <= ownLabel) continue; // branch falls before
-        bypassedLabels[bypassedNum++] = prevTarget;
-        JIP = min(uint32_t(JIP), uint32_t(prevTarget));
-      }
-
-      // We now have the (possibly) updated JIP for the branch
-      JIPs.insert(std::make_pair(insn, LabelIndex(JIP)));
-
-      // No bypassed targets
-      if (bypassedNum == 0) continue;
-
-      // When we have several bypassed targets, we must simply sort them and
-      // chain them such target_n points to target_{n+1}
-      bypassedLabels[bypassedNum++] = target;
-      std::sort(&bypassedLabels[0], &bypassedLabels[bypassedNum]);
-
-      // Bypassed labels have a JIP now. However, we will only insert the
-      // instructions later since *several* branches can bypass the same label.
-      // For that reason, we must consider the *minimum* JIP
-      for (uint32_t bypassedID = 0; bypassedID < bypassedNum-1; ++bypassedID) {
-        const LabelIndex curr = bypassedLabels[bypassedID];
-        const LabelIndex next = bypassedLabels[bypassedID+1];
-        joinTargets[curr] = min(joinTargets[curr], next);
-      }
+      // This is a forward jump, register it and get the JIP
+      fwdTargets.insert(target);
+      auto jip = fwdTargets.lower_bound(LabelIndex(0));
+      JIPs.insert(std::make_pair(bra, *jip));
     }

-    // Now we also processed all JOIN points (i.e. each label). We can insert
-    // the label instructions that have a JIP
-    for (uint32_t label = 0; label < fn.labelNum(); ++label) {
-      const LabelIndex target = joinTargets[label];
-      if (target == noTarget) continue;
-      const Instruction *insn = fn.getLabelInstruction(LabelIndex(label));
-      JIPs.insert(std::make_pair(insn, target));
-    }
   }

   bool Context::isScalarReg(const ir::Register &reg) const {
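The rewritten buildJIPs drops the O(n^2) bypass scan and instead walks the blocks once, keeping the still-pending forward branch targets in a sorted set: a label reached while a forward branch is still outstanding gets a JIP to the closest pending target, and a forward branch jumps to the closest pending target once its own target has been registered. The standalone C++ sketch below only mimics that bookkeeping: the plain int labels, the block table and the noTarget sentinel are invented for the example, and *begin() plays the role of the lower_bound(LabelIndex(0)) call in the real code since labels are never negative.

#include <cstdio>
#include <map>
#include <set>
#include <utility>
#include <vector>

int main() {
  const int noTarget = -1;
  // One (ownLabel, branch target) pair per block, in program order;
  // this tiny CFG is made up for the example.
  const std::vector<std::pair<int, int> > blocks = {
    {0, 3},        // block 0 branches forward over blocks 1 and 2
    {1, noTarget}, // falls through
    {2, 4},        // another forward branch
    {3, noTarget},
    {4, noTarget},
  };

  std::set<int> fwdTargets;            // still-pending forward targets (sorted)
  std::map<int, int> labelJIP, braJIP; // JIP attached to each label / branch

  for (const auto &b : blocks) {
    const int ownLabel = b.first, target = b.second;
    // Branches that pointed at this block are now resolved.
    fwdTargets.erase(ownLabel);
    // If a forward branch is still pending, the label needs a JIP to the
    // closest pending target so inactive lanes can skip ahead.
    if (!fwdTargets.empty()) labelJIP[ownLabel] = *fwdTargets.begin();
    if (target == noTarget) continue;  // no branch at all
    if (target <= ownLabel) {          // backward branch: we always jump
      braJIP[ownLabel] = target;
      continue;
    }
    // Forward branch: register it, then jump to the closest pending target.
    fwdTargets.insert(target);
    braJIP[ownLabel] = *fwdTargets.begin();
  }

  for (const auto &p : labelJIP) printf("label %d: JIP %d\n", p.first, p.second);
  for (const auto &p : braJIP)   printf("bra in block %d: JIP %d\n", p.first, p.second);
  return 0;
}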
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 0eb6b8d..3e04837 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -21,7 +21,6 @@
  * \file gen_context.cpp
  * \author Benjamin Segovia
  */
-
 #include "backend/gen_context.hpp"
 #include "backend/gen_program.hpp"
 #include "backend/gen_defs.hpp"
@@ -110,7 +109,7 @@ namespace gbe
     // Check that everything is consistent in the kernel code
     const uint32_t perLaneSize = kernel->getStackSize();
     const uint32_t perThreadSize = perLaneSize * this->simdWidth;
-    const int32_t offset = kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
+    const int32_t offset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
     GBE_ASSERT(perLaneSize > 0);
     GBE_ASSERT(isPowerOf<2>(perLaneSize) == true);
     GBE_ASSERT(isPowerOf<2>(perThreadSize) == true);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 3fbd952..9f78cc4 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1405,16 +1405,19 @@ namespace gbe
       const ir::Register reg = ctx.reg(pointerFamily);
       const ir::Immediate imm = ctx.getImmediate(immIndex);

+      // Set the destination register properly
+      ctx.MOV(imm.type, dst, stack);
+
       // Easy case, we just increment the stack pointer
       if (needMultiply == false) {
         ctx.LOADI(imm.type, reg, immIndex);
-        ctx.ADD(imm.type, dst, stack, reg);
+        ctx.ADD(imm.type, stack, stack, reg);
       }
       // Harder case (variable length array) that requires a multiply
       else {
         ctx.LOADI(imm.type, reg, immIndex);
         ctx.MUL(imm.type, reg, this->getRegister(src), reg);
-        ctx.ADD(imm.type, dst, stack, reg);
+        ctx.ADD(imm.type, stack, stack, reg);
       }
     }
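The llvm_gen_backend.cpp hunk fixes the alloca lowering: the destination register now receives the current stack pointer (the MOV), and the stack pointer itself is bumped by the allocation size (the ADD writes back into stack). The old code wrote stack + size into the destination and never advanced the stack pointer, so consecutive allocas could hand out overlapping storage. Below is a minimal sketch of that bump-allocation semantics; stackPtr and allocaBump are made-up names for the example, not beignet identifiers.

#include <cstdint>
#include <cstdio>

// Per-lane stack offset in bytes; a made-up global standing in for the
// per-lane stack pointer register the backend maintains.
static uint32_t stackPtr = 0;

// elemSize is known at compile time, count may be dynamic (the VLA case
// that needs the extra MUL in the real lowering).
static uint32_t allocaBump(uint32_t elemSize, uint32_t count) {
  const uint32_t addr = stackPtr;  // MOV dst, stack : the result is the old SP
  stackPtr += elemSize * count;    // ADD stack, stack, size : bump the SP
  return addr;
}

int main() {
  const uint32_t a = allocaBump(4, 1); // e.g. "int x"
  const uint32_t b = allocaBump(4, 8); // e.g. "int y[8]"
  printf("a=%u b=%u sp=%u\n", a, b, stackPtr); // prints a=0 b=4 sp=36
  return 0;
}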
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index 22c3317..21193a5 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -53,7 +53,9 @@ namespace gbe
     // Get the global LLVM context
     llvm::LLVMContext& c = llvm::getGlobalContext();
     std::string errInfo;
-    std::unique_ptr<llvm::raw_fd_ostream> o(new llvm::raw_fd_ostream("-", errInfo));
+    std::unique_ptr<llvm::raw_fd_ostream> o = NULL;
+    if (OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS || OCL_OUTPUT_LLVM)
+      o = std::unique_ptr<llvm::raw_fd_ostream>(new llvm::raw_fd_ostream("-", errInfo));

     // Get the module from its file
     SMDiagnostic Err;
@@ -82,9 +84,11 @@ namespace gbe
     passes.run(mod);

     // raw_fd_ostream closes stdout. We must reopen it
-    o = NULL;
-    const int fd = open("/dev/tty", O_WRONLY);
-    stdout = fdopen(fd, "w");
+    if (OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS || OCL_OUTPUT_LLVM) {
+      o = NULL;
+      const int fd = open("/dev/tty", O_WRONLY);
+      stdout = fdopen(fd, "w");
+    }

     return true;
   }
diff --git a/utests/Makefile b/utests/Makefile
index 13700ac..6455c37 100644
--- a/utests/Makefile
+++ b/utests/Makefile
@@ -11,7 +11,6 @@ C_SRC=utest_error.c
 CPP_SRC=\
   app_mandelbrot.cpp \
   compiler_argument_structure.cpp \
-  compiler_argument_structure_indirect.cpp \
   compiler_array0.cpp \
   compiler_array.cpp \
   compiler_byte_scatter.cpp \
@@ -21,7 +20,6 @@ CPP_SRC=\
   compiler_function_argument1.cpp \
   compiler_function_argument.cpp \
   compiler_if_else.cpp \
-  compiler_local_slm.cpp \
   compiler_lower_return0.cpp \
   compiler_lower_return1.cpp \
   compiler_lower_return2.cpp \
@@ -43,6 +41,10 @@ CPP_SRC=\
   utest_file_map.cpp \
   utest_helper.cpp

+# unsupported for now
+# compiler_local_slm.cpp
+#compiler_argument_structure_indirect.cpp
+
 OBJ=$(C_SRC:.c=.o) $(CPP_SRC:.cpp=.o)

 all: utest_run runtime_flat_address_space
@@ -56,4 +58,4 @@ runtime_flat_address_space: $(OBJ) runtime_flat_address_space.o
 clean:
 	rm -f $(OBJ)
 	rm -f utest_run utest_run.o
-	rm -f runtime_flat_address_space runtime_flat_address_space.o
\ No newline at end of file
+	rm -f runtime_flat_address_space runtime_flat_address_space.o
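The llvm_to_gen.cpp hunk builds the raw_fd_ostream bound to "-" only when one of the LLVM dump flags is set, and reopens stdout from /dev/tty only in that same case, because destroying that stream closes stdout. A rough sketch of the pattern follows; StdoutStream is a stand-in for llvm::raw_fd_ostream, dumpRequested stands in for OCL_OUTPUT_LLVM || OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS, and assigning to stdout mirrors what the real code does and relies on glibc exposing it as a writable pointer.

#include <cstdio>
#include <fcntl.h>
#include <memory>

// Stand-in for llvm::raw_fd_ostream("-", errInfo): writes to stdout and,
// like the real stream, closes stdout when it is destroyed.
struct StdoutStream {
  ~StdoutStream() { fclose(stdout); }
  void write(const char *s) { fputs(s, stdout); }
};

int main() {
  const bool dumpRequested = false; // e.g. OCL_OUTPUT_LLVM in the real code
  std::unique_ptr<StdoutStream> o;
  if (dumpRequested)
    o.reset(new StdoutStream());    // only hijack stdout when a dump was asked for

  if (o) o->write("; dumped IR would go here\n");

  if (dumpRequested) {
    o.reset();                      // destroys the stream, which closes stdout
    const int fd = open("/dev/tty", O_WRONLY);
    if (fd >= 0) stdout = fdopen(fd, "w"); // glibc-specific: stdout is assignable
  }
  printf("regular output still reaches stdout\n");
  return 0;
}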