From: bsegovia
Date: Mon, 13 Aug 2012 16:33:09 +0000 (+0000)
Subject: Fixed a bug with LLVM alloca
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=50af9ec38b1e7589e87fa4d488e962690ad2fb8e;p=contrib%2Fbeignet.git

Fixed a bug with LLVM alloca
Fixed CFG linearization
Fixed a bug with function stack
---

diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index 0083e46..79cbd54 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -409,12 +409,6 @@ namespace gbe
     });
   }

-  // The idea is that foward branches can by-pass the target of previous
-  // forward branches. Since we run in SIMD mode, we must be sure that we are
-  // not skipping some computations. The idea is therefore to put JOIN points at
-  // the head of each block and to restrict the distance where to jump when
-  // taking a forward branch. We traverse the blocks top to bottom and use a
-  // O(n^2) stupid algorithm to track down which branches we can by-pass
   void Context::buildJIPs(void) {
     using namespace ir;

@@ -427,7 +421,6 @@ namespace gbe
     // If some blocks are unused we mark them as such by setting their own label
     // as "invalid" (== noTarget)
     for (auto &bb : braTargets) bb = std::make_pair(noTarget, noTarget);
-
     fn.foreachBlock([&](const BasicBlock &bb) {
       const LabelIndex ownLabel = bb.getLabelIndex();
       const Instruction *last = bb.getLastInstruction();
@@ -439,71 +432,42 @@ namespace gbe
       }
     });

-    // For each block, we also figure out if the JOIN point (at the label
-    // instruction location) needs a branch to bypass useless computations
-    vector<LabelIndex> joinTargets;
-    joinTargets.resize(fn.labelNum());
-    for (auto &bb : joinTargets) bb = noTarget;
-
-    // We store here the labels bypassed by the current branch
-    vector<LabelIndex> bypassedLabels;
-    bypassedLabels.resize(blockNum);
+    // Stores the current forward targets
+    set<LabelIndex> fwdTargets;

     // Now retraverse the blocks and figure out all JIPs
     for (int32_t blockID = 0; blockID < blockNum; ++blockID) {
       const LabelIndex ownLabel = braTargets[blockID].first;
       const LabelIndex target = braTargets[blockID].second;
       const BasicBlock &bb = fn.getBlock(ownLabel);
-      const Instruction *insn = bb.getLastInstruction();
+      const Instruction *label = bb.getFirstInstruction();
+      const Instruction *bra = bb.getLastInstruction();
+
+      // Expires the branches that point to us (if any)
+      auto it = fwdTargets.find(ownLabel);
+      if (it != fwdTargets.end()) fwdTargets.erase(it);
+
+      // If there is an outstanding forward branch, compute a JIP for the label
+      auto lower = fwdTargets.lower_bound(LabelIndex(0));
+      GBE_ASSERT(label->isMemberOf<LabelInstruction>() == true);
+      if (lower != fwdTargets.end())
+        JIPs.insert(std::make_pair(label, *lower));
+
+      // Handle special cases and backward branches first
       if (ownLabel == noTarget) continue; // unused block
       if (target == noTarget) continue; // no branch at all
-      GBE_ASSERT(insn->isMemberOf<BranchInstruction>() == true);
+      GBE_ASSERT(bra->isMemberOf<BranchInstruction>() == true);
       if (target <= ownLabel) { // bwd branch: we always jump
-        JIPs.insert(std::make_pair(insn, LabelIndex(target)));
+        JIPs.insert(std::make_pair(bra, LabelIndex(target)));
         continue;
       }

-      // Traverse all previous blocks and see if we bypass their target
-      uint32_t bypassedNum = 0;
-      uint32_t JIP = target;
-      for (int32_t prevID = blockID-1; prevID >= 0; --prevID) {
-        const LabelIndex prevTarget = braTargets[prevID].second;
-        if (prevTarget == noTarget) continue; // no branch
-        if (prevTarget >= target) continue; // complete bypass
-        if (prevTarget <= ownLabel) continue; // branch falls before
-        bypassedLabels[bypassedNum++] = prevTarget;
-        JIP = min(uint32_t(JIP), uint32_t(prevTarget));
-      }
-
-      // We now have the (possibly) updated JIP for the branch
-      JIPs.insert(std::make_pair(insn, LabelIndex(JIP)));
-
-      // No bypassed targets
-      if (bypassedNum == 0) continue;
-
-      // When we have several bypassed targets, we must simply sort them and
-      // chain them such target_n points to target_{n+1}
-      bypassedLabels[bypassedNum++] = target;
-      std::sort(&bypassedLabels[0], &bypassedLabels[bypassedNum]);
-
-      // Bypassed labels have a JIP now. However, we will only insert the
-      // instructions later since *several* branches can bypass the same label.
-      // For that reason, we must consider the *minimum* JIP
-      for (uint32_t bypassedID = 0; bypassedID < bypassedNum-1; ++bypassedID) {
-        const LabelIndex curr = bypassedLabels[bypassedID];
-        const LabelIndex next = bypassedLabels[bypassedID+1];
-        joinTargets[curr] = min(joinTargets[curr], next);
-      }
+      // This is a forward jump, register it and get the JIP
+      fwdTargets.insert(target);
+      auto jip = fwdTargets.lower_bound(LabelIndex(0));
+      JIPs.insert(std::make_pair(bra, *jip));
     }

-    // Now we also processed all JOIN points (i.e. each label). We can insert
-    // the label instructions that have a JIP
-    for (uint32_t label = 0; label < fn.labelNum(); ++label) {
-      const LabelIndex target = joinTargets[label];
-      if (target == noTarget) continue;
-      const Instruction *insn = fn.getLabelInstruction(LabelIndex(label));
-      JIPs.insert(std::make_pair(insn, target));
-    }
   }

   bool Context::isScalarReg(const ir::Register &reg) const {
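The rewritten buildJIPs drops the O(n^2) bypass scan and instead walks the blocks once, keeping the still-pending forward branch targets in a sorted set: a label reached while a forward branch is still outstanding gets a JIP to the closest pending target, and a forward branch jumps to the closest pending target once its own target has been registered. The standalone C++ sketch below only mimics that bookkeeping: the plain int labels, the block table and the noTarget sentinel are invented for the example, and *begin() plays the role of the lower_bound(LabelIndex(0)) call in the real code since labels are never negative.

#include <cstdio>
#include <map>
#include <set>
#include <utility>
#include <vector>

int main() {
  const int noTarget = -1;
  // One (ownLabel, branch target) pair per block, in program order;
  // this tiny CFG is made up for the example.
  const std::vector<std::pair<int, int> > blocks = {
    {0, 3},        // block 0 branches forward over blocks 1 and 2
    {1, noTarget}, // falls through
    {2, 4},        // another forward branch
    {3, noTarget},
    {4, noTarget},
  };

  std::set<int> fwdTargets;            // still-pending forward targets (sorted)
  std::map<int, int> labelJIP, braJIP; // JIP attached to each label / branch

  for (const auto &b : blocks) {
    const int ownLabel = b.first, target = b.second;
    // Branches that pointed at this block are now resolved.
    fwdTargets.erase(ownLabel);
    // If a forward branch is still pending, the label needs a JIP to the
    // closest pending target so inactive lanes can skip ahead.
    if (!fwdTargets.empty()) labelJIP[ownLabel] = *fwdTargets.begin();
    if (target == noTarget) continue;  // no branch at all
    if (target <= ownLabel) {          // backward branch: we always jump
      braJIP[ownLabel] = target;
      continue;
    }
    // Forward branch: register it, then jump to the closest pending target.
    fwdTargets.insert(target);
    braJIP[ownLabel] = *fwdTargets.begin();
  }

  for (const auto &p : labelJIP) printf("label %d: JIP %d\n", p.first, p.second);
  for (const auto &p : braJIP)   printf("bra in block %d: JIP %d\n", p.first, p.second);
  return 0;
}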
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 0eb6b8d..3e04837 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -21,7 +21,6 @@
  * \file gen_context.cpp
  * \author Benjamin Segovia
  */
-
 #include "backend/gen_context.hpp"
 #include "backend/gen_program.hpp"
 #include "backend/gen_defs.hpp"
@@ -110,7 +109,7 @@ namespace gbe
     // Check that everything is consistent in the kernel code
     const uint32_t perLaneSize = kernel->getStackSize();
     const uint32_t perThreadSize = perLaneSize * this->simdWidth;
-    const int32_t offset = kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
+    const int32_t offset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
     GBE_ASSERT(perLaneSize > 0);
     GBE_ASSERT(isPowerOf<2>(perLaneSize) == true);
     GBE_ASSERT(isPowerOf<2>(perThreadSize) == true);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 3fbd952..9f78cc4 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1405,16 +1405,19 @@ namespace gbe
       const ir::Register reg = ctx.reg(pointerFamily);
       const ir::Immediate imm = ctx.getImmediate(immIndex);

+      // Set the destination register properly
+      ctx.MOV(imm.type, dst, stack);
+
       // Easy case, we just increment the stack pointer
       if (needMultiply == false) {
         ctx.LOADI(imm.type, reg, immIndex);
-        ctx.ADD(imm.type, dst, stack, reg);
+        ctx.ADD(imm.type, stack, stack, reg);
       }
       // Harder case (variable length array) that requires a multiply
       else {
         ctx.LOADI(imm.type, reg, immIndex);
         ctx.MUL(imm.type, reg, this->getRegister(src), reg);
-        ctx.ADD(imm.type, dst, stack, reg);
+        ctx.ADD(imm.type, stack, stack, reg);
       }
     }
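The llvm_gen_backend.cpp hunk fixes the alloca lowering: the destination register now receives the current stack pointer (the MOV), and the stack pointer itself is bumped by the allocation size (the ADD writes back into stack). The old code wrote stack + size into the destination and never advanced the stack pointer, so consecutive allocas could hand out overlapping storage. Below is a minimal sketch of that bump-allocation semantics; stackPtr and allocaBump are made-up names for the example, not beignet identifiers.

#include <cstdint>
#include <cstdio>

// Per-lane stack offset in bytes; a made-up global standing in for the
// per-lane stack pointer register the backend maintains.
static uint32_t stackPtr = 0;

// elemSize is known at compile time, count may be dynamic (the VLA case
// that needs the extra MUL in the real lowering).
static uint32_t allocaBump(uint32_t elemSize, uint32_t count) {
  const uint32_t addr = stackPtr;  // MOV dst, stack : the result is the old SP
  stackPtr += elemSize * count;    // ADD stack, stack, size : bump the SP
  return addr;
}

int main() {
  const uint32_t a = allocaBump(4, 1); // e.g. "int x"
  const uint32_t b = allocaBump(4, 8); // e.g. "int y[8]"
  printf("a=%u b=%u sp=%u\n", a, b, stackPtr); // prints a=0 b=4 sp=36
  return 0;
}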
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index 22c3317..21193a5 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -53,7 +53,9 @@ namespace gbe
     // Get the global LLVM context
     llvm::LLVMContext& c = llvm::getGlobalContext();
     std::string errInfo;
-    std::unique_ptr<llvm::raw_fd_ostream> o(new llvm::raw_fd_ostream("-", errInfo));
+    std::unique_ptr<llvm::raw_fd_ostream> o = NULL;
+    if (OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS || OCL_OUTPUT_LLVM)
+      o = std::unique_ptr<llvm::raw_fd_ostream>(new llvm::raw_fd_ostream("-", errInfo));

     // Get the module from its file
     SMDiagnostic Err;
@@ -82,9 +84,11 @@ namespace gbe
     passes.run(mod);

     // raw_fd_ostream closes stdout. We must reopen it
-    o = NULL;
-    const int fd = open("/dev/tty", O_WRONLY);
-    stdout = fdopen(fd, "w");
+    if (OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS || OCL_OUTPUT_LLVM) {
+      o = NULL;
+      const int fd = open("/dev/tty", O_WRONLY);
+      stdout = fdopen(fd, "w");
+    }

     return true;
   }
diff --git a/utests/Makefile b/utests/Makefile
index 13700ac..6455c37 100644
--- a/utests/Makefile
+++ b/utests/Makefile
@@ -11,7 +11,6 @@ C_SRC=utest_error.c
 CPP_SRC=\
   app_mandelbrot.cpp \
   compiler_argument_structure.cpp \
-  compiler_argument_structure_indirect.cpp \
   compiler_array0.cpp \
   compiler_array.cpp \
   compiler_byte_scatter.cpp \
@@ -21,7 +20,6 @@ CPP_SRC=\
   compiler_function_argument1.cpp \
   compiler_function_argument.cpp \
   compiler_if_else.cpp \
-  compiler_local_slm.cpp \
   compiler_lower_return0.cpp \
   compiler_lower_return1.cpp \
   compiler_lower_return2.cpp \
@@ -43,6 +41,10 @@ CPP_SRC=\
   utest_file_map.cpp \
   utest_helper.cpp

+# unsupported for now
+# compiler_local_slm.cpp
+#compiler_argument_structure_indirect.cpp
+
 OBJ=$(C_SRC:.c=.o) $(CPP_SRC:.cpp=.o)

 all: utest_run runtime_flat_address_space
@@ -56,4 +58,4 @@ runtime_flat_address_space: $(OBJ) runtime_flat_address_space.o
 clean:
 	rm -f $(OBJ)
 	rm -f utest_run utest_run.o
-	rm -f runtime_flat_address_space runtime_flat_address_space.o
\ No newline at end of file
+	rm -f runtime_flat_address_space runtime_flat_address_space.o
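The llvm_to_gen.cpp hunk builds the raw_fd_ostream bound to "-" only when one of the LLVM dump flags is set, and reopens stdout from /dev/tty only in that same case, because destroying that stream closes stdout. A rough sketch of the pattern follows; StdoutStream is a stand-in for llvm::raw_fd_ostream, dumpRequested stands in for OCL_OUTPUT_LLVM || OCL_OUTPUT_LLVM_BEFORE_EXTRA_PASS, and assigning to stdout mirrors what the real code does and relies on glibc exposing it as a writable pointer.

#include <cstdio>
#include <fcntl.h>
#include <memory>

// Stand-in for llvm::raw_fd_ostream("-", errInfo): writes to stdout and,
// like the real stream, closes stdout when it is destroyed.
struct StdoutStream {
  ~StdoutStream() { fclose(stdout); }
  void write(const char *s) { fputs(s, stdout); }
};

int main() {
  const bool dumpRequested = false; // e.g. OCL_OUTPUT_LLVM in the real code
  std::unique_ptr<StdoutStream> o;
  if (dumpRequested)
    o.reset(new StdoutStream());    // only hijack stdout when a dump was asked for

  if (o) o->write("; dumped IR would go here\n");

  if (dumpRequested) {
    o.reset();                      // destroys the stream, which closes stdout
    const int fd = open("/dev/tty", O_WRONLY);
    if (fd >= 0) stdout = fdopen(fd, "w"); // glibc-specific: stdout is assignable
  }
  printf("regular output still reaches stdout\n");
  return 0;
}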