From: Ruiling Song Date: Mon, 3 Nov 2014 07:52:25 +0000 (+0800) Subject: GBE: workaround register allocation fail caused by custom loop unroll. X-Git-Url: http://review.tizen.org/git/?p=contrib%2Fbeignet.git;a=commitdiff_plain;h=7987b7996daebeed8a8c0023d6f1b29dcb28951c GBE: workaround register allocation fail caused by custom loop unroll. As this issue only occurs under strict math, we disable custom loop unroll if strict math is enabled. Signed-off-by: Ruiling Song Reviewed-by: Zhigang Gong --- diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp index 0c7785b..33aec59 100644 --- a/backend/src/llvm/llvm_to_gen.cpp +++ b/backend/src/llvm/llvm_to_gen.cpp @@ -107,7 +107,7 @@ namespace gbe FPM.doFinalization(); } - void runModulePass(Module &mod, TargetLibraryInfo *libraryInfo, const DataLayout &DL, int optLevel) + void runModulePass(Module &mod, TargetLibraryInfo *libraryInfo, const DataLayout &DL, int optLevel, bool strictMath) { llvm::PassManager MPM; @@ -158,11 +158,16 @@ namespace gbe MPM.add(createGVNPass()); // Remove redundancies } #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5 - MPM.add(createCustomLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll loops - MPM.add(createLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll loops - if(optLevel > 0) { - MPM.add(createSROAPass(/*RequiresDomTree*/ false)); - MPM.add(createGVNPass()); // Remove redundancies + // FIXME Workaround: we find that CustomLoopUnroll may increase register pressure greatly, + // and it may even make some CL kernels fail to compile because of limited scratch memory for spill. + // As we observe this only under strict math, we disable CustomLoopUnroll if strict math is enabled. 
+ if (!strictMath) { + MPM.add(createCustomLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll loops + MPM.add(createLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll loops + if(optLevel > 0) { + MPM.add(createSROAPass(/*RequiresDomTree*/ false)); + MPM.add(createGVNPass()); // Remove redundancies + } } #endif MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset @@ -250,7 +255,7 @@ namespace gbe OUTPUT_BITCODE(AFTER_LINK, mod); runFuntionPass(mod, libraryInfo, DL); - runModulePass(mod, libraryInfo, DL, optLevel); + runModulePass(mod, libraryInfo, DL, optLevel, strictMath); llvm::PassManager passes; #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5 passes.add(new DataLayoutPass(DL));