GPGPU: Optimize kernel IR before generating assembly code
author Tobias Grosser <tobias@grosser.es>
Sun, 24 Jul 2016 06:43:21 +0000 (06:43 +0000)
committer Tobias Grosser <tobias@grosser.es>
Sun, 24 Jul 2016 06:43:21 +0000 (06:43 +0000)
We optimize the kernel _after_ dumping the IR we generate, so that the dumped
IR is easier to read and independent of possible changes in the
general-purpose LLVM optimizers.

llvm-svn: 276551

polly/lib/CodeGen/PPCGCodeGeneration.cpp

index 420b3ee..a4609bf 100644 (file)
@@ -32,6 +32,7 @@
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
 
 #include "isl/union_map.h"
 
@@ -683,6 +684,14 @@ void GPUNodeBuilder::finalizeKernelFunction() {
   if (DumpKernelIR)
     outs() << *GPUModule << "\n";
 
+  // Optimize module.
+  llvm::legacy::PassManager OptPasses;
+  PassManagerBuilder PassBuilder;
+  PassBuilder.OptLevel = 3;
+  PassBuilder.SizeLevel = 0;
+  PassBuilder.populateModulePassManager(OptPasses);
+  OptPasses.run(*GPUModule);
+
   std::string Assembly = createKernelASM();
 
   if (DumpKernelASM)