#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+extern "C" {
+#include "gpu.h"
+#include "ppcg.h"
+}
+
#include "llvm/Support/Debug.h"
using namespace polly;
public:
static char ID;
+ /// The scop that is currently processed.
+ Scop *S;
+
PPCGCodeGeneration() : ScopPass(ID) {}
- bool runOnScop(Scop &S) override { return true; }
+ /// Construct compilation options for PPCG.
+ ///
+ /// @returns The compilation options, malloc'ed together with a nested ppcg_debug_options; all flags preset to conservative defaults (CUDA target, rescheduling on, no shared/private memory use).
+ ppcg_options *createPPCGOptions() {
+ auto DebugOptions =
+ (ppcg_debug_options *)malloc(sizeof(ppcg_debug_options)); // NOTE(review): malloc results are not null-checked here or below
+ auto Options = (ppcg_options *)malloc(sizeof(ppcg_options));
+
+ DebugOptions->dump_schedule_constraints = false;
+ DebugOptions->dump_schedule = false;
+ DebugOptions->dump_final_schedule = false;
+ DebugOptions->dump_sizes = false;
+
+ Options->debug = DebugOptions;
+
+ Options->reschedule = true;
+ Options->scale_tile_loops = false;
+ Options->wrap = false;
+
+ Options->non_negative_parameters = false;
+ Options->ctx = nullptr;
+ Options->sizes = nullptr;
+
+ Options->use_private_memory = false;
+ Options->use_shared_memory = false;
+ Options->max_shared_memory = 0;
+
+ Options->target = PPCG_TARGET_CUDA;
+ Options->openmp = false;
+ Options->linearize_device_arrays = true;
+ Options->live_range_reordering = false;
+
+ Options->opencl_compiler_options = nullptr;
+ Options->opencl_use_gpu = false;
+ Options->opencl_n_include_file = 0;
+ Options->opencl_include_files = nullptr;
+ Options->opencl_print_kernel_types = false;
+ Options->opencl_embed_kernel_code = false;
+
+ Options->save_schedule_file = nullptr;
+ Options->load_schedule_file = nullptr;
+
+ return Options; // ownership passes to the caller (stored in ppcg_scop::options)
+ }
+
+ /// Create a new PPCG scop from the current scop.
+ ///
+ /// For now the created scop is initialized to 'zero' (all isl fields set
+ /// to null) and does not contain any scop-specific information.
+ ///
+ /// @returns A new, malloc'ed ppcg scop; release it with ppcg_scop_free().
+ ppcg_scop *createPPCGScop() {
+ auto PPCGScop = (ppcg_scop *)malloc(sizeof(ppcg_scop)); // NOTE(review): malloc result not null-checked
+
+ PPCGScop->options = createPPCGOptions(); // NOTE(review): verify ppcg_scop_free also releases these options — otherwise they leak
+
+ PPCGScop->start = 0;
+ PPCGScop->end = 0;
+
+ PPCGScop->context = nullptr;
+ PPCGScop->domain = nullptr;
+ PPCGScop->call = nullptr;
+ PPCGScop->tagged_reads = nullptr;
+ PPCGScop->reads = nullptr;
+ PPCGScop->live_in = nullptr;
+ PPCGScop->tagged_may_writes = nullptr;
+ PPCGScop->may_writes = nullptr;
+ PPCGScop->tagged_must_writes = nullptr;
+ PPCGScop->must_writes = nullptr;
+ PPCGScop->live_out = nullptr;
+ PPCGScop->tagged_must_kills = nullptr;
+ PPCGScop->tagger = nullptr;
+
+ PPCGScop->independence = nullptr;
+ PPCGScop->dep_flow = nullptr;
+ PPCGScop->tagged_dep_flow = nullptr;
+ PPCGScop->dep_false = nullptr;
+ PPCGScop->dep_forced = nullptr;
+ PPCGScop->dep_order = nullptr;
+ PPCGScop->tagged_dep_order = nullptr;
+
+ PPCGScop->schedule = nullptr;
+ PPCGScop->names = nullptr;
+
+ PPCGScop->pet = nullptr;
+
+ return PPCGScop;
+ }
+
+ /// Create a default-initialized PPCG GPU program.
+ ///
+ /// @returns A new gpu program description, or nullptr if @p PPCGScop is null.
+ gpu_prog *createPPCGProg(ppcg_scop *PPCGScop) {
+
+ if (!PPCGScop)
+ return nullptr;
+
+ auto PPCGProg = isl_calloc_type(S->getIslCtx(), struct gpu_prog); // allocated using the current scop's isl context
+
+ PPCGProg->ctx = S->getIslCtx();
+ PPCGProg->scop = PPCGScop;
+ PPCGProg->context = nullptr;
+ PPCGProg->read = nullptr;
+ PPCGProg->may_write = nullptr;
+ PPCGProg->must_write = nullptr;
+ PPCGProg->tagged_must_kill = nullptr;
+ PPCGProg->may_persist = nullptr;
+ PPCGProg->to_outer = nullptr;
+ PPCGProg->to_inner = nullptr;
+ PPCGProg->any_to_outer = nullptr;
+ PPCGProg->array_order = nullptr;
+ PPCGProg->n_stmts = 0;
+ PPCGProg->stmts = nullptr;
+ PPCGProg->n_array = 0;
+ PPCGProg->array = nullptr;
+
+ return PPCGProg;
+ }
+
+ bool runOnScop(Scop &CurrentScop) override { // Build and immediately free the PPCG data structures for @p CurrentScop.
+ S = &CurrentScop; // remember the scop so the create* helpers can reach it
+
+ auto PPCGScop = createPPCGScop();
+ auto PPCGProg = createPPCGProg(PPCGScop); // nullptr if PPCGScop was null
+ gpu_prog_free(PPCGProg); // assumes both free functions are null-safe — TODO confirm
+ ppcg_scop_free(PPCGScop);
+
+ return true; // NOTE(review): nothing is transformed yet; returning false would be more precise
+ }
void printScop(raw_ostream &, Scop &) const override {}
--- /dev/null
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s
+; REQUIRES: pollyacc
+
+; CHECK: Stmt_bb5
+; CHECK: Domain :=
+; CHECK: { Stmt_bb5[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 };
+; CHECK: Schedule :=
+; CHECK: { Stmt_bb5[i0, i1] -> [i0, i1] };
+; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK: { Stmt_bb5[i0, i1] -> MemRef_A[i0, i1] };
+; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK: { Stmt_bb5[i0, i1] -> MemRef_A[i0, i1] };
+;
+; void double_parallel_loop(float A[][1024]) {
+; for (long i = 0; i < 1024; i++)
+; for (long j = 0; j < 1024; j++)
+; A[i][j] += i * j;
+; }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @double_parallel_loop([1024 x float]* %A) {
+bb:
+ br label %bb2
+
+bb2: ; preds = %bb13, %bb — outer loop header (induction variable i)
+ %i.0 = phi i64 [ 0, %bb ], [ %tmp14, %bb13 ]
+ %exitcond1 = icmp ne i64 %i.0, 1024 ; i < 1024
+ br i1 %exitcond1, label %bb3, label %bb15
+
+bb3: ; preds = %bb2
+ br label %bb4
+
+bb4: ; preds = %bb10, %bb3 — inner loop header (induction variable j)
+ %j.0 = phi i64 [ 0, %bb3 ], [ %tmp11, %bb10 ]
+ %exitcond = icmp ne i64 %j.0, 1024 ; j < 1024
+ br i1 %exitcond, label %bb5, label %bb12
+
+bb5: ; preds = %bb4 — loop body: A[i][j] += (float)(i * j)
+ %tmp = mul nuw nsw i64 %i.0, %j.0
+ %tmp6 = sitofp i64 %tmp to float
+ %tmp7 = getelementptr inbounds [1024 x float], [1024 x float]* %A, i64 %i.0, i64 %j.0
+ %tmp8 = load float, float* %tmp7, align 4
+ %tmp9 = fadd float %tmp8, %tmp6
+ store float %tmp9, float* %tmp7, align 4
+ br label %bb10
+
+bb10: ; preds = %bb5 — inner loop latch: j++
+ %tmp11 = add nuw nsw i64 %j.0, 1
+ br label %bb4
+
+bb12: ; preds = %bb4
+ br label %bb13
+
+bb13: ; preds = %bb12 — outer loop latch: i++
+ %tmp14 = add nuw nsw i64 %i.0, 1
+ br label %bb2
+
+bb15: ; preds = %bb2 — function exit
+ ret void
+}