[OPENMP, NVPTX] Initial support for L2 parallelism in SPMD mode.
author Alexey Bataev <a.bataev@hotmail.com>
Thu, 10 May 2018 18:32:08 +0000 (18:32 +0000)
committer Alexey Bataev <a.bataev@hotmail.com>
Thu, 10 May 2018 18:32:08 +0000 (18:32 +0000)
Added initial support for L2 parallelism in SPMD mode. Note, though,
that orphaned parallel directives are not yet supported in SPMD mode.
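
For illustration, a sketch of the nesting this enables (mirroring the test
added to nvptx_target_teams_codegen.cpp; the function name and the map
clause are illustrative, not part of the test): the target teams region is
emitted as an SPMD kernel, the outer parallel becomes the L1 level executed
by the SPMD threads, and the inner (L2) parallel is serialized with
__kmpc_serialized_parallel/__kmpc_end_serialized_parallel.

  int l2_example() {
    short aa = 0;
  #pragma omp target teams map(tofrom : aa)
    {
  #pragma omp parallel // L1: run directly by the SPMD threads.
  #pragma omp parallel // L2: serialized by the NVPTX runtime.
      aa = 1;
    }
    return aa;
  }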

llvm-svn: 332016

clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
clang/test/OpenMP/nvptx_parallel_codegen.cpp
clang/test/OpenMP/nvptx_target_teams_codegen.cpp
clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
clang/test/OpenMP/target_parallel_debug_codegen.cpp

index cee5ca6..98d8b0f 100644 (file)
@@ -140,13 +140,15 @@ public:
 /// to emit optimized code.
 class ExecutionModeRAII {
 private:
-  bool SavedMode;
-  bool &Mode;
+  CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode;
+  CGOpenMPRuntimeNVPTX::ExecutionMode &Mode;
 
 public:
-  ExecutionModeRAII(bool &Mode, bool NewMode) : Mode(Mode) {
+  ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode, bool IsSPMD)
+      : Mode(Mode) {
     SavedMode = Mode;
-    Mode = NewMode;
+    Mode = IsSPMD ? CGOpenMPRuntimeNVPTX::EM_SPMD
+                  : CGOpenMPRuntimeNVPTX::EM_NonSPMD;
   }
   ~ExecutionModeRAII() { Mode = SavedMode; }
 };
@@ -579,8 +581,9 @@ void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
   WorkerFn->setDoesNotRecurse();
 }
 
-bool CGOpenMPRuntimeNVPTX::isInSpmdExecutionMode() const {
-  return IsInSPMDExecutionMode;
+CGOpenMPRuntimeNVPTX::ExecutionMode
+CGOpenMPRuntimeNVPTX::getExecutionMode() const {
+  return CurrentExecutionMode;
 }
 
 static CGOpenMPRuntimeNVPTX::DataSharingMode
@@ -589,34 +592,96 @@ getDataSharingMode(CodeGenModule &CGM) {
                                           : CGOpenMPRuntimeNVPTX::Generic;
 }
 
-/// Check for inner (nested) SPMD construct, if any
-static bool hasNestedSPMDDirective(const OMPExecutableDirective &D) {
-  const auto *CS = D.getCapturedStmt(OMPD_target);
-  const auto *Body = CS->getCapturedStmt()->IgnoreContainers();
-  const Stmt *ChildStmt = nullptr;
+/// If \p Body is a CompoundStmt with exactly one child statement, returns that
+/// child; otherwise returns \p Body itself.
+static const Stmt *getSingleCompoundChild(const Stmt *Body) {
   if (const auto *C = dyn_cast<CompoundStmt>(Body))
     if (C->size() == 1)
-      ChildStmt = C->body_front();
-  if (!ChildStmt)
-    return false;
+      return C->body_front();
+  return Body;
+}
+
+/// Check if the parallel directive has an 'if' clause with a non-constant or
+/// false condition.
+static bool hasParallelIfClause(ASTContext &Ctx,
+                                const OMPExecutableDirective &D) {
+  for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
+    OpenMPDirectiveKind NameModifier = C->getNameModifier();
+    if (NameModifier != OMPD_parallel && NameModifier != OMPD_unknown)
+      continue;
+    const Expr *Cond = C->getCondition();
+    bool Result;
+    if (!Cond->EvaluateAsBooleanCondition(Result, Ctx) || !Result)
+      return true;
+  }
+  return false;
+}
+
+/// Check for an inner (nested) SPMD construct, if any.
+static bool hasNestedSPMDDirective(ASTContext &Ctx,
+                                   const OMPExecutableDirective &D) {
+  const auto *CS = D.getInnermostCapturedStmt();
+  const auto *Body = CS->getCapturedStmt()->IgnoreContainers();
+  const Stmt *ChildStmt = getSingleCompoundChild(Body);
 
   if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
-    // TODO: add further analysis for inner teams|distribute directives, if any.
     switch (D.getDirectiveKind()) {
     case OMPD_target:
-      return (isOpenMPParallelDirective(DKind) &&
-              !isOpenMPTeamsDirective(DKind) &&
-              !isOpenMPDistributeDirective(DKind)) ||
-             isOpenMPSimdDirective(DKind) ||
-             DKind == OMPD_teams_distribute_parallel_for;
+      if ((isOpenMPParallelDirective(DKind) &&
+           !hasParallelIfClause(Ctx, *NestedDir)) ||
+          isOpenMPSimdDirective(DKind))
+        return true;
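+      // Otherwise, look through a nested teams|teams distribute region (and a
+      // possible inner distribute) for an SPMD-compatible parallel or simd
+      // directive.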
+      if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) {
+        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+        if (!Body)
+          return false;
+        ChildStmt = getSingleCompoundChild(Body);
+        if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+          DKind = NND->getDirectiveKind();
+          if ((isOpenMPParallelDirective(DKind) &&
+               !hasParallelIfClause(Ctx, *NND)) ||
+              isOpenMPSimdDirective(DKind))
+            return true;
+          if (DKind == OMPD_distribute) {
+            Body = NND->getInnermostCapturedStmt()->IgnoreContainers();
+            if (!Body)
+              return false;
+            ChildStmt = getSingleCompoundChild(Body);
+            if (!ChildStmt)
+              return false;
+            if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+              DKind = NND->getDirectiveKind();
+              return (isOpenMPParallelDirective(DKind) &&
+                      !hasParallelIfClause(Ctx, *NND)) ||
+                     isOpenMPSimdDirective(DKind);
+            }
+          }
+        }
+      }
+      return false;
     case OMPD_target_teams:
-      return (isOpenMPParallelDirective(DKind) &&
-              !isOpenMPDistributeDirective(DKind)) ||
-             isOpenMPSimdDirective(DKind) ||
-             DKind == OMPD_distribute_parallel_for;
+      if ((isOpenMPParallelDirective(DKind) &&
+           !hasParallelIfClause(Ctx, *NestedDir)) ||
+          isOpenMPSimdDirective(DKind))
+        return true;
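+      // Otherwise, look through a nested distribute region for an
+      // SPMD-compatible parallel or simd directive.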
+      if (DKind == OMPD_distribute) {
+        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+        if (!Body)
+          return false;
+        ChildStmt = getSingleCompoundChild(Body);
+        if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+          DKind = NND->getDirectiveKind();
+          return (isOpenMPParallelDirective(DKind) &&
+                  !hasParallelIfClause(Ctx, *NND)) ||
+                 isOpenMPSimdDirective(DKind);
+        }
+      }
+      return false;
     case OMPD_target_teams_distribute:
-      return isOpenMPParallelDirective(DKind) || isOpenMPSimdDirective(DKind);
+      return (isOpenMPParallelDirective(DKind) &&
+              !hasParallelIfClause(Ctx, *NestedDir)) ||
+             isOpenMPSimdDirective(DKind);
     case OMPD_target_simd:
     case OMPD_target_parallel:
     case OMPD_target_parallel_for:
@@ -674,20 +739,22 @@ static bool hasNestedSPMDDirective(const OMPExecutableDirective &D) {
   return false;
 }
 
-static bool supportsSPMDExecutionMode(const OMPExecutableDirective &D) {
+static bool supportsSPMDExecutionMode(ASTContext &Ctx,
+                                      const OMPExecutableDirective &D) {
   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
   switch (DirectiveKind) {
   case OMPD_target:
   case OMPD_target_teams:
   case OMPD_target_teams_distribute:
-    return hasNestedSPMDDirective(D);
-  case OMPD_target_simd:
+    return hasNestedSPMDDirective(Ctx, D);
   case OMPD_target_parallel:
   case OMPD_target_parallel_for:
   case OMPD_target_parallel_for_simd:
-  case OMPD_target_teams_distribute_simd:
   case OMPD_target_teams_distribute_parallel_for:
   case OMPD_target_teams_distribute_parallel_for_simd:
+    return !hasParallelIfClause(Ctx, D);
+  case OMPD_target_simd:
+  case OMPD_target_teams_distribute_simd:
     return true;
   case OMPD_parallel:
   case OMPD_for:
@@ -744,7 +811,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D,
                                              llvm::Constant *&OutlinedFnID,
                                              bool IsOffloadEntry,
                                              const RegionCodeGenTy &CodeGen) {
-  ExecutionModeRAII ModeRAII(IsInSPMDExecutionMode, /*NewMode=*/false);
+  ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/false);
   EntryFunctionState EST;
   WorkerFunctionState WST(CGM, D.getLocStart());
   Work.clear();
@@ -858,7 +925,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D,
                                           llvm::Constant *&OutlinedFnID,
                                           bool IsOffloadEntry,
                                           const RegionCodeGenTy &CodeGen) {
-  ExecutionModeRAII ModeRAII(IsInSPMDExecutionMode, /*NewMode=*/true);
+  ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/true);
   EntryFunctionState EST;
 
   // Emit target region as a standalone region.
@@ -905,11 +972,13 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
 
   CGF.EmitBlock(ExecuteBB);
 
+  IsInTargetMasterThreadRegion = true;
   emitGenericVarsProlog(CGF, D.getLocStart());
 }
 
 void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
                                                EntryFunctionState &EST) {
+  IsInTargetMasterThreadRegion = false;
   if (!CGF.HaveInsertPoint())
     return;
 
@@ -1380,7 +1449,7 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
 
   assert(!ParentName.empty() && "Invalid target region parent name!");
 
-  bool Mode = supportsSPMDExecutionMode(D);
+  bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
   if (Mode)
     emitSpmdKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
                    CodeGen);
@@ -1401,8 +1470,8 @@ void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
                                               OpenMPProcBindClauseKind ProcBind,
                                               SourceLocation Loc) {
   // Do nothing in case of Spmd mode and L0 parallel.
-  // TODO: If in Spmd mode and L1 parallel emit the clause.
-  if (isInSpmdExecutionMode())
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD &&
+      IsInTargetMasterThreadRegion)
     return;
 
   CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
@@ -1412,8 +1481,8 @@ void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
                                                 llvm::Value *NumThreads,
                                                 SourceLocation Loc) {
   // Do nothing in case of Spmd mode and L0 parallel.
-  // TODO: If in Spmd mode and L1 parallel emit the clause.
-  if (isInSpmdExecutionMode())
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD &&
+      IsInTargetMasterThreadRegion)
     return;
 
   CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
@@ -1457,7 +1526,8 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
       cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
           D, ThreadIDVar, InnermostKind, CodeGen));
   IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
-  if (!isInSpmdExecutionMode() && !IsInParallelRegion) {
+  if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD &&
+      !IsInParallelRegion) {
     llvm::Function *WrapperFun =
         createParallelDataSharingWrapper(OutlinedFun, D);
     WrapperFunctionsMap[OutlinedFun] = WrapperFun;
@@ -1635,7 +1705,7 @@ void CGOpenMPRuntimeNVPTX::emitParallelCall(
   if (!CGF.HaveInsertPoint())
     return;
 
-  if (isInSpmdExecutionMode())
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
     emitSpmdParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
   else
     emitNonSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
@@ -1759,6 +1829,8 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
       SeqGen(CGF, Action);
     } else if (IsInTargetMasterThreadRegion) {
       L0ParallelGen(CGF, Action);
+    } else if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD) {
+      RCG(CGF);
     } else {
       // Check for master and then parallelism:
       // if (is_master) {
@@ -1770,20 +1842,18 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
       // }
       CGBuilderTy &Bld = CGF.Builder;
       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
-      if (!isInSpmdExecutionMode()) {
-        llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
-        llvm::BasicBlock *ParallelCheckBB =
-            CGF.createBasicBlock(".parallelcheck");
-        llvm::Value *IsMaster =
-            Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
-        Bld.CreateCondBr(IsMaster, MasterCheckBB, ParallelCheckBB);
-        CGF.EmitBlock(MasterCheckBB);
-        L0ParallelGen(CGF, Action);
-        CGF.EmitBranch(ExitBB);
-        // There is no need to emit line number for unconditional branch.
-        (void)ApplyDebugLocation::CreateEmpty(CGF);
-        CGF.EmitBlock(ParallelCheckBB);
-      }
+      llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
+      llvm::BasicBlock *ParallelCheckBB =
+          CGF.createBasicBlock(".parallelcheck");
+      llvm::Value *IsMaster =
+          Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
+      Bld.CreateCondBr(IsMaster, MasterCheckBB, ParallelCheckBB);
+      CGF.EmitBlock(MasterCheckBB);
+      L0ParallelGen(CGF, Action);
+      CGF.EmitBranch(ExitBB);
+      // There is no need to emit line number for unconditional branch.
+      (void)ApplyDebugLocation::CreateEmpty(CGF);
+      CGF.EmitBlock(ParallelCheckBB);
       llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
       llvm::Value *ThreadID = getThreadID(CGF, Loc);
       llvm::Value *PL = CGF.EmitRuntimeCall(
@@ -1827,14 +1897,49 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
   // is added on Spmd target directives.
   llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
 
-  Address ZeroAddr = CGF.CreateMemTemp(
-      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
-      ".zero.addr");
+  Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
+                                           /*DestWidth=*/32, /*Signed=*/1),
+                                       ".zero.addr");
   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
-  OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
-  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
-  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
-  emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
+  Address ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
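+  // Directly calls the outlined parallel region, passing the thread id
+  // address, a zeroed bound thread id and the captured variables.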
+  auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr,
+                    ThreadIDAddr](CodeGenFunction &CGF,
+                                  PrePostActionTy &Action) {
+    Action.Enter(CGF);
+
+    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
+    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
+    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+    emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
+  };
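+  // Wraps the same call in __kmpc_serialized_parallel /
+  // __kmpc_end_serialized_parallel to serialize the L2+ parallel region.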
+  auto &&SeqGen = [this, &CodeGen, Loc](CodeGenFunction &CGF,
+                                        PrePostActionTy &) {
+
+    RegionCodeGenTy RCG(CodeGen);
+    llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+    llvm::Value *ThreadID = getThreadID(CGF, Loc);
+    llvm::Value *Args[] = {RTLoc, ThreadID};
+
+    NVPTXActionTy Action(
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
+        Args,
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
+        Args);
+    RCG.setAction(Action);
+    RCG(CGF);
+  };
+
+  if (IsInTargetMasterThreadRegion) {
+    RegionCodeGenTy RCG(CodeGen);
+    RCG(CGF);
+  } else {
+    // If we are not in the target master thread region, this is definitely L2
+    // parallelism or deeper, because in SPMD mode there is always an L1
+    // parallel level, so we don't need to check for orphaned directives.
+    RegionCodeGenTy RCG(SeqGen);
+    RCG(CGF);
+  }
 }
 
 void CGOpenMPRuntimeNVPTX::emitCriticalRegion(
index 88420b9..c7d647b 100644 (file)
@@ -24,6 +24,16 @@ namespace clang {
 namespace CodeGen {
 
 class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
+public:
+  /// Defines the execution mode.
+  enum ExecutionMode {
+    /// SPMD execution mode (all threads are worker threads).
+    EM_SPMD,
+    /// Non-SPMD execution mode (1 master thread, others are workers).
+    EM_NonSPMD,
+    /// Unknown execution mode (orphaned directive).
+    EM_Unknown,
+  };
 private:
   /// Parallel outlined function work for workers to execute.
   llvm::SmallVector<llvm::Function *, 16> Work;
@@ -44,7 +54,7 @@ private:
     void createWorkerFunction(CodeGenModule &CGM);
   };
 
-  bool isInSpmdExecutionMode() const;
+  ExecutionMode getExecutionMode() const;
 
   /// Emit the worker function for the current target region.
   void emitWorkerFunction(WorkerFunctionState &WST);
@@ -334,7 +344,7 @@ private:
   /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
   /// target region and used by containing directives such as 'parallel'
   /// to emit optimized code.
-  bool IsInSPMDExecutionMode = false;
+  ExecutionMode CurrentExecutionMode = EM_Unknown;
 
   /// true if we're emitting the code for the target region and next parallel
   /// region is L0 for sure.
index 071b08d..8f496eb 100644 (file)
@@ -58,6 +58,7 @@ tx ftemplate(int n) {
     #pragma omp critical
     ++a;
     }
+    ++a;
   }
   return a;
 }
index eec9507..aa054ff 100644 (file)
@@ -9,8 +9,9 @@
 #define HEADER
 
-// Check that the execution mode of all 2 target regions is set to Generic Mode.
+// Check that the execution mode of the first two target regions is set to
+// Generic Mode and of the third one to SPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l26}}_exec_mode = weak constant i8 1
-// CHECK-DAG: {{@__omp_offloading_.+l31}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l27}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 0
 
 template<typename tx>
 tx ftemplate(int n) {
@@ -33,6 +34,13 @@ tx ftemplate(int n) {
     aa = 1;
   }
 
+  #pragma omp target teams
+  {
+#pragma omp parallel
+#pragma omp parallel
+    aa = 1;
+  }
+
   return a;
 }
 
@@ -44,14 +52,14 @@ int bar(int n){
   return a;
 }
 
-  // CHECK-NOT: define {{.*}}void {{@__omp_offloading_.+template.+l21}}_worker()
+  // CHECK-NOT: define {{.*}}void {{@__omp_offloading_.+template.+l22}}_worker()
 
 
 
 
 
 
-  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}_worker()
+  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l27}}_worker()
   // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
   // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
   // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -86,7 +94,7 @@ int bar(int n){
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
-  // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+template.+l26]](i[[SZ:32|64]] [[A:%[^)]+]])
+  // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+template.+l27]](i[[SZ:32|64]] [[A:%[^)]+]])
   // CHECK: store i[[SZ]] [[A]], i[[SZ]]* [[A_ADDR:%.+]], align
   // CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i8*
 
@@ -137,7 +145,7 @@ int bar(int n){
 
 
 
-  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l31}}_worker()
+  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l32}}_worker()
   // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
   // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
   // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -172,7 +180,7 @@ int bar(int n){
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
-  // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+template.+l31]](i[[SZ:32|64]] [[AA:%[^)]+]])
+  // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+template.+l32]](i[[SZ:32|64]] [[AA:%[^)]+]])
   // CHECK: store i[[SZ]] [[AA]], i[[SZ]]* [[AA_ADDR:%.+]], align
   // CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
 
@@ -218,5 +226,24 @@ int bar(int n){
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
+// CHECK: define weak void @__omp_offloading_{{.*}}ftemplate{{.*}}_l37(
+// CHECK: call void @__kmpc_spmd_kernel_init(
+// CHECK: call i8* @__kmpc_data_sharing_push_stack(
+// CHECK-NOT: call void @__kmpc_serialized_parallel(
+// CHECK: call void [[L0:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.*}})
+// CHECK-NOT: call void @__kmpc_end_serialized_parallel(
+// CHECK: call void @__kmpc_data_sharing_pop_stack(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret
+
+// CHECK: define internal void [[L0]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* dereferenceable
+// CHECK: call void @__kmpc_serialized_parallel(
+// CHECK: call void [[L1:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.+}})
+// CHECK: call void @__kmpc_end_serialized_parallel(
+// CHECK: ret void
+
+// CHECK: define internal void [[L1]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* dereferenceable
+// CHECK: store i16 1, i16* %
+// CHECK: ret void
 
 #endif
index b9bd9fe..2e712e2 100644 (file)
@@ -13,7 +13,7 @@ int a;
 int foo(int *a);
 
 int main(int argc, char **argv) {
-#pragma omp target teams distribute parallel for map(tofrom:a) if(parallel:argc)
+#pragma omp target teams distribute parallel for map(tofrom:a) if(target:argc) schedule(static, a)
   for (int i= 0; i < argc; ++i)
     a = foo(&i) + foo(&a) + foo(&argc);
   return 0;
index aab9f50..ebd761f 100644 (file)
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=45
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=45 | FileCheck %s
 // expected-no-diagnostics
 
 int main() {
@@ -11,7 +11,7 @@ int main() {
   int c[10][10][10];
 #pragma omp target parallel firstprivate(a, b) map(tofrom          \
                                                    : c) map(tofrom \
-                                                            : bb) if (a)
+                                                            : bb) if (target:a)
   {
     int &f = c[1][1][1];
     int &g = a;
@@ -54,7 +54,7 @@ int main() {
   return 0;
 }
 
-// CHECK: define internal void @__omp_offloading{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* noalias{{[^,]+}}, i1 {{[^)]+}})
+// CHECK: define internal void @__omp_offloading{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* noalias{{[^,]+}})
 // CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
 // CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8* {{[^)]+}})