[OpenMP][FIX] Be more deliberate about invalidating the AAKernelInfo state
authorJohannes Doerfert <johannes@jdoerfert.de>
Wed, 8 Sep 2021 20:54:27 +0000 (15:54 -0500)
committerJohannes Doerfert <johannes@jdoerfert.de>
Thu, 23 Sep 2021 05:04:30 +0000 (00:04 -0500)
This patch fixes a problem when the AAKernelInfo state was invalidated,
e.g., due to `optnone` for a kernel, but not all parts indicated the
invalidation properly. We further eliminate most full state
invalidations as they should never be necessary.

Differential Revision: https://reviews.llvm.org/D109468

llvm/lib/Transforms/IPO/OpenMPOpt.cpp
llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll

index 8ae0780..0ec36d9 100644 (file)
@@ -579,7 +579,9 @@ struct KernelInfoState : AbstractState {
   /// See AbstractState::indicatePessimisticFixpoint(...)
   ChangeStatus indicatePessimisticFixpoint() override {
     IsAtFixpoint = true;
+    ReachingKernelEntries.indicatePessimisticFixpoint();
     SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+    ReachedKnownParallelRegions.indicatePessimisticFixpoint();
     ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
     return ChangeStatus::CHANGED;
   }
@@ -626,13 +628,13 @@ struct KernelInfoState : AbstractState {
   KernelInfoState operator^=(const KernelInfoState &KIS) {
     // Do not merge two different _init and _deinit call sites.
     if (KIS.KernelInitCB) {
-      if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
-        indicatePessimisticFixpoint();
+      if(KernelInitCB && KernelInitCB != KIS.KernelInitCB)
+        llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt assumptions.");
       KernelInitCB = KIS.KernelInitCB;
     }
     if (KIS.KernelDeinitCB) {
-      if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
-        indicatePessimisticFixpoint();
+      if(KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
+        llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt assumptions.");
       KernelDeinitCB = KIS.KernelDeinitCB;
     }
     SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
@@ -2828,9 +2830,13 @@ struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
            std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
                                                                : "") +
            std::string(" #PRs: ") +
-           std::to_string(ReachedKnownParallelRegions.size()) +
+           (ReachedKnownParallelRegions.isValidState()
+                ? std::to_string(ReachedKnownParallelRegions.size())
+                : "<invalid>") +
            ", #Unknown PRs: " +
-           std::to_string(ReachedUnknownParallelRegions.size()) +
+           (ReachedUnknownParallelRegions.isValidState()
+                ? std::to_string(ReachedUnknownParallelRegions.size())
+                : "<invalid>") +
            ", #Reaching Kernels: " +
            (ReachingKernelEntries.isValidState()
                 ? std::to_string(ReachingKernelEntries.size())
@@ -3036,7 +3042,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
     // If we can we change the execution mode to SPMD-mode otherwise we build a
     // custom state machine.
     if (!mayContainParallelRegion() || !changeToSPMDMode(A))
-      buildCustomStateMachine(A);
+      return buildCustomStateMachine(A);
 
     return ChangeStatus::CHANGED;
   }
@@ -3340,7 +3346,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
   ChangeStatus buildCustomStateMachine(Attributor &A) {
     // If we have disabled state machine rewrites, don't make a custom one
     if (DisableOpenMPOptStateMachineRewrite)
-      return indicatePessimisticFixpoint();
+      return ChangeStatus::UNCHANGED;
 
     assert(ReachedKnownParallelRegions.isValidState() &&
            "Custom state machine with invalid parallel region states?");
@@ -3683,8 +3689,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
 
     bool UsedAssumedInformationInCheckCallInst = false;
     if (!A.checkForAllCallLikeInstructions(
-            CheckCallInst, *this, UsedAssumedInformationInCheckCallInst))
+            CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) {
+      LLVM_DEBUG(dbgs() << TAG << "Failed to visit all call-like instructions!\n";);
       return indicatePessimisticFixpoint();
+    }
 
     // If we haven't used any assumed information for the SPMD state we can fix
     // it.
@@ -3892,18 +3900,16 @@ struct AAKernelInfoCallSite : AAKernelInfo {
       // We do not look into tasks right now, just give up.
       SPMDCompatibilityTracker.insert(&CB);
       ReachedUnknownParallelRegions.insert(&CB);
-      indicatePessimisticFixpoint();
-      return;
+      break;
     case OMPRTL___kmpc_alloc_shared:
     case OMPRTL___kmpc_free_shared:
       // Return without setting a fixpoint, to be resolved in updateImpl.
       return;
     default:
       // Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
-      // generally.
+      // generally. However, they do not hide parallel regions.
       SPMDCompatibilityTracker.insert(&CB);
-      indicatePessimisticFixpoint();
-      return;
+      break;
     }
     // All other OpenMP runtime calls will not reach parallel regions so they
     // can be safely ignored for now. Since it is a known OpenMP runtime call we
index a6cbd20..9d2014c 100644 (file)
@@ -68,9 +68,10 @@ define weak void @kernel2() #0 {
 }
 
 define internal void @helper0() {
-; CHECK-LABEL: define {{[^@]+}}@helper0() {
-; CHECK-NEXT:    [[THREADLIMIT:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() #[[ATTR1:[0-9]+]]
-; CHECK-NEXT:    store i32 undef, i32* @G, align 4
+; CHECK-LABEL: define {{[^@]+}}@helper0
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[THREADLIMIT:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() #[[ATTR1]]
+; CHECK-NEXT:    store i32 [[THREADLIMIT]], i32* @G, align 4
 ; CHECK-NEXT:    ret void
 ;
   %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
@@ -81,11 +82,13 @@ define internal void @helper0() {
 define internal void @helper1() {
 ; CHECK-LABEL: define {{[^@]+}}@helper1() {
 ; CHECK-NEXT:    [[THREADLIMIT:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() #[[ATTR1]]
-; CHECK-NEXT:    unreachable
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[THREADLIMIT]], 666
+; CHECK-NEXT:    br i1 [[C]], label [[F:%.*]], label [[T:%.*]]
 ; CHECK:       t:
-; CHECK-NEXT:    unreachable
+; CHECK-NEXT:    call void @helper0() #[[ATTR1]]
+; CHECK-NEXT:    ret void
 ; CHECK:       f:
-; CHECK-NEXT:    unreachable
+; CHECK-NEXT:    ret void
 ;
   %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
   %c = icmp eq i32 %threadLimit, 666
@@ -100,7 +103,7 @@ f:
 define internal void @helper2() {
 ; CHECK-LABEL: define {{[^@]+}}@helper2() {
 ; CHECK-NEXT:    [[THREADLIMIT:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() #[[ATTR1]]
-; CHECK-NEXT:    store i32 undef, i32* @G, align 4
+; CHECK-NEXT:    store i32 [[THREADLIMIT]], i32* @G, align 4
 ; CHECK-NEXT:    ret void
 ;
   %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()