if (!RTCall)
return false;
- bool WasSplit = splitTargetDataBeginRTC(RTCall);
+ // TODO: Check if can be moved upwards.
+ bool WasSplit = false;
+ Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
+ if (WaitMovementPoint)
+ WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
+
Changed |= WasSplit;
return WasSplit;
};
return Changed;
}
+ /// Returns the instruction where the "wait" counterpart of \p RuntimeCall can
+ /// be moved to. Returns nullptr if the movement is not possible, or not worth
+ /// it. Note: the returned instruction is the insertion point, i.e. the wait is
+ /// placed immediately before it.
+ Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
+ // FIXME: This traverses only the BasicBlock where RuntimeCall is.
+ // Make it traverse the CFG.
+
+ Instruction *CurrentI = &RuntimeCall;
+ bool IsWorthIt = false;
+ while ((CurrentI = CurrentI->getNextNode())) {
+
+ // TODO: Once we detect the regions to be offloaded we should use the
+ // alias analysis manager to check if CurrentI may modify one of
+ // the offloaded regions.
+ if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
+ // Stop at the first instruction that may touch memory: the wait must
+ // execute before it. Only report a movement point if we actually
+ // skipped over at least one instruction; otherwise splitting the
+ // runtime call buys nothing.
+ if (IsWorthIt)
+ return CurrentI;
+
+ return nullptr;
+ }
+
+ // FIXME: For now, moving the wait over any instruction without side
+ // effects is considered worth it.
+ IsWorthIt = true;
+ }
+
+ // Reached the end of the BasicBlock without hitting a memory-touching
+ // instruction: the wait can be placed right before the terminator.
+ return RuntimeCall.getParent()->getTerminator();
+ }
+
/// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
- bool splitTargetDataBeginRTC(CallInst *RuntimeCall) {
+ bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
+ Instruction &WaitMovementPoint) {
auto &IRBuilder = OMPInfoCache.OMPBuilder;
// Add "issue" runtime call declaration:
// declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
// Change RuntimeCall call site for its asynchronous version.
SmallVector<Value *, 8> Args;
- for (auto &Arg : RuntimeCall->args())
+ for (auto &Arg : RuntimeCall.args())
Args.push_back(Arg.get());
CallInst *IssueCallsite =
- CallInst::Create(IssueDecl, Args, "handle", RuntimeCall);
- RuntimeCall->eraseFromParent();
+ CallInst::Create(IssueDecl, Args, "handle", &RuntimeCall);
+ RuntimeCall.eraseFromParent();
// Add "wait" runtime call declaration:
// declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
IssueCallsite->getArgOperand(0), // device_id.
IssueCallsite // returned handle.
};
- CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"",
- IssueCallsite->getNextNode());
+ CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
return true;
}
; CHECK-NEXT: store double* %a, double** %4, align 8
; CHECK-NEXT: %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 1, i8** %1, i8** %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null)
-; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle)
; CHECK-NEXT: %5 = bitcast double* %a to i64*
+
+; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle)
+
; CHECK-NEXT: %6 = load i64, i64* %5, align 8
; CHECK-NEXT: %7 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_baseptrs4, i64 0, i64 0
; CHECK-NEXT: %8 = bitcast [1 x i8*]* %.offload_baseptrs4 to i64*
; CHECK-NEXT: %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1
; CHECK-NEXT: store i64 4, i64* %10, align 8
-; CHECK-NEXT: %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 2, i8** %1, i8** %3, i64* %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null)
-; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle)
+; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null)
; CHECK-NEXT: %11 = load i32, i32* %size.addr, align 4
; CHECK-NEXT: %size.casted = zext i32 %11 to i64
; CHECK-NEXT: %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1
; CHECK-NEXT: store i64 4, i64* %10, align 8
-; CHECK-NEXT: %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 2, i8** %1, i8** %3, i64* %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null)
-; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle)
+; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null)
; CHECK-NEXT: %11 = load i32, i32* %size.addr, align 4
; CHECK-NEXT: %size.casted = zext i32 %11 to i64
; CHECK-NEXT: store i64 %0, i64* %5, align 8
; CHECK-NEXT: %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 1, i8** %1, i8** %3, i64* %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null)
-; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle)
; CHECK-NEXT: %rem = urem i32 %call, %size
+
+; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle)
+
; CHECK-NEXT: call void @__tgt_target_data_end_mapper(i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null)
; CHECK-NEXT: ret i32 %rem
;