Status status;
stream_->ThenMemcpy(&dev_dst_ptr, src_ptr, total_bytes);
// TODO(hpucha): Make this asynchronous.
- Status block_status = stream_->BlockHostUntilDoneWithStatus();
+ Status block_status = stream_->BlockHostUntilDone();
if (!block_status.ok()) {
status = xla::InternalError(
"Failed to complete data transfer on stream %p: %s", stream_,
Status status;
stream_->ThenMemcpy(dst_ptr, dev_src_ptr, total_bytes);
// TODO(hpucha): Make this asynchronous.
- Status block_status = stream_->BlockHostUntilDoneWithStatus();
+ Status block_status = stream_->BlockHostUntilDone();
if (!block_status.ok()) {
status = xla::InternalError(
"Failed to complete data transfer on stream %p: %s", stream_,
}
for (const auto& options : run_options) {
TF_RET_CHECK(options.stream() != nullptr);
- TF_RETURN_IF_ERROR(options.stream()->BlockHostUntilDoneWithStatus());
+ TF_RETURN_IF_ERROR(options.stream()->BlockHostUntilDone());
}
return return_values;
}
if (profile != nullptr) {
VLOG(1) << "enqueueing 'stop timer' and blocking host until done...";
stream->ThenStopTimer(timer.get());
- SE_CHECK_OK(stream->BlockHostUntilDoneWithStatus());
+ SE_CHECK_OK(stream->BlockHostUntilDone());
VLOG(1) << "done with block-host-until-done";
// Merge in run-time profile information from execution_profile.
// Blocks the calling host thread until every stream associated with this
// async execution has drained all enqueued work. Returns the first
// non-OK status reported by a stream (via TF_RETURN_IF_ERROR), or OK if
// all streams completed successfully. NOTE(review): `streams_` is a
// member not visible in this hunk — presumably the set of streams this
// execution enqueued work on; confirm against the class definition.
tensorflow::Status AsyncExecution::BlockUntilDone() const {
for (auto& stream : streams_) {
-    TF_RETURN_IF_ERROR(stream->BlockHostUntilDoneWithStatus());
+    TF_RETURN_IF_ERROR(stream->BlockHostUntilDone());
}
return tensorflow::Status::OK();
}
~HloExecutionProfiler() {
if (do_profile_) {
stream_->ThenStopTimer(execution_timer_.get());
- stream_->BlockHostUntilDoneWithStatus().IgnoreError();
+ stream_->BlockHostUntilDone().IgnoreError();
profile_->set_total_cycles_executed(
*computation_, execution_timer_->Nanoseconds() * clock_rate_ghz_);
}
void FinishOperation(const HloInstruction* hlo_instruction) {
if (do_profile_) {
stream_->ThenStopTimer(per_op_timer_.get());
- stream_->BlockHostUntilDoneWithStatus().IgnoreError();
+ stream_->BlockHostUntilDone().IgnoreError();
profile_->SetCyclesTakenBy(
hlo_instruction, per_op_timer_->Nanoseconds() * clock_rate_ghz_);
}
// If this thunk requests it, wait for all currently-executing thunks to
// finish. This is useful e.g. if the thunk is about to perform autotuning.
if (thunk->ShouldHaltAllActivityBeforeRunning(stream)) {
- TF_RETURN_IF_ERROR(main_stream->BlockHostUntilDoneWithStatus());
+ TF_RETURN_IF_ERROR(main_stream->BlockHostUntilDone());
}
profiler.StartOperation();
// TODO(b/30100571): we could potentially postpone deallocating the temp
// buffers until a different computation is executed.
if (block_host_until_done) {
- Status block_status = main_stream->BlockHostUntilDoneWithStatus();
+ Status block_status = main_stream->BlockHostUntilDone();
if (!block_status.ok()) {
return InternalError(
"Failed to complete all kernels launched on stream %p: %s",
// infeed requests, blocking on the stream might be
// heavy-handed. Figure out if finer-grained acknowledgement is
// possible.
- Status block_status = stream->BlockHostUntilDoneWithStatus();
+ Status block_status = stream->BlockHostUntilDone();
if (!block_status.ok()) {
for (gpu::InfeedBuffer* b : buffers) {
b->Done();
buffer->length());
}
- Status block_status = stream->BlockHostUntilDoneWithStatus();
+ Status block_status = stream->BlockHostUntilDone();
if (!block_status.ok()) {
return InternalError("Failed to complete data transfer on stream %p: %s",
stream, block_status.error_message().c_str());
// Copy the result of condition computation and break the loop if 'false'.
bool condition_result;
stream->ThenMemcpy(&condition_result, condition_result_data, sizeof(bool));
- Status block_status = stream->BlockHostUntilDoneWithStatus();
+ Status block_status = stream->BlockHostUntilDone();
if (!block_status.ok()) {
return InternalError(
"Failed to complete all kernels launched on stream %p: %s", stream,
se::DeviceMemoryBase result,
executable->ExecuteOnStream(&service_run_options, arguments,
/*hlo_execution_profile=*/nullptr));
- TF_RETURN_IF_ERROR(stream.BlockHostUntilDoneWithStatus());
+ TF_RETURN_IF_ERROR(stream.BlockHostUntilDone());
allocations_.push_back(result);
// Wait for all executions to complete.
for (int64 i = 0; i < streams.size(); ++i) {
- Status block_status = streams[i]->BlockHostUntilDoneWithStatus();
+ Status block_status = streams[i]->BlockHostUntilDone();
if (!block_status.ok()) {
return InternalError("failed to complete execution for stream %lld: %s",
i, block_status.error_message().c_str());
auto out_gpu_mem = AsDeviceMemory(out_gpu.flat<float>().data());
stream->ThenMemcpy(out_cpu.flat<float>().data(), out_gpu_mem,
out_cpu.TotalBytes());
- SE_ASSERT_OK(stream->BlockHostUntilDoneWithStatus());
+ SE_ASSERT_OK(stream->BlockHostUntilDone());
test::ExpectTensorEqual<float>(test_case->expected, out_cpu);
}
}
for (int i = 0; i < num_ranks; ++i) {
auto* device = devices->at(i % devices->size());
auto* stream = device->tensorflow_gpu_device_info()->stream;
- SE_ASSERT_OK(stream->BlockHostUntilDoneWithStatus());
+ SE_ASSERT_OK(stream->BlockHostUntilDone());
}
std::random_shuffle(case_and_device_num.begin(), case_and_device_num.end());
if (!dev_info) {
return errors::Internal("Failed to find dest device GPUDeviceInfo");
}
- return dev_info->stream->BlockHostUntilDoneWithStatus();
+ return dev_info->stream->BlockHostUntilDone();
}
Status GPUUtil::SyncAll(Device* gpu_device) {
const auto& dimensions = input_dimensions[i];
tmp.resize(dimensions.ElementCount());
stream->ThenMemcpyD2H<float>(*input_data[i], &tmp);
- port::Status block_status = stream->BlockHostUntilDoneWithStatus();
+ port::Status block_status = stream->BlockHostUntilDone();
if (!block_status.ok()) {
LOG(ERROR) << "BlockHostUntilDone failed: " << block_status;
return false;