break;
// GPU mode: copy count() floats from data_ptr into the GPU data of
// input_blobs[i]. The patch swaps the explicit cudaMemcpyHostToDevice for
// cudaMemcpyDefault, so the copy direction is inferred by the CUDA runtime
// from the pointer values (the CUDA docs restrict this to systems with
// unified virtual addressing).
// NOTE(review): the cudaError_t returned by cudaMemcpy is discarded here,
// so a failed copy goes unnoticed; consider CUDA_CHECK — TODO confirm the
// macro is available in this translation unit.
case Caffe::GPU:
cudaMemcpy(input_blobs[i]->mutable_gpu_data(), data_ptr,
- sizeof(float) * input_blobs[i]->count(), cudaMemcpyHostToDevice);
+ sizeof(float) * input_blobs[i]->count(), cudaMemcpyDefault);
break;
default:
// Any mode not handled by the cases above is logged at FATAL severity.
LOG(FATAL) << "Unknown Caffe mode.";
break;
// GPU mode: copy count() floats from the GPU data of output_blobs[i] into
// data_ptr. Previously tagged cudaMemcpyDeviceToHost; with cudaMemcpyDefault
// the runtime infers the direction from the pointers (needs unified virtual
// addressing per the CUDA docs).
// NOTE(review): the return value of cudaMemcpy is not checked here.
case Caffe::GPU:
cudaMemcpy(data_ptr, output_blobs[i]->gpu_data(),
- sizeof(float) * output_blobs[i]->count(), cudaMemcpyDeviceToHost);
+ sizeof(float) * output_blobs[i]->count(), cudaMemcpyDefault);
break;
default:
// Any mode not handled by the cases above is logged at FATAL severity.
LOG(FATAL) << "Unknown Caffe mode.";
break;
// GPU mode: copy count() floats from data_ptr into the GPU diff buffer of
// output_blobs[i]. Previously tagged cudaMemcpyHostToDevice; with
// cudaMemcpyDefault the runtime infers the direction from the pointers
// (needs unified virtual addressing per the CUDA docs).
// NOTE(review): the return value of cudaMemcpy is not checked here.
case Caffe::GPU:
cudaMemcpy(output_blobs[i]->mutable_gpu_diff(), data_ptr,
- sizeof(float) * output_blobs[i]->count(), cudaMemcpyHostToDevice);
+ sizeof(float) * output_blobs[i]->count(), cudaMemcpyDefault);
break;
default:
// Any mode not handled by the cases above is logged at FATAL severity.
LOG(FATAL) << "Unknown Caffe mode.";
break;
// GPU mode: copy count() floats from the GPU diff buffer of input_blobs[i]
// into data_ptr. Previously tagged cudaMemcpyDeviceToHost; with
// cudaMemcpyDefault the runtime infers the direction from the pointers
// (needs unified virtual addressing per the CUDA docs).
// NOTE(review): the return value of cudaMemcpy is not checked here.
case Caffe::GPU:
cudaMemcpy(data_ptr, input_blobs[i]->gpu_diff(),
- sizeof(float) * input_blobs[i]->count(), cudaMemcpyDeviceToHost);
+ sizeof(float) * input_blobs[i]->count(), cudaMemcpyDefault);
break;
default:
// Any mode not handled by the cases above is logged at FATAL severity.
LOG(FATAL) << "Unknown Caffe mode.";
break;
// GPU mode: copy count() floats from the GPU data of layer_blobs[j] into
// weights_ptr; CUDA_CHECK wraps the call so its status is inspected.
// Previously tagged cudaMemcpyDeviceToHost; with cudaMemcpyDefault the
// runtime infers the direction from the pointers (needs unified virtual
// addressing per the CUDA docs).
case Caffe::GPU:
CUDA_CHECK(cudaMemcpy(weights_ptr, layer_blobs[j]->gpu_data(),
- sizeof(float) * layer_blobs[j]->count(), cudaMemcpyDeviceToHost));
+ sizeof(float) * layer_blobs[j]->count(), cudaMemcpyDefault));
break;
default:
// Unhandled modes are logged at FATAL severity together with the mode value.
LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
// GPU mode: copy count_ elements of Dtype from `source` into this object.
// copy_diff selects which pair of buffers is used: source.gpu_diff() into
// diff_, otherwise source.gpu_data() into data_.
// Both copies were tagged cudaMemcpyDeviceToDevice before this patch; with
// cudaMemcpyDefault the runtime infers the direction from the pointers
// (needs unified virtual addressing per the CUDA docs).
case Caffe::GPU:
if (copy_diff) {
// Destination is obtained through diff_->mutable_gpu_data().
CUDA_CHECK(cudaMemcpy(diff_->mutable_gpu_data(), source.gpu_diff(),
- sizeof(Dtype) * count_, cudaMemcpyDeviceToDevice));
+ sizeof(Dtype) * count_, cudaMemcpyDefault));
} else {
CUDA_CHECK(cudaMemcpy(data_->mutable_gpu_data(), source.gpu_data(),
- sizeof(Dtype) * count_, cudaMemcpyDeviceToDevice));
+ sizeof(Dtype) * count_, cudaMemcpyDefault));
}
break;
case Caffe::CPU:
// Copy the prefetched data (prefetch_data_->count() elements of Dtype) from
// its CPU buffer into the GPU data of the first top blob. Previously tagged
// cudaMemcpyHostToDevice; cudaMemcpyDefault lets the runtime infer the
// direction (needs unified virtual addressing per the CUDA docs).
CUDA_CHECK(cudaMemcpy((*top)[0]->mutable_gpu_data(),
prefetch_data_->cpu_data(), sizeof(Dtype) * prefetch_data_->count(),
- cudaMemcpyHostToDevice));
+ cudaMemcpyDefault));
// Labels go to the second top blob, but only when output_labels_ is set.
if (output_labels_) {
CUDA_CHECK(cudaMemcpy((*top)[1]->mutable_gpu_data(),
prefetch_label_->cpu_data(), sizeof(Dtype) * prefetch_label_->count(),
- cudaMemcpyHostToDevice));
+ cudaMemcpyDefault));
}
// Start a new prefetch thread once the copies above have been issued.
CreatePrefetchThread();
&(*top)[0]->mutable_gpu_data()[i * data_count],
&data_blob_.cpu_data()[current_row_ * data_count],
sizeof(Dtype) * data_count,
- cudaMemcpyHostToDevice));
+ cudaMemcpyDefault));
// Copy one label row: label_data_count elements of Dtype, read from
// label_blob_'s CPU data at row current_row_ and written to slot i of the
// second top blob's GPU data. Previously tagged cudaMemcpyHostToDevice;
// cudaMemcpyDefault lets the runtime infer the direction (needs unified
// virtual addressing per the CUDA docs).
CUDA_CHECK(cudaMemcpy(
&(*top)[1]->mutable_gpu_data()[i * label_data_count],
&label_blob_.cpu_data()[current_row_ * label_data_count],
sizeof(Dtype) * label_data_count,
- cudaMemcpyHostToDevice));
+ cudaMemcpyDefault));
}
return Dtype(0.);
}
// For each of the bottom[0]->num() items, copy its data slice and its label
// slice from the bottom blobs' GPU data into data_blob_ / label_blob_ on the
// CPU side, then call SaveBlobs() and return zero.
// Both copies were tagged cudaMemcpyDeviceToHost before this patch;
// cudaMemcpyDefault lets the runtime infer the direction (needs unified
// virtual addressing per the CUDA docs).
// NOTE(review): source and destination advance by the same contiguous
// offset (i * dim), so this looks collapsible into one cudaMemcpy per blob
// of num() * dim elements — confirm the blobs are sized accordingly.
for (int i = 0; i < bottom[0]->num(); ++i) {
// Item i of the data blob: data_datum_dim elements.
CUDA_CHECK(cudaMemcpy(&data_blob_.mutable_cpu_data()[i * data_datum_dim],
&bottom[0]->gpu_data()[i * data_datum_dim],
- sizeof(Dtype) * data_datum_dim, cudaMemcpyDeviceToHost));
+ sizeof(Dtype) * data_datum_dim, cudaMemcpyDefault));
// Item i of the label blob: label_datum_dim elements.
CUDA_CHECK(cudaMemcpy(&label_blob_.mutable_cpu_data()[i * label_datum_dim],
&bottom[1]->gpu_data()[i * label_datum_dim],
- sizeof(Dtype) * label_datum_dim, cudaMemcpyDeviceToHost));
+ sizeof(Dtype) * label_datum_dim, cudaMemcpyDefault));
}
SaveBlobs();
return Dtype(0.);
// Copy the prefetched data and labels from their CPU buffers into the GPU
// data of the first and second top blobs respectively; each copy moves
// count() elements of Dtype. Previously tagged cudaMemcpyHostToDevice;
// cudaMemcpyDefault lets the runtime infer the direction (needs unified
// virtual addressing per the CUDA docs).
CUDA_CHECK(cudaMemcpy((*top)[0]->mutable_gpu_data(),
prefetch_data_->cpu_data(), sizeof(Dtype) * prefetch_data_->count(),
- cudaMemcpyHostToDevice));
+ cudaMemcpyDefault));
// Labels are copied unconditionally in this fragment.
CUDA_CHECK(cudaMemcpy((*top)[1]->mutable_gpu_data(),
prefetch_label_->cpu_data(), sizeof(Dtype) * prefetch_label_->count(),
- cudaMemcpyHostToDevice));
+ cudaMemcpyDefault));
// Start a new prefetch thread, then return zero.
CreatePrefetchThread();
return Dtype(0.);
// Copy the prefetched data and labels from their CPU buffers into the GPU
// data of the first and second top blobs respectively; each copy moves
// count() elements of Dtype. Previously tagged cudaMemcpyHostToDevice;
// cudaMemcpyDefault lets the runtime infer the direction (needs unified
// virtual addressing per the CUDA docs).
CUDA_CHECK(cudaMemcpy((*top)[0]->mutable_gpu_data(),
prefetch_data_->cpu_data(), sizeof(Dtype) * prefetch_data_->count(),
- cudaMemcpyHostToDevice));
+ cudaMemcpyDefault));
// Labels are copied unconditionally in this fragment.
CUDA_CHECK(cudaMemcpy((*top)[1]->mutable_gpu_data(),
prefetch_label_->cpu_data(), sizeof(Dtype) * prefetch_label_->count(),
- cudaMemcpyHostToDevice));
+ cudaMemcpyDefault));
// Start a new prefetch thread, then return zero.
CreatePrefetchThread();
return Dtype(0.);
CaffeMallocHost(&cpu_ptr_, size_);
own_cpu_data_ = true;
}
// Refresh the host buffer: copy size_ bytes from gpu_ptr_ to cpu_ptr_ and
// mark the state SYNCED. Previously tagged cudaMemcpyDeviceToHost;
// cudaMemcpyDefault lets the runtime infer the direction (needs unified
// virtual addressing per the CUDA docs).
// NOTE(review): the allocator behind CaffeMallocHost is not visible in this
// fragment — confirm cpu_ptr_ is memory the runtime can classify correctly.
- CUDA_CHECK(cudaMemcpy(cpu_ptr_, gpu_ptr_, size_, cudaMemcpyDeviceToHost));
+ CUDA_CHECK(cudaMemcpy(cpu_ptr_, gpu_ptr_, size_, cudaMemcpyDefault));
head_ = SYNCED;
break;
case HEAD_AT_CPU:
// Allocate the device buffer on first use, then copy size_ bytes from
// cpu_ptr_ to gpu_ptr_ and mark the state SYNCED.
if (gpu_ptr_ == NULL) {
CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
}
- CUDA_CHECK(cudaMemcpy(gpu_ptr_, cpu_ptr_, size_, cudaMemcpyHostToDevice));
+ CUDA_CHECK(cudaMemcpy(gpu_ptr_, cpu_ptr_, size_, cudaMemcpyDefault));
head_ = SYNCED;
break;
case HEAD_AT_GPU:
// Read 10 bytes back from gpu_data into a freshly allocated host buffer and
// expect every byte to equal 1.
// NOTE(review): the buffer and the copy are hard-coded to 10 bytes while
// the loop runs to mem.size(); these only agree if mem.size() == 10 —
// confirm against the test setup.
// NOTE(review): the cudaMemcpy status is not checked, and no delete[] for
// recovered_value is visible in this fragment.
char* recovered_value = new char[10];
cudaMemcpy(reinterpret_cast<void*>(recovered_value), gpu_data, 10,
- cudaMemcpyDeviceToHost);
+ cudaMemcpyDefault);
for (int i = 0; i < mem.size(); ++i) {
// recovered_value is already char*, so the cast below is a no-op.
EXPECT_EQ((reinterpret_cast<char*>(recovered_value))[i], 1);
}
// The memory object is expected to report the SYNCED state at this point.
EXPECT_EQ(mem.head(), SyncedMemory::SYNCED);
// Read 10 bytes back from gpu_data into recovered_value and expect every
// byte to equal 2.
// NOTE(review): the copy length is hard-coded to 10 while the loop runs to
// mem.size() — confirm they match; the cudaMemcpy status is not checked.
cudaMemcpy(reinterpret_cast<void*>(recovered_value), gpu_data, 10,
- cudaMemcpyDeviceToHost);
+ cudaMemcpyDefault);
for (int i = 0; i < mem.size(); ++i) {
EXPECT_EQ((reinterpret_cast<char*>(recovered_value))[i], 2);
}