#include "cuda/include/cublas_v2.h"
#include "cuda/include/cuda.h"
-#if CUDA_VERSION >= 8000
#define SE_CUDA_DATA_HALF CUDA_R_16F
-#else
-#define SE_CUDA_DATA_HALF CUBLAS_DATA_HALF
-#endif
#include "tensorflow/stream_executor/cuda/cuda_blas.h"
// approach when the issue is fixed.
#if CUDA_VERSION < 9000
#include "cuda/include/cuda_fp16.h"
-#if CUDA_VERSION >= 7050
#define EIGEN_HAS_CUDA_FP16
#endif
-#endif
#include "third_party/eigen3/Eigen/Core"
// blas::ComputationType to a cudaDataType_t.
//
// These are used to build the argument type and computation type args to
-// cublasGemmEx. cublasGemmEx and cudaDataType_t are available only on
-// CUDA >= 8.0.
-#if CUDA_VERSION >= 8000
+// cublasGemmEx.
template <typename T>
struct CUDADataType;
return CUDA_C_64F;
}
}
-#endif
-
} // namespace
template <typename FuncT, typename... Args>
// Note that when the CUDA version and compute capability are not sufficient,
// we still return the out_algorithms. The caller needs to make sure that in
// this case, the returned vector is empty.
-#if CUDA_VERSION >= 8000
for (cublasGemmAlgo_t algo : {
CUBLAS_GEMM_DFALT, CUBLAS_GEMM_ALGO0, CUBLAS_GEMM_ALGO1,
CUBLAS_GEMM_ALGO2, CUBLAS_GEMM_ALGO3, CUBLAS_GEMM_ALGO4,
}) {
out_algorithms->push_back(algo);
}
-#endif
return true;
}
#include "tensorflow/stream_executor/lib/env.h"
#include "tensorflow/stream_executor/lib/error.h"
#include "tensorflow/stream_executor/lib/human_readable.h"
+#include "tensorflow/stream_executor/lib/inlined_vector.h"
#include "tensorflow/stream_executor/lib/notification.h"
-#include "tensorflow/stream_executor/lib/threadpool.h"
#include "tensorflow/stream_executor/lib/stacktrace.h"
#include "tensorflow/stream_executor/lib/static_threadlocal.h"
#include "tensorflow/stream_executor/lib/strcat.h"
#include "tensorflow/stream_executor/lib/stringprintf.h"
+#include "tensorflow/stream_executor/lib/threadpool.h"
#include "tensorflow/stream_executor/platform/logging.h"
#include "tensorflow/stream_executor/platform/mutex.h"
#include "tensorflow/stream_executor/platform/port.h"
-#include "tensorflow/stream_executor/lib/inlined_vector.h"
bool FLAGS_gpuexec_cuda_driver_inject_init_error = false;
bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false;
case 719:
return "CUDA_ERROR_LAUNCH_FAILED";
- OSTREAM_CUDA_ERROR(CONTEXT_ALREADY_IN_USE)
- OSTREAM_CUDA_ERROR(PEER_ACCESS_UNSUPPORTED)
- OSTREAM_CUDA_ERROR(NOT_PERMITTED)
- OSTREAM_CUDA_ERROR(NOT_SUPPORTED)
- OSTREAM_CUDA_ERROR(UNKNOWN) // Unknown internal error to CUDA.
+ OSTREAM_CUDA_ERROR(CONTEXT_ALREADY_IN_USE)
+ OSTREAM_CUDA_ERROR(PEER_ACCESS_UNSUPPORTED)
+ OSTREAM_CUDA_ERROR(NOT_PERMITTED)
+ OSTREAM_CUDA_ERROR(NOT_SUPPORTED)
+ OSTREAM_CUDA_ERROR(UNKNOWN) // Unknown internal error to CUDA.
default:
return port::StrCat("CUresult(", static_cast<int>(result), ")");
}
}
/* static */ port::Status CUDADriver::CreateContext(
- CUdevice device, DeviceOptions device_options, CudaContext** context) {
+ CUdevice device, const DeviceOptions &device_options,
+ CudaContext **context) {
*context = nullptr;
int flags = 0;
CUresult res;
CUcontext former_context;
CUcontext new_context;
- {
- // TODO(leary) Need to see if NVIDIA can expunge the leakiness in their
- // context creation: see http://b/13248943
-#if CUDA_VERSION >= 7000
- {
- unsigned int former_primary_context_flags;
- int former_primary_context_is_active;
- CHECK_EQ(CUDA_SUCCESS,
- cuDevicePrimaryCtxGetState(device, &former_primary_context_flags,
- &former_primary_context_is_active));
- if (former_primary_context_flags != flags) {
- if (former_primary_context_is_active) {
- LOG(ERROR)
- << "The primary context is active and has a different flag set ("
- << former_primary_context_flags << ") than the desired flag set ("
- << flags << ").";
- } else {
- CHECK_EQ(CUDA_SUCCESS, cuDevicePrimaryCtxSetFlags(device, flags));
- }
- }
+ unsigned int former_primary_context_flags;
+ int former_primary_context_is_active;
+ CHECK_EQ(CUDA_SUCCESS,
+ cuDevicePrimaryCtxGetState(device, &former_primary_context_flags,
+ &former_primary_context_is_active));
+ if (former_primary_context_flags != flags) {
+ if (former_primary_context_is_active) {
+ LOG(ERROR)
+ << "The primary context is active and has a different flag set ("
+ << former_primary_context_flags << ") than the desired flag set ("
+ << flags << ").";
+ } else {
+ CHECK_EQ(CUDA_SUCCESS, cuDevicePrimaryCtxSetFlags(device, flags));
}
+ }
- former_context = CUDADriver::CurrentContextOrDie();
- res = cuDevicePrimaryCtxRetain(&new_context, device);
- if (former_context != nullptr) {
- CUdevice former_device;
- if (cuCtxGetDevice(&former_device) == CUDA_SUCCESS) {
- if (former_device == device) {
- if (former_context == new_context) {
- VLOG(2) << "The primary context " << former_context
- << " for device " << device
- << " exists before initializing the StreamExecutor.";
- } else {
- LOG(WARNING)
- << "A non-primary context " << former_context << " for device "
- << device
- << " exists before initializing the StreamExecutor. The "
- << "primary context is now " << new_context << ". We "
- << "haven't verified StreamExecutor works with that.";
- }
+ former_context = CUDADriver::CurrentContextOrDie();
+ res = cuDevicePrimaryCtxRetain(&new_context, device);
+ if (former_context != nullptr) {
+ CUdevice former_device;
+ if (cuCtxGetDevice(&former_device) == CUDA_SUCCESS) {
+ if (former_device == device) {
+ if (former_context == new_context) {
+ VLOG(2) << "The primary context " << former_context << " for device "
+ << device
+ << " exists before initializing the StreamExecutor.";
+ } else {
+ LOG(WARNING) << "A non-primary context " << former_context
+ << " for device " << device
+ << " exists before initializing the StreamExecutor. The "
+ << "primary context is now " << new_context << ". We "
+ << "haven't verified StreamExecutor works with that.";
}
- } else {
- LOG(ERROR) << "Failed to get the device of the current context "
- << former_context;
}
+ } else {
+ LOG(ERROR) << "Failed to get the device of the current context "
+ << former_context;
}
-#else
- former_context = CurrentContext();
- if (former_context != nullptr) {
- LOG(WARNING)
- << "creating context when one is currently active; existing: "
- << former_context;
- }
- res = cuCtxCreate(&new_context, flags, device);
-#endif
}
CHECK_EQ(CUDA_SUCCESS, cuCtxSetCurrent(former_context));
return port::Status::OK();
}
-#if CUDA_VERSION >= 7000
string message = "failed call to cuDevicePrimaryCtxRetain: " + ToString(res);
-#else
- string message = "failed call to cuCtxCreate: " + ToString(res);
-#endif
if (res == CUDA_ERROR_OUT_OF_MEMORY) {
uint64 total_memory;
if (GetDeviceTotalMemory(device, &total_memory)) {
if (context == nullptr) {
return;
}
-#if CUDA_VERSION >= 7000
CUcontext former_context = CurrentContext();
CUresult res = cuCtxSetCurrent(context->context());
CUdevice device;
cuCtxSetCurrent(former_context);
res = cuDevicePrimaryCtxRelease(device);
-#else
- CUresult res = cuCtxDestroy(context->context());
-#endif
if (res != CUDA_SUCCESS) {
LOG(ERROR) << "failed to release CUDA context; leaking: " << ToString(res);