namespace {
-// TODO(csigg): remove dnn namespace qualifier from the RNN code below.
-using ::stream_executor::dnn::BatchDescriptor;
-using ::stream_executor::dnn::ConvolutionDescriptor;
-using ::stream_executor::dnn::FilterDescriptor;
-using ::stream_executor::dnn::NormalizeDescriptor;
-using ::stream_executor::dnn::PoolingDescriptor;
-
// Converts (via narrowing) a type T value to a type U, and checks that the
// value has no value change due to the conversion.
template <typename WideT, typename NarrowT>
// Turns a BatchDescriptor structure into a cudnn tensor handle within a scope.
class ScopedTensorDescriptor {
public:
- ScopedTensorDescriptor(const BatchDescriptor& batch_descriptor,
+ ScopedTensorDescriptor(const dnn::BatchDescriptor& batch_descriptor,
cudnnDataType_t elem_type)
: handle_(nullptr) {
cudnnStatus_t status = cudnnCreateTensorDescriptor(&handle_);
// Turns a FilterDescriptor structure into a cudnn filter handle within a scope.
class ScopedFilterDescriptor {
public:
- ScopedFilterDescriptor(const FilterDescriptor& filter_descriptor,
+ ScopedFilterDescriptor(const dnn::FilterDescriptor& filter_descriptor,
cudnnDataType_t elem_type)
: handle_(nullptr) {
cudnnStatus_t status = cudnnCreateFilterDescriptor(&handle_);
class ScopedConvolutionDescriptor {
public:
ScopedConvolutionDescriptor(
- const ConvolutionDescriptor& convolution_descriptor,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
cudnnDataType_t data_type)
: handle_(nullptr) {
cudnnStatus_t status = cudnnCreateConvolutionDescriptor(&handle_);
// within a scope.
class ScopedPoolingDescriptor {
public:
- explicit ScopedPoolingDescriptor(const PoolingDescriptor& pooling_descriptor)
+ explicit ScopedPoolingDescriptor(
+ const dnn::PoolingDescriptor& pooling_descriptor)
: handle_(nullptr) {
cudnnStatus_t status = cudnnCreatePoolingDescriptor(&handle_);
if (status != CUDNN_STATUS_SUCCESS) {
class ScopedNormalizeDescriptor {
public:
explicit ScopedNormalizeDescriptor(
- const NormalizeDescriptor& normalize_descriptor)
+ const dnn::NormalizeDescriptor& normalize_descriptor)
: handle_(nullptr) {
cudnnStatus_t status = cudnnCreateLRNDescriptor(&handle_);
if (status != CUDNN_STATUS_SUCCESS) {
template <class T>
bool CudnnSupport::DoConvolveImpl(
- Stream* stream, const BatchDescriptor& input_descriptor,
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<T>& input_data,
- const FilterDescriptor& filter_descriptor,
+ const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<T>& filter_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& output_descriptor, DeviceMemory<T>* output_data,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor, DeviceMemory<T>* output_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
}
bool CudnnSupport::DoConvolve(
- Stream* stream, const BatchDescriptor& batch_descriptor,
+ Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
const DeviceMemory<float>& input_data,
- const FilterDescriptor& filter_descriptor,
+ const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<float>& filter_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& output_descriptor, DeviceMemory<float>* output_data,
- ScratchAllocator* scratch_allocator,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
return DoConvolveImpl<float>(
}
bool CudnnSupport::DoConvolve(
- Stream* stream, const BatchDescriptor& batch_descriptor,
+ Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
const DeviceMemory<double>& input_data,
- const FilterDescriptor& filter_descriptor,
+ const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<double>& filter_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& output_descriptor, DeviceMemory<double>* output_data,
- ScratchAllocator* scratch_allocator,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
+ DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
return DoConvolveImpl<double>(
}
bool CudnnSupport::DoConvolve(
- Stream* stream, const BatchDescriptor& batch_descriptor,
+ Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
const DeviceMemory<Eigen::half>& input_data,
- const FilterDescriptor& filter_descriptor,
+ const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<Eigen::half>& filter_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& output_descriptor,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<Eigen::half>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
template <class T>
DeviceMemory<T> MaybeTransformLayout(
Stream* stream, const CudnnHandle& cudnn,
- BatchDescriptor* output_descriptor, DeviceMemory<T> backward_output_data,
+ dnn::BatchDescriptor* output_descriptor,
+ DeviceMemory<T> backward_output_data,
std::unique_ptr<TemporaryDeviceMemory<T>>* transform_scratch) {
if (output_descriptor->layout() == dnn::DataLayout::kBatchDepthYX) {
return backward_output_data;
*transform_scratch =
stream->AllocateTemporaryArray<T>(backward_output_data.ElementCount())
.ConsumeValueOrDie();
- BatchDescriptor transformed_output_descriptor;
+ dnn::BatchDescriptor transformed_output_descriptor;
transformed_output_descriptor.CloneFrom(*output_descriptor);
transformed_output_descriptor.set_layout(dnn::DataLayout::kBatchDepthYX);
cudnnDataType_t cudnn_type = GetCudnnDataType<T>();
template <class T>
bool CudnnSupport::DoConvolveBackwardDataImpl(
- Stream* stream, const FilterDescriptor& filter_descriptor,
+ Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<T>& filter_data,
- const BatchDescriptor& output_descriptor_in,
+ const dnn::BatchDescriptor& output_descriptor_in,
DeviceMemory<T> backward_output_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& input_descriptor,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& input_descriptor,
DeviceMemory<T>* backward_input_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
auto cudnn = cudnn_->GetHandle(parent_, stream);
// TBD(keveman): remove once cuDNN supports kBatchYXDepth for backward pass.
- BatchDescriptor output_descriptor;
+ dnn::BatchDescriptor output_descriptor;
output_descriptor.CloneFrom(output_descriptor_in);
std::unique_ptr<TemporaryDeviceMemory<T>> transform_scratch;
backward_output_data =
}
bool CudnnSupport::DoConvolveBackwardData(
- Stream* stream, const FilterDescriptor& filter_descriptor,
+ Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<double>& filter_data,
- const BatchDescriptor& output_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<double> backward_output_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& input_descriptor,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& input_descriptor,
DeviceMemory<double>* backward_input_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
}
bool CudnnSupport::DoConvolveBackwardData(
- Stream* stream, const FilterDescriptor& filter_descriptor,
+ Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<float>& filter_data,
- const BatchDescriptor& output_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<float> backward_output_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& input_descriptor,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& input_descriptor,
DeviceMemory<float>* backward_input_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
}
bool CudnnSupport::DoConvolveBackwardData(
- Stream* stream, const FilterDescriptor& filter_descriptor,
+ Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<Eigen::half>& filter_data,
- const BatchDescriptor& output_descriptor,
+ const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<Eigen::half> backward_output_data,
- const ConvolutionDescriptor& convolution_descriptor,
- const BatchDescriptor& input_descriptor,
+ const dnn::ConvolutionDescriptor& convolution_descriptor,
+ const dnn::BatchDescriptor& input_descriptor,
DeviceMemory<Eigen::half>* backward_input_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
auto cudnn = cudnn_->GetHandle(parent_, stream);
// TBD(keveman): remove once cuDNN supports kBatchYXDepth for backward pass.
- BatchDescriptor output_descriptor;
+ dnn::BatchDescriptor output_descriptor;
output_descriptor.CloneFrom(output_descriptor_in);
std::unique_ptr<TemporaryDeviceMemory<T>> transform_scratch;
backward_output_data =
}
bool CudnnSupport::DoConvolveBackwardBias(
- Stream* stream, const BatchDescriptor& input_descriptor,
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<double>& input_data,
- const BatchDescriptor& bias_descriptor,
+ const dnn::BatchDescriptor& bias_descriptor,
DeviceMemory<double>* backward_bias_data) {
return DoConvolveBackwardBiasImpl(stream, input_descriptor, input_data,
bias_descriptor, backward_bias_data);
}
bool CudnnSupport::DoConvolveBackwardBias(
- Stream* stream, const BatchDescriptor& input_descriptor,
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<float>& input_data,
- const BatchDescriptor& bias_descriptor,
+ const dnn::BatchDescriptor& bias_descriptor,
DeviceMemory<float>* backward_bias_data) {
return DoConvolveBackwardBiasImpl(stream, input_descriptor, input_data,
bias_descriptor, backward_bias_data);
}
bool CudnnSupport::DoConvolveBackwardBias(
- Stream* stream, const BatchDescriptor& input_descriptor,
+ Stream* stream, const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<Eigen::half>& input_data,
- const BatchDescriptor& bias_descriptor,
+ const dnn::BatchDescriptor& bias_descriptor,
DeviceMemory<Eigen::half>* backward_bias_data) {
return DoConvolveBackwardBiasImpl(stream, input_descriptor, input_data,
bias_descriptor, backward_bias_data);
DeviceMemory<float>* output_data) {
ScopedTensorDescriptor input_descriptor(dimensions, CUDNN_DATA_FLOAT);
- BatchDescriptor bias_dimensions;
+ dnn::BatchDescriptor bias_dimensions;
bias_dimensions.set_count(1)
.set_feature_map_count(dimensions.feature_map_count())
.set_height(1)
}
bool CudnnSupport::DeriveOutputBatchDescriptor(
- const BatchDescriptor& batch_descriptor,
- const FilterDescriptor& filter_descriptor,
+ const dnn::BatchDescriptor& batch_descriptor,
+ const dnn::FilterDescriptor& filter_descriptor,
const dnn::ConvolutionDescriptor& convolution_descriptor,
dnn::BatchDescriptor* output_batch_descriptor) {
ScopedTensorDescriptor input_nd(batch_descriptor, CUDNN_DATA_FLOAT);