Blob<Dtype> scale_;
};
+#ifdef USE_CUDNN
+/**
+ * @brief cuDNN implementation of SoftmaxLayer.
+ * Fallback to SoftmaxLayer for CPU mode.
+ *
+ * Wraps cudnnSoftmaxForward/cudnnSoftmaxBackward. The cuDNN handle and the
+ * bottom/top tensor descriptors are created in LayerSetUp and released in
+ * the destructor (RAII-style ownership by this layer).
+ */
+template <typename Dtype>
+class CuDNNSoftmaxLayer : public SoftmaxLayer<Dtype> {
+ public:
+ explicit CuDNNSoftmaxLayer(const LayerParameter& param)
+ : SoftmaxLayer<Dtype>(param) {}
+ // Runs the base-class setup, then creates the cuDNN handle and the
+ // 4-D tensor descriptors sized from bottom[0].
+ virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top);
+ // Destroys the descriptors and the cuDNN handle created in LayerSetUp.
+ virtual ~CuDNNSoftmaxLayer();
+
+ protected:
+ virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top);
+ virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
+
+ cudnnHandle_t handle_; // owned cuDNN context
+ cudnnTensor4dDescriptor_t bottom_desc_; // describes bottom[0] (N,C,H,W)
+ cudnnTensor4dDescriptor_t top_desc_; // describes top[0] (N,C,H,W)
+};
+#endif
+
/**
* @brief Creates a "split" path in the network by copying the bottom Blob
* into multiple top Blob%s to be used by multiple consuming layers.
}
if (engine == SoftmaxParameter_Engine_CAFFE) {
return new SoftmaxLayer<Dtype>(param);
+#ifdef USE_CUDNN
+ } else if (engine == SoftmaxParameter_Engine_CUDNN) {
+ return new CuDNNSoftmaxLayer<Dtype>(param);
+#endif
} else {
LOG(FATAL) << "Layer " << name << " has unknown engine.";
}
--- /dev/null
+#ifdef USE_CUDNN
+#include <algorithm>
+#include <cfloat>
+#include <vector>
+
+#include "thrust/device_vector.h"
+
+#include "caffe/layer.hpp"
+#include "caffe/util/math_functions.hpp"
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void CuDNNSoftmaxLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ // Let the base class validate bottom/top and reshape the outputs first.
+ SoftmaxLayer<Dtype>::LayerSetUp(bottom, top);
+ // Initialize CUDNN.
+ cudnnStatus_t stat = cudnnCreate(&handle_);
+ CHECK_EQ(stat, CUDNN_STATUS_SUCCESS) << "Could not create a CUDNN handle.";
+ // Describe bottom[0] and top[0] as N x C x H x W tensors; softmax
+ // preserves the input shape, so both descriptors use the same dims.
+ // NOTE(review): descriptors are sized once here — if bottom[0] is
+ // reshaped after setup they become stale; confirm a Reshape hook exists
+ // upstream or that input sizes are fixed for this layer.
+ int N = bottom[0]->num();
+ int K = bottom[0]->channels();
+ int H = bottom[0]->height();
+ int W = bottom[0]->width();
+ cudnn::createTensor4dDesc<Dtype>(&bottom_desc_, N, K, H, W);
+ cudnn::createTensor4dDesc<Dtype>(&top_desc_, N, K, H, W);
+}
+
+template <typename Dtype>
+CuDNNSoftmaxLayer<Dtype>::~CuDNNSoftmaxLayer() {
+ // Tear down cuDNN state in reverse order of acquisition: the two tensor
+ // descriptors first, then the handle they were created against.
+ cudnnDestroyTensor4dDescriptor(top_desc_);
+ cudnnDestroyTensor4dDescriptor(bottom_desc_);
+ cudnnDestroy(handle_);
+}
+
+INSTANTIATE_CLASS(CuDNNSoftmaxLayer);
+
+} // namespace caffe
+#endif
--- /dev/null
+#ifdef USE_CUDNN
+#include <algorithm>
+#include <cfloat>
+#include <vector>
+
+#include "thrust/device_vector.h"
+
+#include "caffe/layer.hpp"
+#include "caffe/util/math_functions.hpp"
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void CuDNNSoftmaxLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ const Dtype* bottom_data = bottom[0]->gpu_data();
+ Dtype* top_data = (*top)[0]->mutable_gpu_data();
+ // Channel-wise softmax over bottom[0], written to top[0].
+ cudnnStatus_t stat = cudnnSoftmaxForward(handle_, CUDNN_SOFTMAX_ACCURATE,
+ CUDNN_SOFTMAX_MODE_CHANNEL,
+ bottom_desc_, bottom_data, top_desc_, top_data);
+ // Fixed copy-paste bug: the failure message previously blamed
+ // cudnnSoftmaxBackward from the forward pass.
+ CHECK_EQ(stat, CUDNN_STATUS_SUCCESS) << "Error in cudnnSoftmaxForward.";
+}
+
+template <typename Dtype>
+void CuDNNSoftmaxLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
+ if (propagate_down[0]) {
+ // cuDNN computes the softmax gradient from the top activations and the
+ // top diff alone; the bottom data is not needed (removed the unused
+ // bottom_data local).
+ const Dtype* top_data = top[0]->gpu_data();
+ const Dtype* top_diff = top[0]->gpu_diff();
+ Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+ // Fixed syntax error: the call previously ended with a stray extra ')'.
+ cudnnStatus_t stat = cudnnSoftmaxBackward(handle_, CUDNN_SOFTMAX_ACCURATE,
+ CUDNN_SOFTMAX_MODE_CHANNEL,
+ top_desc_, top_data, top_desc_, top_diff, bottom_desc_, bottom_diff);
+ CHECK_EQ(stat, CUDNN_STATUS_SUCCESS) << "Error in cudnnSoftmaxBackward.";
+ }
+}
+
+INSTANTIATE_CLASS(CuDNNSoftmaxLayer);
+
+} // namespace caffe
+#endif
-//
#include <algorithm>
#include <vector>
&(this->blob_top_vec_));
}
+#ifdef USE_CUDNN
+template <typename Dtype>
+class CuDNNSoftmaxLayerTest : public ::testing::Test {
+ protected:
+ // Fixture for CuDNNSoftmaxLayer: a 2x10x2x3 Gaussian-filled bottom blob
+ // and an empty top blob the layer will shape during SetUp.
+ CuDNNSoftmaxLayerTest()
+ : blob_bottom_(new Blob<Dtype>(2, 10, 2, 3)),
+ blob_top_(new Blob<Dtype>()) {
+ // fill the values
+ FillerParameter filler_param;
+ GaussianFiller<Dtype> filler(filler_param);
+ filler.Fill(this->blob_bottom_);
+ blob_bottom_vec_.push_back(blob_bottom_);
+ blob_top_vec_.push_back(blob_top_);
+ }
+ // The fixture owns both blobs; the vectors hold non-owning pointers.
+ virtual ~CuDNNSoftmaxLayerTest() { delete blob_bottom_; delete blob_top_; }
+ Blob<Dtype>* const blob_bottom_;
+ Blob<Dtype>* const blob_top_;
+ vector<Blob<Dtype>*> blob_bottom_vec_;
+ vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+TYPED_TEST_CASE(CuDNNSoftmaxLayerTest, TestDtypes);
+
+TYPED_TEST(CuDNNSoftmaxLayerTest, TestForwardCuDNN) {
+ Caffe::set_mode(Caffe::GPU);
+ LayerParameter layer_param;
+ CuDNNSoftmaxLayer<TypeParam> layer(layer_param);
+ layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ // At every (n, h, w) location the channel values must form a probability
+ // distribution: they sum to one and match exp(x_c) / sum_c' exp(x_c').
+ for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+ for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+ for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+ TypeParam sum = 0;
+ for (int c = 0; c < this->blob_top_->channels(); ++c) {
+ sum += this->blob_top_->data_at(n, c, h, w);
+ }
+ EXPECT_GE(sum, 0.999);
+ EXPECT_LE(sum, 1.001);
+ // Compare each output against an explicitly computed softmax.
+ TypeParam scale = 0;
+ for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+ scale += exp(this->blob_bottom_->data_at(n, c, h, w));
+ }
+ for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+ EXPECT_GE(this->blob_top_->data_at(n, c, h, w) + 1e-4,
+ exp(this->blob_bottom_->data_at(n, c, h, w)) / scale)
+ << "debug: " << n << " " << c;
+ EXPECT_LE(this->blob_top_->data_at(n, c, h, w) - 1e-4,
+ exp(this->blob_bottom_->data_at(n, c, h, w)) / scale)
+ << "debug: " << n << " " << c;
+ }
+ }
+ }
+ }
+}
+
+TYPED_TEST(CuDNNSoftmaxLayerTest, TestGradientCuDNN) {
+ Caffe::set_mode(Caffe::GPU);
+ LayerParameter layer_param;
+ CuDNNSoftmaxLayer<TypeParam> layer(layer_param);
+ // Numerically verify d(top)/d(bottom) for every element with step 1e-2
+ // and tolerance 1e-3; the checker calls SetUp/Forward/Backward itself.
+ GradientChecker<TypeParam> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+ &(this->blob_top_vec_));
+}
+
+#endif
+
} // namespace caffe