strategize cuDNN softmax
author    Evan Shelhamer <shelhamer@imaginarynumber.net>
          Sat, 6 Sep 2014 06:53:04 +0000 (23:53 -0700)
committer Evan Shelhamer <shelhamer@imaginarynumber.net>
          Sun, 7 Sep 2014 17:56:15 +0000 (19:56 +0200)
include/caffe/common_layers.hpp
src/caffe/layer_factory.cpp
src/caffe/layers/cudnn_softmax_layer.cpp [new file with mode: 0644]
src/caffe/layers/cudnn_softmax_layer.cu [new file with mode: 0644]
src/caffe/layers/softmax_layer.cpp
src/caffe/test/test_softmax_layer.cpp

index 3753592..c170742 100644 (file)
@@ -375,6 +375,32 @@ class SoftmaxLayer : public Layer<Dtype> {
   Blob<Dtype> scale_;
 };
 
+#ifdef USE_CUDNN
+/**
+ * @brief cuDNN implementation of SoftmaxLayer.
+ *        Falls back to SoftmaxLayer for CPU mode.
+ */
+template <typename Dtype>
+class CuDNNSoftmaxLayer : public SoftmaxLayer<Dtype> {
+ public:
+  explicit CuDNNSoftmaxLayer(const LayerParameter& param)
+      : SoftmaxLayer<Dtype>(param) {}
+  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual ~CuDNNSoftmaxLayer();
+
+ protected:
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);
+
+  cudnnHandle_t             handle_;
+  cudnnTensor4dDescriptor_t bottom_desc_;
+  cudnnTensor4dDescriptor_t top_desc_;
+};
+#endif
+
 /**
  * @brief Creates a "split" path in the network by copying the bottom Blob
  *        into multiple top Blob%s to be used by multiple consuming layers.
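A word on the fallback mentioned in the docstring: CuDNNSoftmaxLayer overrides only the *_gpu entry points, so in CPU mode the inherited SoftmaxLayer implementations still run. The sketch below paraphrases the base-class dispatch that makes this work; it is illustrative, not the literal Layer code (the real Forward also accumulates and returns the loss, elided here).

// Illustrative sketch of the mode dispatch assumed to live in the Layer
// base class: a subclass that overrides only Forward_gpu/Backward_gpu
// keeps the CPU path it inherits from SoftmaxLayer.
template <typename Dtype>
void Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  switch (Caffe::mode()) {
  case Caffe::CPU:
    Forward_cpu(bottom, top);  // SoftmaxLayer's CPU implementation
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);  // CuDNNSoftmaxLayer's override
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}
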
index c519485..41c547b 100644 (file)
@@ -157,6 +157,10 @@ SoftmaxLayer<Dtype>* GetSoftmaxLayer(const string& name,
   }
   if (engine == SoftmaxParameter_Engine_CAFFE) {
     return new SoftmaxLayer<Dtype>(param);
+#ifdef USE_CUDNN
+  } else if (engine == SoftmaxParameter_Engine_CUDNN) {
+    return new CuDNNSoftmaxLayer<Dtype>(param);
+#endif
   } else {
     LOG(FATAL) << "Layer " << name << " has unknown engine.";
   }
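
How a caller reaches the new branch: the engine comes from SoftmaxParameter in caffe.proto (the CUDNN enum value is already referenced in the hunk above). A hedged sketch, assuming GetSoftmaxLayer takes the layer name plus its LayerParameter as the hunk header suggests, and that the function is visible to the caller; "prob" is a made-up layer name.

#include "caffe/proto/caffe.pb.h"

// Request the cuDNN engine explicitly so the factory branch above
// returns a CuDNNSoftmaxLayer rather than the Caffe implementation.
caffe::SoftmaxLayer<float>* MakeCudnnSoftmax() {
  caffe::LayerParameter param;
  param.mutable_softmax_param()->set_engine(
      caffe::SoftmaxParameter_Engine_CUDNN);
  return caffe::GetSoftmaxLayer<float>("prob", param);
}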
diff --git a/src/caffe/layers/cudnn_softmax_layer.cpp b/src/caffe/layers/cudnn_softmax_layer.cpp
new file mode 100644 (file)
index 0000000..58b3a78
--- /dev/null
@@ -0,0 +1,39 @@
+#ifdef USE_CUDNN
+#include <algorithm>
+#include <cfloat>
+#include <vector>
+
+#include "thrust/device_vector.h"
+
+#include "caffe/layer.hpp"
+#include "caffe/util/math_functions.hpp"
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void CuDNNSoftmaxLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  SoftmaxLayer<Dtype>::LayerSetUp(bottom, top);
+  // Initialize CUDNN.
+  cudnnStatus_t stat = cudnnCreate(&handle_);
+  CHECK_EQ(stat, CUDNN_STATUS_SUCCESS) << "Could not create a cuDNN handle.";
+  int N = bottom[0]->num();
+  int K = bottom[0]->channels();
+  int H = bottom[0]->height();
+  int W = bottom[0]->width();
+  cudnn::createTensor4dDesc<Dtype>(&bottom_desc_, N, K, H, W);
+  cudnn::createTensor4dDesc<Dtype>(&top_desc_, N, K, H, W);
+}
+
+template <typename Dtype>
+CuDNNSoftmaxLayer<Dtype>::~CuDNNSoftmaxLayer() {
+  cudnnDestroyTensor4dDescriptor(bottom_desc_);
+  cudnnDestroyTensor4dDescriptor(top_desc_);
+  cudnnDestroy(handle_);
+}
+
+INSTANTIATE_CLASS(CuDNNSoftmaxLayer);
+
+}  // namespace caffe
+#endif
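
LayerSetUp above leans on cudnn::createTensor4dDesc from Caffe's cuDNN wrapper header, which this commit does not show. One plausible shape for it, assuming cuDNN R1's cudnnCreateTensor4dDescriptor/cudnnSetTensor4dDescriptor and a small dataType<Dtype> trait; treat the details as a sketch, not the actual caffe/util/cudnn.hpp.

#include <cudnn.h>
#include <glog/logging.h>

namespace caffe {
namespace cudnn {

// Map Caffe's Dtype template parameter to a cuDNN data type enum.
template <typename Dtype> class dataType;
template <> class dataType<float> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
};
template <> class dataType<double> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
};

// Create and fill an NCHW tensor descriptor for an n x c x h x w blob.
template <typename Dtype>
inline void createTensor4dDesc(cudnnTensor4dDescriptor_t* desc,
    int n, int c, int h, int w) {
  CHECK_EQ(cudnnCreateTensor4dDescriptor(desc), CUDNN_STATUS_SUCCESS);
  CHECK_EQ(cudnnSetTensor4dDescriptor(*desc, CUDNN_TENSOR_NCHW,
      dataType<Dtype>::type, n, c, h, w), CUDNN_STATUS_SUCCESS);
}

}  // namespace cudnn
}  // namespace caffe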
diff --git a/src/caffe/layers/cudnn_softmax_layer.cu b/src/caffe/layers/cudnn_softmax_layer.cu
new file mode 100644 (file)
index 0000000..c333774
--- /dev/null
@@ -0,0 +1,43 @@
+#ifdef USE_CUDNN
+#include <algorithm>
+#include <cfloat>
+#include <vector>
+
+#include "thrust/device_vector.h"
+
+#include "caffe/layer.hpp"
+#include "caffe/util/math_functions.hpp"
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void CuDNNSoftmaxLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  Dtype* top_data = (*top)[0]->mutable_gpu_data();
+  cudnnStatus_t stat = cudnnSoftmaxForward(handle_, CUDNN_SOFTMAX_ACCURATE,
+      CUDNN_SOFTMAX_MODE_CHANNEL,
+      bottom_desc_, bottom_data, top_desc_, top_data);
+  CHECK_EQ(stat, CUDNN_STATUS_SUCCESS) << "Error in cudnnSoftmaxForward.";
+}
+
+template <typename Dtype>
+void CuDNNSoftmaxLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
+  if (propagate_down[0]) {
+    const Dtype* top_data = top[0]->gpu_data();
+    const Dtype* top_diff = top[0]->gpu_diff();
+    const Dtype* bottom_data = (*bottom)[0]->gpu_data();
+    Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+    cudnnStatus_t stat = cudnnSoftmaxBackward(handle_, CUDNN_SOFTMAX_ACCURATE,
+        CUDNN_SOFTMAX_MODE_CHANNEL,
+        top_desc_, top_data, top_desc_, top_diff, bottom_desc_, bottom_diff);
+    CHECK_EQ(stat, CUDNN_STATUS_SUCCESS) << "Error in cudnnSoftmaxBackward.";
+  }
+}
+
+INSTANTIATE_CLASS(CuDNNSoftmaxLayer);
+
+}  // namespace caffe
+#endif
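
For reference, CUDNN_SOFTMAX_MODE_CHANNEL takes an independent softmax over the channel axis at each (n, h, w) position, which is exactly the invariant the tests below check. A plain CPU sketch for an NCHW buffer, handy for sanity-checking the GPU output; the max subtraction mirrors what CUDNN_SOFTMAX_ACCURATE does to avoid overflow in exp().

#include <algorithm>
#include <cfloat>
#include <cmath>

// CPU reference for a channel-wise softmax over an NCHW buffer.
void channel_softmax(const float* in, float* out,
                     int num, int channels, int height, int width) {
  const int plane = height * width;
  for (int n = 0; n < num; ++n) {
    for (int hw = 0; hw < plane; ++hw) {
      const int base = n * channels * plane + hw;
      // Subtract the per-position max before exp() for numerical stability.
      float max_val = -FLT_MAX;
      for (int c = 0; c < channels; ++c)
        max_val = std::max(max_val, in[base + c * plane]);
      float sum = 0.f;
      for (int c = 0; c < channels; ++c)
        sum += std::exp(in[base + c * plane] - max_val);
      for (int c = 0; c < channels; ++c)
        out[base + c * plane] = std::exp(in[base + c * plane] - max_val) / sum;
    }
  }
}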
index 29767ac..952db74 100644 (file)
@@ -1,4 +1,3 @@
-//
 #include <algorithm>
 #include <vector>
 
index 9f45f76..41f643f 100644 (file)
@@ -80,4 +80,72 @@ TYPED_TEST(SoftmaxLayerTest, TestGradient) {
       &(this->blob_top_vec_));
 }
 
+#ifdef USE_CUDNN
+template <typename Dtype>
+class CuDNNSoftmaxLayerTest : public ::testing::Test {
+ protected:
+  CuDNNSoftmaxLayerTest()
+      : blob_bottom_(new Blob<Dtype>(2, 10, 2, 3)),
+        blob_top_(new Blob<Dtype>()) {
+    // fill the values
+    FillerParameter filler_param;
+    GaussianFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_);
+    blob_bottom_vec_.push_back(blob_bottom_);
+    blob_top_vec_.push_back(blob_top_);
+  }
+  virtual ~CuDNNSoftmaxLayerTest() { delete blob_bottom_; delete blob_top_; }
+  Blob<Dtype>* const blob_bottom_;
+  Blob<Dtype>* const blob_top_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+TYPED_TEST_CASE(CuDNNSoftmaxLayerTest, TestDtypes);
+
+TYPED_TEST(CuDNNSoftmaxLayerTest, TestForwardCuDNN) {
+  Caffe::set_mode(Caffe::GPU);
+  LayerParameter layer_param;
+  CuDNNSoftmaxLayer<TypeParam> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  // Test sum
+  for (int i = 0; i < this->blob_bottom_->num(); ++i) {
+    for (int k = 0; k < this->blob_bottom_->height(); ++k) {
+      for (int l = 0; l < this->blob_bottom_->width(); ++l) {
+        TypeParam sum = 0;
+        for (int j = 0; j < this->blob_top_->channels(); ++j) {
+          sum += this->blob_top_->data_at(i, j, k, l);
+        }
+        EXPECT_GE(sum, 0.999);
+        EXPECT_LE(sum, 1.001);
+        // Test exact values
+        TypeParam scale = 0;
+        for (int j = 0; j < this->blob_bottom_->channels(); ++j) {
+          scale += exp(this->blob_bottom_->data_at(i, j, k, l));
+        }
+        for (int j = 0; j < this->blob_bottom_->channels(); ++j) {
+          EXPECT_GE(this->blob_top_->data_at(i, j, k, l) + 1e-4,
+              exp(this->blob_bottom_->data_at(i, j, k, l)) / scale)
+              << "debug: " << i << " " << j;
+          EXPECT_LE(this->blob_top_->data_at(i, j, k, l) - 1e-4,
+              exp(this->blob_bottom_->data_at(i, j, k, l)) / scale)
+              << "debug: " << i << " " << j;
+        }
+      }
+    }
+  }
+}
+
+TYPED_TEST(CuDNNSoftmaxLayerTest, TestGradientCuDNN) {
+  Caffe::set_mode(Caffe::GPU);
+  LayerParameter layer_param;
+  CuDNNSoftmaxLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-3);
+  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+      &(this->blob_top_vec_));
+}
+
+#endif
+
 }  // namespace caffe