Test for im2col kernel
authorJames Thewlis <jamt9000@gmail.com>
Thu, 26 Jun 2014 17:12:30 +0000 (18:12 +0100)
committerJames Thewlis <jamt9000@gmail.com>
Fri, 27 Jun 2014 09:15:41 +0000 (10:15 +0100)
With associated Makefile changes for .cu tests

This tests that the grid-stride loop works for im2col,
using the CPU version as a reference.

Makefile
src/caffe/test/test_im2col_kernel.cu [new file with mode: 0644]

index 77e2ff5..1407274 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -30,11 +30,12 @@ CXX_SRCS := $(shell find src/$(PROJECT) ! -name "test_*.cpp" -name "*.cpp")
 # HXX_SRCS are the header files
 HXX_SRCS := $(shell find include/$(PROJECT) -name "*.hpp")
 # CU_SRCS are the cuda source files
-CU_SRCS := $(shell find src/$(PROJECT) -name "*.cu")
+CU_SRCS := $(shell find src/$(PROJECT) ! -name "test_*.cu" -name "*.cu")
 # TEST_SRCS are the test source files
 TEST_MAIN_SRC := src/$(PROJECT)/test/test_caffe_main.cpp
 TEST_SRCS := $(shell find src/$(PROJECT) -name "test_*.cpp")
 TEST_SRCS := $(filter-out $(TEST_MAIN_SRC), $(TEST_SRCS))
+TEST_CU_SRCS := $(shell find src/$(PROJECT) -name "test_*.cu")
 GTEST_SRC := src/gtest/gtest-all.cpp
 # TEST_HDRS are the test header files
 TEST_HDRS := $(shell find src/$(PROJECT) -name "test_*.hpp")
@@ -101,7 +102,9 @@ OBJS := $(PROTO_OBJS) $(CXX_OBJS) $(CU_OBJS)
 TOOL_OBJS := $(addprefix $(BUILD_DIR)/, ${TOOL_SRCS:.cpp=.o})
 TOOL_BUILD_DIR := $(BUILD_DIR)/tools
 TEST_BUILD_DIR := $(BUILD_DIR)/src/$(PROJECT)/test
-TEST_OBJS := $(addprefix $(BUILD_DIR)/, ${TEST_SRCS:.cpp=.o})
+TEST_CXX_OBJS := $(addprefix $(BUILD_DIR)/, ${TEST_SRCS:.cpp=.o})
+TEST_CU_OBJS := $(addprefix $(BUILD_DIR)/, ${TEST_CU_SRCS:.cu=.cuo})
+TEST_OBJS := $(TEST_CXX_OBJS) $(TEST_CU_OBJS)
 GTEST_OBJ := $(addprefix $(BUILD_DIR)/, ${GTEST_SRC:.cpp=.o})
 GTEST_BUILD_DIR := $(dir $(GTEST_OBJ))
 EXAMPLE_OBJS := $(addprefix $(BUILD_DIR)/, ${EXAMPLE_SRCS:.cpp=.o})
@@ -329,13 +332,18 @@ $(TEST_BUILD_DIR)/%.o: src/$(PROJECT)/test/%.cpp $(HXX_SRCS) $(TEST_HDRS) \
        $(CXX) $< $(CXXFLAGS) -c -o $@
        @ echo
 
+$(TEST_BUILD_DIR)/%.cuo: src/$(PROJECT)/test/%.cu $(HXX_SRCS) $(TEST_HDRS) \
+               | $(TEST_BUILD_DIR)
+       $(CUDA_DIR)/bin/nvcc $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@
+       @ echo
+
 $(TEST_ALL_BIN): $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) $(STATIC_NAME) \
                | $(TEST_BIN_DIR)
        $(CXX) $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) $(STATIC_NAME) \
                -o $@ $(LINKFLAGS) $(LDFLAGS)
        @ echo
 
-$(TEST_BIN_DIR)/%.testbin: $(TEST_BUILD_DIR)/%.o $(GTEST_OBJ) $(STATIC_NAME) \
+$(TEST_BIN_DIR)/%.testbin: $(TEST_BUILD_DIR)/%.*o $(GTEST_OBJ) $(STATIC_NAME) \
                | $(TEST_BIN_DIR)
        $(CXX) $(TEST_MAIN_SRC) $< $(GTEST_OBJ) $(STATIC_NAME) \
                -o $@ $(LINKFLAGS) $(LDFLAGS)
diff --git a/src/caffe/test/test_im2col_kernel.cu b/src/caffe/test/test_im2col_kernel.cu
new file mode 100644 (file)
index 0000000..f49a5b7
--- /dev/null
@@ -0,0 +1,125 @@
+// Copyright 2014 BVLC and contributors.
+
+#include <cstring>
+#include <vector>
+
+#include "cuda_runtime.h"
+#include "gtest/gtest.h"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/im2col.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+
+namespace caffe {
+
+// Forward declare kernel functions
+template <typename Dtype>
+__global__ void im2col_gpu_kernel(const int n, const Dtype* data_im,
+    const int height, const int width, const int ksize, const int pad,
+    const int stride, const int height_col, const int width_col,
+    Dtype* data_col);
+
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
+
+template <typename Dtype>
+class Im2colKernelTest : public ::testing::Test {
+ protected:
+  Im2colKernelTest()
+        // big so launches > 1024 threads
+      : blob_bottom_(new Blob<Dtype>(5, 500, 10, 10)),
+        blob_top_(new Blob<Dtype>()),
+        blob_top_cpu_(new Blob<Dtype>()) {
+    FillerParameter filler_param;
+    GaussianFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_);
+
+    height_ = blob_bottom_->height();
+    width_ = blob_bottom_->width();
+    channels_ = blob_bottom_->channels();
+    pad_ = 0;
+    stride_ = 2;
+    kernel_size_ = 3;
+    height_col_ = (height_ + 2 * pad_ - kernel_size_) / stride_ + 1;
+    width_col_ = (width_ + 2 * pad_ - kernel_size_) / stride_ + 1;
+  }
+
+  virtual ~Im2colKernelTest() {
+      delete blob_bottom_;
+      delete blob_top_;
+      delete blob_top_cpu_;
+  }
+
+  Blob<Dtype>* const blob_bottom_;
+  Blob<Dtype>* const blob_top_;
+  Blob<Dtype>* const blob_top_cpu_;
+  int height_;
+  int width_;
+  int channels_;
+  int pad_;
+  int stride_;
+  int kernel_size_;
+  int height_col_;
+  int width_col_;
+};
+
+typedef ::testing::Types<float, double> Dtypes;
+TYPED_TEST_CASE(Im2colKernelTest, Dtypes);
+
+TYPED_TEST(Im2colKernelTest, TestGPU) {
+  Caffe::set_mode(Caffe::GPU);
+
+  // Reshape the blobs to correct size for im2col output
+  this->blob_top_->Reshape(this->blob_bottom_->num(),
+          this->channels_ * this->kernel_size_ * this->kernel_size_,
+          this->height_col_,
+          this->width_col_);
+
+  this->blob_top_cpu_->Reshape(this->blob_bottom_->num(),
+          this->channels_ * this->kernel_size_ * this->kernel_size_,
+          this->height_col_,
+          this->width_col_);
+
+  const TypeParam* bottom_data = this->blob_bottom_->gpu_data();
+  TypeParam* top_data = this->blob_top_->mutable_gpu_data();
+  TypeParam* cpu_data = this->blob_top_cpu_->mutable_cpu_data();
+
+  // CPU Version
+  for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+    im2col_cpu(this->blob_bottom_->cpu_data() + this->blob_bottom_->offset(n),
+      this->channels_, this->height_, this->width_, this->kernel_size_,
+      this->pad_, this->stride_, cpu_data + this->blob_top_cpu_->offset(n));
+  }
+
+  // GPU version
+  int num_kernels = this->channels_ * this->height_col_ * this->width_col_;
+  int default_grid_dim = CAFFE_GET_BLOCKS(num_kernels);
+
+  // Launch with different grid sizes
+  for (int grid_div = 2; grid_div <= 8; grid_div++) {
+    for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+      int grid_dim = default_grid_dim/grid_div;
+      // NOLINT_NEXT_LINE(whitespace/operators)
+      im2col_gpu_kernel<TypeParam><<<grid_dim, CAFFE_CUDA_NUM_THREADS>>>(
+        num_kernels, bottom_data + this->blob_bottom_->offset(n),
+        this->height_, this->width_, this->kernel_size_, this->pad_,
+        this->stride_, this->height_col_, this->width_col_,
+        top_data + this->blob_top_->offset(n));
+      CUDA_POST_KERNEL_CHECK;
+    }
+
+    // Compare results against CPU version
+    for (int i = 0; i < this->blob_top_->count(); ++i) {
+      TypeParam cpuval = cpu_data[i];
+      TypeParam gpuval = this->blob_top_->cpu_data()[i];
+      EXPECT_EQ(cpuval, gpuval);
+      if (cpuval != gpuval) {
+        break;
+      }
+    }
+  }
+}
+
+}  // namespace caffe