From 6450e0f0c8e19cde6c5e3c26ffb50deba4308bc7 Mon Sep 17 00:00:00 2001 From: James Thewlis Date: Thu, 26 Jun 2014 18:12:30 +0100 Subject: [PATCH] Test for im2col kernel With associated Makefile changes for .cu tests This tests that the grid-stride loop works for im2col, using the CPU version as a reference. --- Makefile | 14 +++- src/caffe/test/test_im2col_kernel.cu | 125 +++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 3 deletions(-) create mode 100644 src/caffe/test/test_im2col_kernel.cu diff --git a/Makefile b/Makefile index 77e2ff5..1407274 100644 --- a/Makefile +++ b/Makefile @@ -30,11 +30,12 @@ CXX_SRCS := $(shell find src/$(PROJECT) ! -name "test_*.cpp" -name "*.cpp") # HXX_SRCS are the header files HXX_SRCS := $(shell find include/$(PROJECT) -name "*.hpp") # CU_SRCS are the cuda source files -CU_SRCS := $(shell find src/$(PROJECT) -name "*.cu") +CU_SRCS := $(shell find src/$(PROJECT) ! -name "test_*.cu" -name "*.cu") # TEST_SRCS are the test source files TEST_MAIN_SRC := src/$(PROJECT)/test/test_caffe_main.cpp TEST_SRCS := $(shell find src/$(PROJECT) -name "test_*.cpp") TEST_SRCS := $(filter-out $(TEST_MAIN_SRC), $(TEST_SRCS)) +TEST_CU_SRCS := $(shell find src/$(PROJECT) -name "test_*.cu") GTEST_SRC := src/gtest/gtest-all.cpp # TEST_HDRS are the test header files TEST_HDRS := $(shell find src/$(PROJECT) -name "test_*.hpp") @@ -101,7 +102,9 @@ OBJS := $(PROTO_OBJS) $(CXX_OBJS) $(CU_OBJS) TOOL_OBJS := $(addprefix $(BUILD_DIR)/, ${TOOL_SRCS:.cpp=.o}) TOOL_BUILD_DIR := $(BUILD_DIR)/tools TEST_BUILD_DIR := $(BUILD_DIR)/src/$(PROJECT)/test -TEST_OBJS := $(addprefix $(BUILD_DIR)/, ${TEST_SRCS:.cpp=.o}) +TEST_CXX_OBJS := $(addprefix $(BUILD_DIR)/, ${TEST_SRCS:.cpp=.o}) +TEST_CU_OBJS := $(addprefix $(BUILD_DIR)/, ${TEST_CU_SRCS:.cu=.cuo}) +TEST_OBJS := $(TEST_CXX_OBJS) $(TEST_CU_OBJS) GTEST_OBJ := $(addprefix $(BUILD_DIR)/, ${GTEST_SRC:.cpp=.o}) GTEST_BUILD_DIR := $(dir $(GTEST_OBJ)) EXAMPLE_OBJS := $(addprefix $(BUILD_DIR)/, ${EXAMPLE_SRCS:.cpp=.o}) @@ -329,13 +332,18 @@ $(TEST_BUILD_DIR)/%.o: src/$(PROJECT)/test/%.cpp $(HXX_SRCS) $(TEST_HDRS) \ $(CXX) $< $(CXXFLAGS) -c -o $@ @ echo +$(TEST_BUILD_DIR)/%.cuo: src/$(PROJECT)/test/%.cu $(HXX_SRCS) $(TEST_HDRS) \ + | $(TEST_BUILD_DIR) + $(CUDA_DIR)/bin/nvcc $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@ + @ echo + $(TEST_ALL_BIN): $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) $(STATIC_NAME) \ | $(TEST_BIN_DIR) $(CXX) $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) $(STATIC_NAME) \ -o $@ $(LINKFLAGS) $(LDFLAGS) @ echo -$(TEST_BIN_DIR)/%.testbin: $(TEST_BUILD_DIR)/%.o $(GTEST_OBJ) $(STATIC_NAME) \ +$(TEST_BIN_DIR)/%.testbin: $(TEST_BUILD_DIR)/%.*o $(GTEST_OBJ) $(STATIC_NAME) \ | $(TEST_BIN_DIR) $(CXX) $(TEST_MAIN_SRC) $< $(GTEST_OBJ) $(STATIC_NAME) \ -o $@ $(LINKFLAGS) $(LDFLAGS) diff --git a/src/caffe/test/test_im2col_kernel.cu b/src/caffe/test/test_im2col_kernel.cu new file mode 100644 index 0000000..f49a5b7 --- /dev/null +++ b/src/caffe/test/test_im2col_kernel.cu @@ -0,0 +1,125 @@ +// Copyright 2014 BVLC and contributors. + +#include +#include + +#include "cuda_runtime.h" +#include "gtest/gtest.h" +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/vision_layers.hpp" +#include "caffe/util/im2col.hpp" + +#include "caffe/test/test_caffe_main.hpp" + +namespace caffe { + +// Forward declare kernel functions +template +__global__ void im2col_gpu_kernel(const int n, const Dtype* data_im, + const int height, const int width, const int ksize, const int pad, + const int stride, const int height_col, const int width_col, + Dtype* data_col); + +extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; + +template +class Im2colKernelTest : public ::testing::Test { + protected: + Im2colKernelTest() + // big so launches > 1024 threads + : blob_bottom_(new Blob(5, 500, 10, 10)), + blob_top_(new Blob()), + blob_top_cpu_(new Blob()) { + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + + height_ = blob_bottom_->height(); + width_ = blob_bottom_->width(); + channels_ = blob_bottom_->channels(); + pad_ = 0; + stride_ = 2; + kernel_size_ = 3; + height_col_ = (height_ + 2 * pad_ - kernel_size_) / stride_ + 1; + width_col_ = (width_ + 2 * pad_ - kernel_size_) / stride_ + 1; + } + + virtual ~Im2colKernelTest() { + delete blob_bottom_; + delete blob_top_; + delete blob_top_cpu_; + } + + Blob* const blob_bottom_; + Blob* const blob_top_; + Blob* const blob_top_cpu_; + int height_; + int width_; + int channels_; + int pad_; + int stride_; + int kernel_size_; + int height_col_; + int width_col_; +}; + +typedef ::testing::Types Dtypes; +TYPED_TEST_CASE(Im2colKernelTest, Dtypes); + +TYPED_TEST(Im2colKernelTest, TestGPU) { + Caffe::set_mode(Caffe::GPU); + + // Reshape the blobs to correct size for im2col output + this->blob_top_->Reshape(this->blob_bottom_->num(), + this->channels_ * this->kernel_size_ * this->kernel_size_, + this->height_col_, + this->width_col_); + + this->blob_top_cpu_->Reshape(this->blob_bottom_->num(), + this->channels_ * this->kernel_size_ * this->kernel_size_, + this->height_col_, + this->width_col_); + + const TypeParam* bottom_data = this->blob_bottom_->gpu_data(); + TypeParam* top_data = this->blob_top_->mutable_gpu_data(); + TypeParam* cpu_data = this->blob_top_cpu_->mutable_cpu_data(); + + // CPU Version + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + im2col_cpu(this->blob_bottom_->cpu_data() + this->blob_bottom_->offset(n), + this->channels_, this->height_, this->width_, this->kernel_size_, + this->pad_, this->stride_, cpu_data + this->blob_top_cpu_->offset(n)); + } + + // GPU version + int num_kernels = this->channels_ * this->height_col_ * this->width_col_; + int default_grid_dim = CAFFE_GET_BLOCKS(num_kernels); + + // Launch with different grid sizes + for (int grid_div = 2; grid_div <= 8; grid_div++) { + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + int grid_dim = default_grid_dim/grid_div; + // NOLINT_NEXT_LINE(whitespace/operators) + im2col_gpu_kernel<<>>( + num_kernels, bottom_data + this->blob_bottom_->offset(n), + this->height_, this->width_, this->kernel_size_, this->pad_, + this->stride_, this->height_col_, this->width_col_, + top_data + this->blob_top_->offset(n)); + CUDA_POST_KERNEL_CHECK; + } + + // Compare results against CPU version + for (int i = 0; i < this->blob_top_->count(); ++i) { + TypeParam cpuval = cpu_data[i]; + TypeParam gpuval = this->blob_top_->cpu_data()[i]; + EXPECT_EQ(cpuval, gpuval); + if (cpuval != gpuval) { + break; + } + } + } +} + +} // namespace caffe -- 2.7.4