From 7c7670c228b7621659cf7237458ea219b8314e27 Mon Sep 17 00:00:00 2001 From: Yangqing Jia Date: Wed, 18 Sep 2013 14:55:52 -0700 Subject: [PATCH] update --- src/caffeine/layers/inner_product_layer.cu | 35 +++++------------------------- src/caffeine/test/test_util_blas.cpp | 24 ++++++++++---------- src/caffeine/util/blas.cpp | 16 +++++++------- src/caffeine/util/blas.hpp | 8 +++---- 4 files changed, 30 insertions(+), 53 deletions(-) diff --git a/src/caffeine/layers/inner_product_layer.cu b/src/caffeine/layers/inner_product_layer.cu index 5afe146..26a7c4b 100644 --- a/src/caffeine/layers/inner_product_layer.cu +++ b/src/caffeine/layers/inner_product_layer.cu @@ -6,6 +6,7 @@ #include "caffeine/filler.hpp" #include "caffeine/layer.hpp" #include "caffeine/vision_layers.hpp" +#include "caffeine/util/blas.hpp" namespace caffeine { @@ -52,36 +53,12 @@ void InnerProductLayer::Forward_cpu(const vector*>& bottom, const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = (*top)[0]->mutable_cpu_data(); const Dtype* weight = this->blobs_[0].cpu_data(); - const Dtype* bias = NULL; + caffeine_cpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, K_, (Dtype)1., + bottom_data, weight, (Dtype)0., top_data); if (biasterm_) { - bias = this->blobs_[1].cpu_data(); - } - switch(sizeof(Dtype)) { - case sizeof(float): - // matrix multiply - cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M_, N_, K_, - 1., (const float*)bottom_data, K_, (const float*)weight, N_, 0., - (float*)top_data, N_); - if (bias) { - // add bias - cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M_, N_, 1, - 1., (const float*)bias_multiplier_->cpu_data(), 1, - (const float*)bias, N_, 1., (float*)top_data, N_); - } - break; - case sizeof(double): - // matrix multiply - cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M_, N_, K_, - 1., (const double*)bottom_data, K_, (const double*)weight, N_, 0., - (double*)top_data, N_); - if (bias) { - cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M_, N_, 1, - 1., (const float*)bias_multiplier_->cpu_data(), 1, - (const float*)bias, N_, 1., (float*)top_data, N_); - } - break; - default: - CHECK(false) << "Unknown data type."; + caffeine_cpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1., + (Dtype*)bias_multiplier_->cpu_data(), this->blobs_[1].cpu_data(), + (Dtype)1., top_data); } } diff --git a/src/caffeine/test/test_util_blas.cpp b/src/caffeine/test/test_util_blas.cpp index 000311d..219967f 100644 --- a/src/caffeine/test/test_util_blas.cpp +++ b/src/caffeine/test/test_util_blas.cpp @@ -31,12 +31,12 @@ TYPED_TEST(GemmTest, TestGemm) { if (sizeof(TypeParam) == 4 || CAFFEINE_TEST_CUDA_PROP.major >= 2) { //[1,2,3; 4 5 6] * [1,2,3,4; 5,6,7,8; 9,10,11,12]; - decaf_cpu_gemm(CblasNoTrans, CblasNoTrans, 2, 4, 3, 1., + caffeine_cpu_gemm(CblasNoTrans, CblasNoTrans, 2, 4, 3, 1., A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data()); for (int i = 0; i < 8; ++i) { EXPECT_EQ(C.cpu_data()[i], result[i]); } - decaf_gpu_gemm(CblasNoTrans, CblasNoTrans, 2, 4, 3, 1., + caffeine_gpu_gemm(CblasNoTrans, CblasNoTrans, 2, 4, 3, 1., A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data()); for (int i = 0; i < 8; ++i) { EXPECT_EQ(C.cpu_data()[i], result[i]); @@ -45,12 +45,12 @@ TYPED_TEST(GemmTest, TestGemm) { // Test when we have a transposed A A.Reshape(1,1,3,2); memcpy(A.mutable_cpu_data(), A_reshape_data, 6 * sizeof(TypeParam)); - decaf_cpu_gemm(CblasTrans, CblasNoTrans, 2, 4, 3, 1., + caffeine_cpu_gemm(CblasTrans, CblasNoTrans, 2, 4, 3, 1., A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data()); for (int i = 0; i < 8; ++i) { EXPECT_EQ(C.cpu_data()[i], result[i]); } - decaf_gpu_gemm(CblasTrans, CblasNoTrans, 2, 4, 3, 1., + caffeine_gpu_gemm(CblasTrans, CblasNoTrans, 2, 4, 3, 1., A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data()); for (int i = 0; i < 8; ++i) { EXPECT_EQ(C.cpu_data()[i], result[i]); @@ -59,12 +59,12 @@ TYPED_TEST(GemmTest, TestGemm) { // Test when we have a transposed A and a transposed B too B.Reshape(1,1,4,3); memcpy(B.mutable_cpu_data(), B_reshape_data, 12 * sizeof(TypeParam)); - decaf_cpu_gemm(CblasTrans, CblasTrans, 2, 4, 3, 1., + caffeine_cpu_gemm(CblasTrans, CblasTrans, 2, 4, 3, 1., A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data()); for (int i = 0; i < 8; ++i) { EXPECT_EQ(C.cpu_data()[i], result[i]); } - decaf_gpu_gemm(CblasTrans, CblasTrans, 2, 4, 3, 1., + caffeine_gpu_gemm(CblasTrans, CblasTrans, 2, 4, 3, 1., A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data()); for (int i = 0; i < 8; ++i) { EXPECT_EQ(C.cpu_data()[i], result[i]); @@ -73,12 +73,12 @@ TYPED_TEST(GemmTest, TestGemm) { // Test when we have a transposed B A.Reshape(1,1,2,3); memcpy(A.mutable_cpu_data(), data, 6 * sizeof(TypeParam)); - decaf_cpu_gemm(CblasNoTrans, CblasTrans, 2, 4, 3, 1., + caffeine_cpu_gemm(CblasNoTrans, CblasTrans, 2, 4, 3, 1., A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data()); for (int i = 0; i < 8; ++i) { EXPECT_EQ(C.cpu_data()[i], result[i]); } - decaf_gpu_gemm(CblasNoTrans, CblasTrans, 2, 4, 3, 1., + caffeine_gpu_gemm(CblasNoTrans, CblasTrans, 2, 4, 3, 1., A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data()); for (int i = 0; i < 8; ++i) { EXPECT_EQ(C.cpu_data()[i], result[i]); @@ -100,12 +100,12 @@ TYPED_TEST(GemmTest, TestGemv) { memcpy(x.mutable_cpu_data(), data, 3 * sizeof(TypeParam)); if (sizeof(TypeParam) == 4 || CAFFEINE_TEST_CUDA_PROP.major >= 2) { - decaf_cpu_gemv(CblasNoTrans, 2, 3, 1., A.cpu_data(), + caffeine_cpu_gemv(CblasNoTrans, 2, 3, 1., A.cpu_data(), x.cpu_data(), 0., y.mutable_cpu_data()); for (int i = 0; i < 2; ++i) { EXPECT_EQ(y.cpu_data()[i], result_2[i]); } - decaf_gpu_gemv(CblasNoTrans, 2, 3, 1., A.gpu_data(), + caffeine_gpu_gemv(CblasNoTrans, 2, 3, 1., A.gpu_data(), x.gpu_data(), 0., y.mutable_gpu_data()); for (int i = 0; i < 2; ++i) { EXPECT_EQ(y.cpu_data()[i], result_2[i]); @@ -113,12 +113,12 @@ TYPED_TEST(GemmTest, TestGemv) { // Test transpose case memcpy(y.mutable_cpu_data(), data, 2 * sizeof(TypeParam)); - decaf_cpu_gemv(CblasTrans, 2, 3, 1., A.cpu_data(), + caffeine_cpu_gemv(CblasTrans, 2, 3, 1., A.cpu_data(), y.cpu_data(), 0., x.mutable_cpu_data()); for (int i = 0; i < 3; ++i) { EXPECT_EQ(x.cpu_data()[i], result_3[i]); } - decaf_gpu_gemv(CblasTrans, 2, 3, 1., A.gpu_data(), + caffeine_gpu_gemv(CblasTrans, 2, 3, 1., A.gpu_data(), y.gpu_data(), 0., x.mutable_gpu_data()); for (int i = 0; i < 3; ++i) { EXPECT_EQ(x.cpu_data()[i], result_3[i]); diff --git a/src/caffeine/util/blas.cpp b/src/caffeine/util/blas.cpp index a123632..e03bac2 100644 --- a/src/caffeine/util/blas.cpp +++ b/src/caffeine/util/blas.cpp @@ -6,7 +6,7 @@ namespace caffeine { template<> -void decaf_cpu_gemm(const CBLAS_TRANSPOSE TransA, +void caffeine_cpu_gemm(const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float* A, const float* B, const float beta, float* C) { @@ -17,7 +17,7 @@ void decaf_cpu_gemm(const CBLAS_TRANSPOSE TransA, } template<> -void decaf_cpu_gemm(const CBLAS_TRANSPOSE TransA, +void caffeine_cpu_gemm(const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double* A, const double* B, const double beta, double* C) { @@ -28,7 +28,7 @@ void decaf_cpu_gemm(const CBLAS_TRANSPOSE TransA, } template <> -void decaf_gpu_gemm(const CBLAS_TRANSPOSE TransA, +void caffeine_gpu_gemm(const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float* A, const float* B, const float beta, float* C) { @@ -44,7 +44,7 @@ void decaf_gpu_gemm(const CBLAS_TRANSPOSE TransA, } template <> -void decaf_gpu_gemm(const CBLAS_TRANSPOSE TransA, +void caffeine_gpu_gemm(const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double* A, const double* B, const double beta, double* C) { @@ -60,21 +60,21 @@ void decaf_gpu_gemm(const CBLAS_TRANSPOSE TransA, } template <> -void decaf_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, +void caffeine_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, const float* A, const float* x, const float beta, float* y) { cblas_sgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1); } template <> -void decaf_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, +void caffeine_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, const double* A, const double* x, const double beta, double* y) { cblas_dgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1); } template <> -void decaf_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, +void caffeine_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, const float* A, const float* x, const float beta, float* y) { cublasOperation_t cuTransA = @@ -84,7 +84,7 @@ void decaf_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, } template <> -void decaf_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, +void caffeine_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, const double* A, const double* x, const double beta, double* y) { cublasOperation_t cuTransA = diff --git a/src/caffeine/util/blas.hpp b/src/caffeine/util/blas.hpp index b1f4e3d..00eeea4 100644 --- a/src/caffeine/util/blas.hpp +++ b/src/caffeine/util/blas.hpp @@ -9,7 +9,7 @@ namespace caffeine { // Decaf gemm provides a simpler interface to the gemm functions, with the // limitation that the data has to be contiguous in memory. template -inline void decaf_cpu_gemm(const CBLAS_TRANSPOSE TransA, +void caffeine_cpu_gemm(const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, Dtype* C); @@ -18,18 +18,18 @@ inline void decaf_cpu_gemm(const CBLAS_TRANSPOSE TransA, // gemm function - following the c convention and calling the fortran-order // gpu code under the hood. template -void decaf_gpu_gemm(const CBLAS_TRANSPOSE TransA, +void caffeine_gpu_gemm(const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, Dtype* C); template -void decaf_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, +void caffeine_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta, Dtype* y); template -void decaf_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, +void caffeine_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta, Dtype* y); -- 2.7.4