From c48042faae479a81cb1ef1ece854c135f88a3f7b Mon Sep 17 00:00:00 2001
From: Yangqing Jia
Date: Wed, 18 Sep 2013 14:45:52 -0700
Subject: [PATCH] update

---
 .../{test_util_gemm.cpp => test_util_blas.cpp}     | 41 +++++++++++++++++++++-
 src/caffeine/util/{gemm.cpp => blas.cpp}           | 35 +++++++++++++++++-
 src/caffeine/util/{gemm.hpp => blas.hpp}           | 11 ++++++
 3 files changed, 85 insertions(+), 2 deletions(-)
 rename src/caffeine/test/{test_util_gemm.cpp => test_util_blas.cpp} (68%)
 rename src/caffeine/util/{gemm.cpp => blas.cpp} (63%)
 rename src/caffeine/util/{gemm.hpp => blas.hpp} (71%)

diff --git a/src/caffeine/test/test_util_gemm.cpp b/src/caffeine/test/test_util_blas.cpp
similarity index 68%
rename from src/caffeine/test/test_util_gemm.cpp
rename to src/caffeine/test/test_util_blas.cpp
index 9ea7160..000311d 100644
--- a/src/caffeine/test/test_util_gemm.cpp
+++ b/src/caffeine/test/test_util_blas.cpp
@@ -5,7 +5,7 @@
 
 #include "gtest/gtest.h"
 #include "caffeine/blob.hpp"
-#include "caffeine/util/gemm.hpp"
+#include "caffeine/util/blas.hpp"
 
 namespace caffeine {
 
@@ -89,4 +89,43 @@ TYPED_TEST(GemmTest, TestGemm) {
 }
 
 
+TYPED_TEST(GemmTest, TestGemv) {
+  Blob A(1,1,2,3);
+  Blob x(1,1,1,3);
+  Blob y(1,1,1,2);
+  TypeParam data[6] = {1, 2, 3, 4, 5, 6};
+  TypeParam result_2[2] = {14, 32};
+  TypeParam result_3[3] = {9, 12, 15};
+  memcpy(A.mutable_cpu_data(), data, 6 * sizeof(TypeParam));
+  memcpy(x.mutable_cpu_data(), data, 3 * sizeof(TypeParam));
+
+  if (sizeof(TypeParam) == 4 || CAFFEINE_TEST_CUDA_PROP.major >= 2) {
+    decaf_cpu_gemv(CblasNoTrans, 2, 3, 1., A.cpu_data(),
+        x.cpu_data(), 0., y.mutable_cpu_data());
+    for (int i = 0; i < 2; ++i) {
+      EXPECT_EQ(y.cpu_data()[i], result_2[i]);
+    }
+    decaf_gpu_gemv(CblasNoTrans, 2, 3, 1., A.gpu_data(),
+        x.gpu_data(), 0., y.mutable_gpu_data());
+    for (int i = 0; i < 2; ++i) {
+      EXPECT_EQ(y.cpu_data()[i], result_2[i]);
+    }
+
+    // Test transpose case
+    memcpy(y.mutable_cpu_data(), data, 2 * sizeof(TypeParam));
+    decaf_cpu_gemv(CblasTrans, 2, 3, 1., A.cpu_data(),
+        y.cpu_data(), 0., x.mutable_cpu_data());
+    for (int i = 0; i < 3; ++i) {
+      EXPECT_EQ(x.cpu_data()[i], result_3[i]);
+    }
+    decaf_gpu_gemv(CblasTrans, 2, 3, 1., A.gpu_data(),
+        y.gpu_data(), 0., x.mutable_gpu_data());
+    for (int i = 0; i < 3; ++i) {
+      EXPECT_EQ(x.cpu_data()[i], result_3[i]);
+    }
+  } else {
+    LOG(ERROR) << "Skipping test due to old architecture.";
+  }
+}
+
 }
diff --git a/src/caffeine/util/gemm.cpp b/src/caffeine/util/blas.cpp
similarity index 63%
rename from src/caffeine/util/gemm.cpp
rename to src/caffeine/util/blas.cpp
index 74a3766..a123632 100644
--- a/src/caffeine/util/gemm.cpp
+++ b/src/caffeine/util/blas.cpp
@@ -1,7 +1,7 @@
 #include
 #include
 #include "caffeine/common.hpp"
-#include "caffeine/util/gemm.hpp"
+#include "caffeine/util/blas.hpp"
 
 namespace caffeine {
 
@@ -59,5 +59,38 @@ void decaf_gpu_gemm(const CBLAS_TRANSPOSE TransA,
       N, M, K, &alpha, B, ldb, A, lda, &beta, C, N));
 }
 
+template <>
+void decaf_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M,
+    const int N, const float alpha, const float* A, const float* x,
+    const float beta, float* y) {
+  cblas_sgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1);
+}
+
+template <>
+void decaf_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M,
+    const int N, const double alpha, const double* A, const double* x,
+    const double beta, double* y) {
+  cblas_dgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1);
+}
+
+template <>
+void decaf_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M,
+    const int N, const float alpha, const float* A, const float* x,
+    const float beta, float* y) {
+  cublasOperation_t cuTransA =
+      (TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
+  CUBLAS_CHECK(cublasSgemv(Caffeine::cublas_handle(), cuTransA, N, M, &alpha,
+      A, N, x, 1, &beta, y, 1));
+}
+
+template <>
+void decaf_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M,
+    const int N, const double alpha, const double* A, const double* x,
+    const double beta, double* y) {
+  cublasOperation_t cuTransA =
+      (TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
+  CUBLAS_CHECK(cublasDgemv(Caffeine::cublas_handle(), cuTransA, N, M, &alpha,
+      A, N, x, 1, &beta, y, 1));
+}
 
 } // namespace caffeine
diff --git a/src/caffeine/util/gemm.hpp b/src/caffeine/util/blas.hpp
similarity index 71%
rename from src/caffeine/util/gemm.hpp
rename to src/caffeine/util/blas.hpp
index f6af9c3..b1f4e3d 100644
--- a/src/caffeine/util/gemm.hpp
+++ b/src/caffeine/util/blas.hpp
@@ -23,6 +23,17 @@ void decaf_gpu_gemm(const CBLAS_TRANSPOSE TransA,
     const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta,
     Dtype* C);
 
+template
+void decaf_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
+    const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
+    Dtype* y);
+
+template
+void decaf_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
+    const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
+    Dtype* y);
+
 } // namespace caffeine
 
+
 #endif // CAFFEINE_UTIL_GEMM_H_
--
2.7.4