#include "gtest/gtest.h"
#include "caffeine/blob.hpp"
-#include "caffeine/util/gemm.hpp"
+#include "caffeine/util/blas.hpp"
namespace caffeine {
}
+// Exercises decaf_cpu_gemv and decaf_gpu_gemv (y = alpha * op(A) * x +
+// beta * y) on a row-major 2x3 matrix, in both the NoTrans and the Trans
+// configuration, comparing against hand-computed expected values.
+TYPED_TEST(GemmTest, TestGemv) {
+ Blob<TypeParam> A(1,1,2,3);
+ Blob<TypeParam> x(1,1,1,3);
+ Blob<TypeParam> y(1,1,1,2);
+ TypeParam data[6] = {1, 2, 3, 4, 5, 6};
+ // A * (1,2,3)^T = (14, 32)^T for A = [[1,2,3],[4,5,6]].
+ TypeParam result_2[2] = {14, 32};
+ // A^T * (1,2)^T = (9, 12, 15)^T.
+ TypeParam result_3[3] = {9, 12, 15};
+ memcpy(A.mutable_cpu_data(), data, 6 * sizeof(TypeParam));
+ memcpy(x.mutable_cpu_data(), data, 3 * sizeof(TypeParam));
+
+ // NOTE(review): sizeof(TypeParam) == 4 short-circuits the device check --
+ // presumably the double path needs compute capability >= 2; confirm.
+ if (sizeof(TypeParam) == 4 || CAFFEINE_TEST_CUDA_PROP.major >= 2) {
+ decaf_cpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.cpu_data(),
+ x.cpu_data(), 0., y.mutable_cpu_data());
+ // Exact comparison is safe: all expected values are small integers,
+ // exactly representable in float and double.
+ for (int i = 0; i < 2; ++i) {
+ EXPECT_EQ(y.cpu_data()[i], result_2[i]);
+ }
+ decaf_gpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.gpu_data(),
+ x.gpu_data(), 0., y.mutable_gpu_data());
+ for (int i = 0; i < 2; ++i) {
+ EXPECT_EQ(y.cpu_data()[i], result_2[i]);
+ }
+
+ // Test transpose case: reuse y as the length-2 input vector (1, 2).
+ memcpy(y.mutable_cpu_data(), data, 2 * sizeof(TypeParam));
+ decaf_cpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.cpu_data(),
+ y.cpu_data(), 0., x.mutable_cpu_data());
+ for (int i = 0; i < 3; ++i) {
+ EXPECT_EQ(x.cpu_data()[i], result_3[i]);
+ }
+ decaf_gpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.gpu_data(),
+ y.gpu_data(), 0., x.mutable_gpu_data());
+ for (int i = 0; i < 3; ++i) {
+ EXPECT_EQ(x.cpu_data()[i], result_3[i]);
+ }
+ } else {
+ LOG(ERROR) << "Skipping test due to old architecture.";
+ }
+}
+
+
}
#include <mkl.h>
#include <cublas_v2.h>
#include "caffeine/common.hpp"
-#include "caffeine/util/gemm.hpp"
+#include "caffeine/util/blas.hpp"
namespace caffeine {
N, M, K, &alpha, B, ldb, A, lda, &beta, C, N));
}
+// Float instantiation of decaf_cpu_gemv: y = alpha * op(A) * x + beta * y,
+// with A stored row-major M x N (so the leading dimension is N) and unit
+// strides for both x and y.
+template <>
+void decaf_cpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M,
+ const int N, const float alpha, const float* A, const float* x,
+ const float beta, float* y) {
+ cblas_sgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1);
+}
+
+// Double instantiation of decaf_cpu_gemv; identical to the float version
+// except for the cblas_dgemv call (row-major A, leading dimension N, unit
+// strides for x and y).
+template <>
+void decaf_cpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M,
+ const int N, const double alpha, const double* A, const double* x,
+ const double beta, double* y) {
+ cblas_dgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1);
+}
+
+// Float instantiation of decaf_gpu_gemv. cuBLAS assumes column-major
+// storage, so the row-major M x N matrix A is handed over as an N x M
+// column-major matrix and the transpose flag is inverted to obtain the
+// same product.
+template <>
+void decaf_gpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M,
+ const int N, const float alpha, const float* A, const float* x,
+ const float beta, float* y) {
+ cublasOperation_t op = CUBLAS_OP_N;
+ if (TransA == CblasNoTrans) {
+ op = CUBLAS_OP_T;
+ }
+ CUBLAS_CHECK(cublasSgemv(Caffeine::cublas_handle(), op, N, M, &alpha, A, N,
+ x, 1, &beta, y, 1));
+}
+
+// Double instantiation of decaf_gpu_gemv. As in the float version, the
+// row-major matrix is passed to column-major cuBLAS with swapped dimensions
+// and an inverted transpose flag.
+template <>
+void decaf_gpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M,
+ const int N, const double alpha, const double* A, const double* x,
+ const double beta, double* y) {
+ const cublasOperation_t op =
+ (TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
+ CUBLAS_CHECK(cublasDgemv(Caffeine::cublas_handle(), op, N, M, &alpha, A, N,
+ x, 1, &beta, y, 1));
+}
} // namespace caffeine
const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta,
Dtype* C);
+// Computes y = alpha * op(A) * x + beta * y on the CPU, where op(A) is A or
+// A^T according to TransA, and A is a row-major M x N matrix.
+template <typename Dtype>
+void decaf_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
+ Dtype* y);
+
+// GPU counterpart of decaf_cpu_gemv; A, x and y must point to device memory
+// (see the gpu_data() usage in the tests).
+template <typename Dtype>
+void decaf_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
+ Dtype* y);
+
} // namespace caffeine
+
#endif // CAFFEINE_UTIL_GEMM_H_