From 939877bf4b5f37307eebd0cb035d65212b9449f6 Mon Sep 17 00:00:00 2001 From: PenghuiCheng <42089598+penghuicheng@users.noreply.github.com> Date: Fri, 7 Dec 2018 12:01:44 -0800 Subject: [PATCH] Implementation of WeightedSum op for mkl-dnn and fix FC op output shape issue. Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/14407 Reviewed By: yinghai Differential Revision: D13364364 Pulled By: wesolwsk fbshipit-source-id: e69bcd1bc52e35b2f0e45e5dc40184f1bd66605d --- caffe2/ideep/operators/fully_connected_op.cc | 10 +++- caffe2/ideep/operators/operator_fallback_ideep.cc | 5 -- caffe2/ideep/operators/utility_ops.cc | 36 ++++++++++++++ caffe2/python/ideep/fc_op_test.py | 41 +++++++++++++++- caffe2/python/ideep/weightedsum_op_test.py | 57 +++++++++++++++++++++++ 5 files changed, 141 insertions(+), 8 deletions(-) create mode 100644 caffe2/python/ideep/weightedsum_op_test.py diff --git a/caffe2/ideep/operators/fully_connected_op.cc b/caffe2/ideep/operators/fully_connected_op.cc index 80ed367..609e528 100644 --- a/caffe2/ideep/operators/fully_connected_op.cc +++ b/caffe2/ideep/operators/fully_connected_op.cc @@ -101,9 +101,17 @@ class IDEEPFullyConnectedGradientOp final : public IDEEPOperator { ideep::inner_product_backward_weights::compute(X_in, dY, *dfilter, *dbias); + /** + * In mkl-dnn, the weight gradient shape is determined by X_in, + * so we should ensure that the weight gradient shape is consistent with the weight shape. 
+ */ + if (dfilter->get_dims() != filter.get_dims()) { + dfilter->reshape(filter.get_dims()); + } + if (OutputSize() > INPUT_GRAD) { ideep::inner_product_backward_data::compute( - dY, filter_in, X_in.get_dims(), *Output(INPUT_GRAD)); + dY, filter_in, X.get_dims(), *Output(INPUT_GRAD)); } return true; diff --git a/caffe2/ideep/operators/operator_fallback_ideep.cc b/caffe2/ideep/operators/operator_fallback_ideep.cc index 84d0dbb..a5f03a1 100644 --- a/caffe2/ideep/operators/operator_fallback_ideep.cc +++ b/caffe2/ideep/operators/operator_fallback_ideep.cc @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -139,10 +138,6 @@ REGISTER_IDEEP_OPERATOR( LearningRate, IDEEPFallbackOp>); REGISTER_IDEEP_OPERATOR( - WeightedSum, - IDEEPFallbackOp>); - -REGISTER_IDEEP_OPERATOR( LeakyRelu, IDEEPFallbackOp>); REGISTER_IDEEP_OPERATOR( diff --git a/caffe2/ideep/operators/utility_ops.cc b/caffe2/ideep/operators/utility_ops.cc index ecb5f82..98dc335 100644 --- a/caffe2/ideep/operators/utility_ops.cc +++ b/caffe2/ideep/operators/utility_ops.cc @@ -67,9 +67,45 @@ class CopyIDEEPToCPUOp final : public IDEEPOperator { } }; +class IDEEPWeightedSumOp : public IDEEPOperator { + public: + USE_IDEEP_DEF_ALIASES(); + USE_IDEEP_OPERATOR_FUNCTIONS(); + + IDEEPWeightedSumOp(const OperatorDef& operator_def, Workspace* ws) + : IDEEPOperator(operator_def, ws) {} + bool RunOnDevice() override { + CAFFE_ENFORCE_EQ(InputSize() % 2, 0); + auto ndims = Input(0).ndims(); + auto nelems = Input(0).get_nelems(); + auto w_nelems = Input(1).get_nelems(); + CAFFE_ENFORCE_GT(nelems, 0); + CAFFE_ENFORCE_EQ(w_nelems, 1); + auto* output = Output(0); + std::vector scales; + scales.reserve(InputSize() / 2); + std::vector inputs; + inputs.reserve(InputSize() / 2); + for (int i = 0; i < InputSize(); i += 2) { + auto& X = Input(i); + CAFFE_ENFORCE(X.ndims() == ndims); + CAFFE_ENFORCE(X.get_nelems() == nelems); + CAFFE_ENFORCE(Input(i + 1).get_nelems() == w_nelems); + inputs.push_back(X); 
+ auto scale = static_cast(Input(i + 1).get_data_handle()); + scales.push_back(scale[0]); + } + + ideep::sum::compute(scales, inputs, *output); + + return true; + } +}; + REGISTER_IDEEP_OPERATOR(CopyCPUToIDEEP, CopyCPUToIDEEPOp); REGISTER_IDEEP_OPERATOR(CopyIDEEPToCPU, CopyIDEEPToCPUOp); REGISTER_IDEEP_OPERATOR(Copy, IDEEPCopyOp); +REGISTER_IDEEP_OPERATOR(WeightedSum, IDEEPWeightedSumOp); OPERATOR_SCHEMA(CopyCPUToIDEEP) .NumInputs(1) diff --git a/caffe2/python/ideep/fc_op_test.py b/caffe2/python/ideep/fc_op_test.py index 03deedb..ba1ce10 100644 --- a/caffe2/python/ideep/fc_op_test.py +++ b/caffe2/python/ideep/fc_op_test.py @@ -12,11 +12,12 @@ from caffe2.python import core, workspace import caffe2.python.hypothesis_test_util as hu import caffe2.python.ideep_test_util as mu + @unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.") class FcTest(hu.HypothesisTestCase): @given(n=st.integers(1, 5), m=st.integers(1, 5), k=st.integers(1, 5), **mu.gcs) - def test_fc(self,n, m, k, gc, dc): + def test_fc_2_dims(self, n, m, k, gc, dc): X = np.random.rand(m, k).astype(np.float32) - 0.5 W = np.random.rand(n, k).astype(np.float32) - 0.5 b = np.random.rand(n).astype(np.float32) - 0.5 @@ -25,7 +26,7 @@ class FcTest(hu.HypothesisTestCase): 'FC', ['X', 'W', 'b'], ["Y"] - ) + ) self.assertDeviceChecks(dc, op, [X, W, b], [0]) @@ -222,6 +223,42 @@ class FcTest(hu.HypothesisTestCase): print(np.max(np.abs(db1 - db0))) self.assertTrue(False) + @given(n=st.integers(1, 5), m=st.integers(1, 5), + k=st.integers(1, 5), **mu.gcs) + def test_fc_4_dims_src(self, n, m, k, gc, dc): + X = np.random.rand(m, k, m, m).astype(np.float32) - 0.5 + W = np.random.rand(n, k * m * m).astype(np.float32) - 0.5 + b = np.random.rand(n).astype(np.float32) - 0.5 + + op = core.CreateOperator( + 'FC', + ['X', 'W', 'b'], + ["Y"] + ) + + self.assertDeviceChecks(dc, op, [X, W, b], [0]) + + for i in range(3): + self.assertGradientChecks(gc, op, [X, W, b], i, [0]) + + @given(n=st.integers(1, 5), 
m=st.integers(1, 5), + k=st.integers(1, 5), **mu.gcs) + def test_fc_4_dims(self, n, m, k, gc, dc): + X = np.random.rand(m, k, m, m).astype(np.float32) - 0.5 + W = np.random.rand(n, k, m, m).astype(np.float32) - 0.5 + b = np.random.rand(n).astype(np.float32) - 0.5 + + op = core.CreateOperator( + 'FC', + ['X', 'W', 'b'], + ["Y"] + ) + + self.assertDeviceChecks(dc, op, [X, W, b], [0]) + + for i in range(3): + self.assertGradientChecks(gc, op, [X, W, b], i, [0]) + if __name__ == "__main__": unittest.main() diff --git a/caffe2/python/ideep/weightedsum_op_test.py b/caffe2/python/ideep/weightedsum_op_test.py new file mode 100644 index 0000000..2a0b3ec --- /dev/null +++ b/caffe2/python/ideep/weightedsum_op_test.py @@ -0,0 +1,57 @@ +from __future__ import unicode_literals +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import hypothesis.strategies as st +import unittest +import caffe2.python.hypothesis_test_util as hu +from caffe2.python import core, workspace +from hypothesis import given +import caffe2.python.ideep_test_util as mu + + +@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.") +class TestWeightedSumOp(hu.HypothesisTestCase): + @given(n=st.integers(5, 8), m=st.integers(1, 1), + d=st.integers(2, 4), grad_on_w=st.booleans(), + **mu.gcs_ideep_only) + def test_weighted_sum(self, n, m, d, grad_on_w, gc, dc): + input_names = [] + input_vars = [] + for i in range(m): + X_name = 'X' + str(i) + w_name = 'w' + str(i) + input_names.extend([X_name, w_name]) + var = np.random.rand(n, d).astype(np.float32) + vars()[X_name] = var + input_vars.append(var) + var = np.random.rand(1).astype(np.float32) + vars()[w_name] = var + input_vars.append(var) + + def weighted_sum_op_ref(*args): + res = np.zeros((n, d)) + for i in range(m): + res = res + args[2 * i + 1] * args[2 * i] + + return (res, ) + + op = core.CreateOperator( + "WeightedSum", + input_names, + ['Y'], + 
grad_on_w=grad_on_w, + ) + + self.assertReferenceChecks( + device_option=gc, + op=op, + inputs=input_vars, + reference=weighted_sum_op_ref, + ) + + +if __name__ == "__main__": + unittest.main() -- 2.7.4