From: Yan Facai (颜发才)
Date: Tue, 15 May 2018 18:34:50 +0000 (+0800)
Subject: Add `AppendFloat16ArrayToTensorProto` to accelerate `tf.constant` for float16 (#19212)
X-Git-Tag: upstream/v1.9.0_rc1~117
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=90fe7226a464983e72a0242d5a05e4acba309195;p=platform%2Fupstream%2Ftensorflow.git

Add `AppendFloat16ArrayToTensorProto` to accelerate `tf.constant` for float16 (#19212)

* PREP: add fast append for float16

* CLN: wrapper for float16

* CLN: replace append with extend method

* Revert "CLN: replace append with extend method"

This reverts commit 9958ba9bbf442e5b669e354b17b88c735719b366.
---

diff --git a/tensorflow/python/framework/fast_tensor_util.pyx b/tensorflow/python/framework/fast_tensor_util.pyx
index 1992831..17d112a 100644
--- a/tensorflow/python/framework/fast_tensor_util.pyx
+++ b/tensorflow/python/framework/fast_tensor_util.pyx
@@ -7,6 +7,18 @@ cimport numpy as np
 
 from tensorflow.python.util import compat
 
+def AppendFloat16ArrayToTensorProto(
+    # For numpy, npy_half is a typedef for npy_uint16,
+    # see: https://github.com/numpy/numpy/blob/master/doc/source/reference/c-api.coremath.rst#half-precision-functions
+    # Because np.float16_t doesn't exist in cython, we use uint16_t here.
+    # TODO: Use np.float16_t when cython supports it.
+    tensor_proto, np.ndarray[np.uint16_t, ndim=1] nparray):
+  cdef long i, n
+  n = nparray.size
+  for i in range(n):
+    tensor_proto.half_val.append(nparray[i])
+
+
 def AppendFloat32ArrayToTensorProto(
     tensor_proto, np.ndarray[np.float32_t, ndim=1] nparray):
   cdef long i, n
diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py
index 8cf2420..ca63efb 100644
--- a/tensorflow/python/framework/tensor_util.py
+++ b/tensorflow/python/framework/tensor_util.py
@@ -50,6 +50,13 @@ def SlowAppendFloat16ArrayToTensorProto(tensor_proto, proto_values):
       [ExtractBitsFromFloat16(x) for x in proto_values])
 
 
+def _MediumAppendFloat16ArrayToTensorProto(tensor_proto, proto_values):
+  # TODO: Remove the conversion if cython supports np.float16_t
+  fast_tensor_util.AppendFloat16ArrayToTensorProto(
+      tensor_proto,
+      np.asarray(proto_values, dtype=np.float16).view(np.uint16))
+
+
 def ExtractBitsFromBFloat16(x):
   return np.asscalar(
       np.asarray(x, dtype=dtypes.bfloat16.as_numpy_dtype).view(np.uint16))
@@ -64,11 +71,8 @@ if _FAST_TENSOR_UTIL_AVAILABLE:
   _NP_TO_APPEND_FN = {
       dtypes.bfloat16.as_numpy_dtype:
           SlowAppendBFloat16ArrayToTensorProto,
-      # TODO(sesse): We should have a
-      # fast_tensor_util.AppendFloat16ArrayToTensorProto,
-      # but it seems np.float16_t doesn't exist?
       np.float16:
-          SlowAppendFloat16ArrayToTensorProto,
+          _MediumAppendFloat16ArrayToTensorProto,
       np.float32:
           fast_tensor_util.AppendFloat32ArrayToTensorProto,
       np.float64:
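
The wrapper `_MediumAppendFloat16ArrayToTensorProto` works because np.float16
and np.uint16 share the same 16-bit storage, so `.view(np.uint16)`
reinterprets the IEEE 754 half-precision bit patterns in place, with no copy
and no rounding. A minimal sketch of that round trip, assuming only NumPy
(the sample values are illustrative):

    import numpy as np

    # Representative half-precision values, including the largest
    # finite float16 (65504).
    values = np.array([1.0, -2.5, 65504.0], dtype=np.float16)

    # Reinterpret the same 16 bits as unsigned integers; the view
    # shares memory with `values`, so nothing is converted.
    bits = values.view(np.uint16)
    print(bits.tolist())  # [15360, 49408, 31743]

    # Viewing back recovers the original floats exactly.
    assert np.array_equal(bits.view(np.float16), values)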
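Because `_NP_TO_APPEND_FN` routes np.float16 through an append function, each
element of a float16 constant lands in the proto's repeated `half_val` field
as a uint16 bit pattern (widened to int32 by protobuf), which is exactly the
per-element loop the Cython fast path speeds up. A quick way to inspect this,
as a sketch against the TF 1.x internals touched here (the sample array is
hypothetical):

    import numpy as np
    from tensorflow.python.framework import tensor_util

    # make_tensor_proto is the conversion tf.constant goes through.
    proto = tensor_util.make_tensor_proto(
        np.array([1.0, 2.0], dtype=np.float16))

    # half_val holds the raw float16 bit patterns.
    print(list(proto.half_val))  # [15360, 16384]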