/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 #ifndef __NNFW_RUY_NEON_TENSOR_UTILS_H__
19 #define __NNFW_RUY_NEON_TENSOR_UTILS_H__
21 #include "ruy/neon/neon_check.h"
25 #define kFloatWeightsPerNeonLane 4
32 inline bool NeonIsZeroVector(const float *vector, int v_size)
34 // If v_size is not divisible by kFloatWeightsPerNeonLane, we cannot
35 // use the main vectorized loop, and we need to process sequentially.
36 // postamble_start shows the start index where this should happen.
37 const int postamble_start = v_size - (v_size & (kFloatWeightsPerNeonLane - 1));
39 const float32x4_t zero_x4_float = vmovq_n_f32(0.0f);
40 for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane)
42 const float32x4_t i_x4_float = vld1q_f32(vector + v);
43 uint32x4_t cmp_result = vceqq_f32(i_x4_float, zero_x4_float);
44 if (vgetq_lane_u32(cmp_result, 0) == 0)
46 if (vgetq_lane_u32(cmp_result, 1) == 0)
48 if (vgetq_lane_u32(cmp_result, 2) == 0)
50 if (vgetq_lane_u32(cmp_result, 3) == 0)
55 for (int v = postamble_start; v < v_size; ++v)
68 #endif // __NNFW_RUY_NEON_TENSOR_UTILS_H__