2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 #ifndef __NNFW_CKER_REFERENCE_BINARYARITHMETICOPS_H__
19 #define __NNFW_CKER_REFERENCE_BINARYARITHMETICOPS_H__
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"

#include <cstdint>
#include <functional>
#include <stdexcept>
35 inline void BinaryArithmeticOp(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
36 const T *input1_data, const Shape &input2_shape,
37 const T *input2_data, const Shape &output_shape, T *output_data,
38 const std::function<T(const T &, const T &)> &fn)
40 const int32_t flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
41 for (int i = 0; i < flat_size; ++i)
43 output_data[i] = ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
44 params.quantized_activation_min,
45 params.quantized_activation_max);
50 inline void BinaryArithmeticOp(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape,
51 const float *input1_data, const Shape &input2_shape,
52 const float *input2_data, const Shape &output_shape,
54 const std::function<float(const float &, const float &)> &fn)
56 const int size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
57 for (int i = 0; i < size; i++)
60 ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
61 params.float_activation_min, params.float_activation_max);
66 inline void BroadcastBinaryArithmeticOpSlowQuant8(
67 const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const T *input1_data,
68 const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data,
69 const std::function<T(const BinaryArithmeticOpParam ¶ms, const T &, const T &)> &fn)
73 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
74 const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
76 if ((params.quantized_activation_min < 0) && (params.quantized_activation_max > 255))
78 throw std::runtime_error{"Support only for Quant8."};
81 // Comment from tensorflow lite:
83 // In Tensorflow, the dimensions are canonically named (batch_number, row,
84 // col, channel), with extents (batches, height, width, depth), with the
85 // trailing dimension changing most rapidly (channels has the smallest stride,
86 // typically 1 element).
88 // In generated C code, we store arrays with the dimensions reversed. The
89 // first dimension has smallest stride.
91 // We name our variables by their Tensorflow convention, but generate C code
92 // nesting loops such that the innermost loop has the smallest stride for the
93 // best cache behavior.
94 for (int b = 0; b < extended_output_shape.Dims(0); ++b)
96 for (int y = 0; y < extended_output_shape.Dims(1); ++y)
98 for (int x = 0; x < extended_output_shape.Dims(2); ++x)
100 for (int c = 0; c < extended_output_shape.Dims(3); ++c)
102 output_data[Offset(extended_output_shape, b, y, x, c)] =
103 ActivationFunctionWithMinMax<uint8_t>(
104 fn(params, input1_data[SubscriptToIndex(desc1, b, y, x, c)],
105 input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
106 params.quantized_activation_min, params.quantized_activation_max);
112 template <typename T>
113 inline void BroadcastBinaryArithmeticOpSlow(const BinaryArithmeticOpParam ¶ms,
114 const Shape &input1_shape, const T *input1_data,
115 const Shape &input2_shape, const T *input2_data,
116 const Shape &output_shape, T *output_data,
117 const std::function<T(const T &, const T &)> &fn)
119 NdArrayDesc<4> desc1;
120 NdArrayDesc<4> desc2;
121 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
122 const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
124 // Comment from tensorflow lite:
126 // In Tensorflow, the dimensions are canonically named (batch_number, row,
127 // col, channel), with extents (batches, height, width, depth), with the
128 // trailing dimension changing most rapidly (channels has the smallest stride,
129 // typically 1 element).
131 // In generated C code, we store arrays with the dimensions reversed. The
132 // first dimension has smallest stride.
134 // We name our variables by their Tensorflow convention, but generate C code
135 // nesting loops such that the innermost loop has the smallest stride for the
136 // best cache behavior.
137 for (int b = 0; b < extended_output_shape.Dims(0); ++b)
139 for (int y = 0; y < extended_output_shape.Dims(1); ++y)
141 for (int x = 0; x < extended_output_shape.Dims(2); ++x)
143 for (int c = 0; c < extended_output_shape.Dims(3); ++c)
145 output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax<T>(
146 fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
147 input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
148 params.quantized_activation_min, params.quantized_activation_max);
156 inline void BroadcastBinaryArithmeticOpSlow(
157 const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const float *input1_data,
158 const Shape &input2_shape, const float *input2_data, const Shape &output_shape,
159 float *output_data, const std::function<float(const float &, const float &)> &fn)
161 NdArrayDesc<4> desc1;
162 NdArrayDesc<4> desc2;
163 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
164 const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
166 for (int b = 0; b < extended_output_shape.Dims(0); ++b)
168 for (int y = 0; y < extended_output_shape.Dims(1); ++y)
170 for (int x = 0; x < extended_output_shape.Dims(2); ++x)
172 for (int c = 0; c < extended_output_shape.Dims(3); ++c)
174 output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax(
175 fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
176 input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
177 params.float_activation_min, params.float_activation_max);
184 } // namespace reference
188 #endif // __NNFW_CKER_REFERENCE_BINARYARITHMETICOPS_H__