2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 #ifndef __NNFW_CKER_CONCATENATION_H__
19 #define __NNFW_CKER_CONCATENATION_H__
21 #include "cker/Shape.h"
22 #include "cker/Types.h"
32 template <typename Scalar>
33 inline void Concatenation(const ConcatenationParams ¶ms, const Shape *const *input_shapes,
34 const Scalar *const *input_data, const Shape &output_shape,
37 int axis = params.axis;
38 int inputs_count = params.inputs_count;
39 const int concat_dimensions = output_shape.DimensionsCount();
40 assert(axis < concat_dimensions);
42 int64_t concat_size = 0;
43 for (int i = 0; i < inputs_count; i++)
45 assert(input_shapes[i]->DimensionsCount() == concat_dimensions);
46 for (int j = 0; j < concat_dimensions; j++)
50 auto dim_checked = MatchingDim(*input_shapes[i], j, output_shape, j);
51 UNUSED_RELEASE(dim_checked);
54 concat_size += input_shapes[i]->Dims(axis);
56 assert(concat_size == output_shape.Dims(axis));
57 int64_t outer_size = 1;
58 for (int i = 0; i < axis; ++i)
60 outer_size *= output_shape.Dims(i);
62 // For all input arrays,
63 // FlatSize() = outer_size * Dims(axis) * base_inner_size;
64 int64_t base_inner_size = 1;
65 for (int i = axis + 1; i < concat_dimensions; ++i)
67 base_inner_size *= output_shape.Dims(i);
70 Scalar *output_ptr = output_data;
71 for (int k = 0; k < outer_size; k++)
73 for (int i = 0; i < inputs_count; ++i)
75 const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
76 memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar));
77 output_ptr += copy_size;
// NOTE: This routine is not fully quantized as it takes scale as a floating point value.
// This should be fixed when optimizing this routine further.
84 inline void ConcatenationWithScaling(const ConcatenationParams ¶ms,
85 const Shape *const *input_shapes,
86 const uint8_t *const *input_data, const Shape &output_shape,
89 int axis = params.axis;
90 const int32_t *input_zeropoint = params.input_zeropoint;
91 const float *input_scale = params.input_scale;
92 int inputs_count = params.inputs_count;
93 const int32_t output_zeropoint = params.output_zeropoint;
94 const float output_scale = params.output_scale;
96 const int concat_dimensions = output_shape.DimensionsCount();
97 assert(axis <= concat_dimensions);
99 int64_t concat_size = 0;
100 for (int i = 0; i < inputs_count; i++)
102 assert(input_shapes[i]->DimensionsCount() == concat_dimensions);
103 for (int j = 0; j < concat_dimensions; j++)
107 assert(input_shapes[i]->Dims(j) == output_shape.Dims(j));
110 concat_size += input_shapes[i]->Dims(axis);
112 assert(concat_size == output_shape.Dims(axis));
113 int64_t outer_size = 1;
114 for (int i = 0; i < axis; ++i)
116 outer_size *= output_shape.Dims(i);
118 // For all input arrays,
119 // FlatSize() = outer_size * Dims(axis) * base_inner_size;
120 int64_t base_inner_size = 1;
121 for (int i = axis + 1; i < concat_dimensions; ++i)
123 base_inner_size *= output_shape.Dims(i);
126 const float inverse_output_scale = 1.f / output_scale;
127 uint8_t *output_ptr = output_data;
128 for (int k = 0; k < outer_size; k++)
130 for (int i = 0; i < inputs_count; ++i)
132 const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
133 const uint8_t *input_ptr = input_data[i] + k * copy_size;
134 if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale)
136 memcpy(output_ptr, input_ptr, copy_size);
140 const float scale = input_scale[i] * inverse_output_scale;
141 const float bias = -input_zeropoint[i] * scale;
142 for (int j = 0; j < copy_size; ++j)
144 const int32_t value =
145 static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
146 output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
149 output_ptr += copy_size;
157 #endif // __NNFW_CKER_CONCATENATION_H__