- Added broadcasting to the interpreter and shape inference.
- Fixed the interpreter implementation and placed `const` in the correct location.
- Added broadcasting to the soft backend.
- Added tests and C++ backend implementations for broadcasted `+, -, *, /, max`.
- Added a generic implementation of elementwise broadcast.
- Fixed code style; removed the shape inference file.
Signed-off-by: Andrei Shedko <a.shedko@samsung.com>
"modelIR/operations/Conv2DOp.cpp"
"modelIR/operations/DeConv2DOp.cpp"
"modelIR/operations/DepthwiseConv2DOp.cpp"
+ "modelIR/operations/ElementwiseOp.cpp"
"modelIR/operations/FullyConnectedOp.cpp"
"modelIR/operations/GatherOp.cpp"
"modelIR/operations/GemmOp.cpp"
{
TensorVariant::TensorVariant(const Shape& shape, const std::shared_ptr<char>& data,
- DTYPE dtype, size_t element_size)
- : _dtype(dtype), _data(data), _strides{0}, _rank(shape.rank()),
- _shape(shape), _element_size(element_size)
+ DTYPE dtype, size_t element_size) :
+ _dtype(dtype), _data(data), _strides{0}, _rank(shape.rank()),
+ _shape(shape), _elementSize(element_size)
{
  int stride = 1;
  for (int d = _rank - 1; d >= 0; --d) {
    _strides[d] = stride;
    stride *= _shape.dim(d);
  }
}
+/**
+ * @brief Construct a broadcast view of t_old: the new variant shares t_old's
+ * data and has stride 0 along every dimension where t_old's extent is 1, so
+ * that single element is reused across the whole dimension.
+ * @param t_old TensorVariant to use as the base
+ * @param shape shape to broadcast to
+ */
+TensorVariant::TensorVariant(const TensorVariant& t_old,
+ const Shape& shape)
+ : _dtype(t_old._dtype), _data(t_old._data), _strides{0}, _rank(shape.rank()),
+ _shape(shape), _elementSize(t_old._elementSize) {
+ int axis_old = t_old._rank - 1;
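+  // Walk the axes from the innermost outwards; leading dimensions of the new
+  // shape that t_old lacks keep the zero stride set by _strides{0}.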
+ for (int d = _rank - 1; d >= 0; d--) {
+ if (t_old._shape.dim(axis_old) == 1)
+ _strides[d] = 0;
+ else
+ _strides[d] = t_old._strides[axis_old];
+ axis_old--;
+ if (axis_old == -1)
+ break;
+ }
+}
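+// Illustration (not part of the API): broadcasting a {1, 3} tensor to {4, 3}
+// produces _strides == {0, 1}, so at({i, j}) resolves to the base element
+// {0, j} for every row i; the single row is reused without copying any data.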
+
char *TensorVariant::at(const Index &idx) const
{
- return _data.get() + getOffset(idx) * _element_size;
+ return _data.get() + getOffset(idx) * _elementSize;
}
size_t TensorVariant::getOffset(const Index &idx) const {
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/modelIR/operations/ElementwiseOp.h"
+
+namespace nnc {
+namespace mir {
+namespace ops {
+
+void ElementwiseOp::inferOutputShapes() {
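+  // Numpy-style broadcasting: shapes are aligned at the trailing axis, and an
+  // extent of 1 stretches to match the other inputs, e.g. {1, 10, 10, 1} and
+  // {1, 1, 10, 10} broadcast to {1, 10, 10, 10}.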
+ int max_rank = getInputShape(0).rank();
+ size_t max_ind = 0;
+ for (size_t i = 0; i < getNumInputs(); i++) {
+ if (max_rank < getInputShape(i).rank()) {
+ max_rank = getInputShape(i).rank();
+ max_ind = i;
+ }
+ }
+ Shape max_shape = getInputShape(max_ind);
+ for (size_t i = 0; i < getNumInputs(); i++) {
+ const auto& current_shape = getInputShape(i);
+    _needsBroadcast = _needsBroadcast || max_shape != current_shape;
+ const int rank = current_shape.rank();
+ for (int axis = 0; axis < rank; axis++) {
+ auto current_dim = current_shape.dim(rank - axis - 1);
+      // An extent of 1 stretches to the largest extent seen on this axis;
+      // otherwise all extents must agree.
+ if (max_shape.dim(max_rank - axis - 1) == 1 && current_dim != 1) {
+ max_shape.dim(max_rank - axis - 1) = current_dim;
+ } else {
+ assert((current_dim == 1 || current_dim == max_shape.dim(max_rank - axis - 1))
+ && "Incompatible shapes in broadcast!");
+ }
+ }
+ }
+ setOutputShape(0, max_shape);
+}
+
+} // namespace ops
+} // namespace mir
+} // namespace nnc
explicit TensorVariant(const Shape& shape, const std::shared_ptr<char>& data, DTYPE dtype, size_t element_size);
+ explicit TensorVariant(const TensorVariant& t_old, const Shape& shape);
+
template<typename T>
explicit TensorVariant(const Shape& shape, const std::shared_ptr<T>& data, DTYPE dtype) :
TensorVariant(
virtual const Shape &getShape() const { return _shape; }
DTYPE getDataType() const { return _dtype; }
- size_t getElementSize() const { return _element_size; }
+
+ size_t getElementSize() const { return _elementSize; }
private:
DTYPE _dtype;
size_t _rank;
Shape _shape;
- size_t _element_size;
+ size_t _elementSize;
};
} // namespace mir
* @param num_inputs Number of inputs
*/
ElementwiseOp(const std::vector<IODescriptor>& args, OpType op_type)
- : Operation(Type::elementwise, args), _opType(op_type) {
- // Infer output shape.
- // TODO Check that all inputs have the same shape.
- setOutputShape(0, getInputShape(0));
+ : Operation(Type::elementwise, args), _opType(op_type), _needsBroadcast(false) {
+ inferOutputShapes();
};
+ bool getBroadcast() const { return _needsBroadcast; }
+
private:
+ void inferOutputShapes();
+
OpType _opType;
+ bool _needsBroadcast;
public:
OpType getOpType() const { return _opType; }
mapByName(&op);
auto operands = op.getPrevNodes();
std::vector<Tensor<float>> ins;
+ // Reserve space for tensor variants to avoid reference invalidation when pushing into vector
+  std::vector<TensorVariant> broadcasted;
+ broadcasted.reserve(op.getNumInputs());
+
for (auto &in : operands) {
- ins.emplace_back(var(in.op->getId())[in.index]);
+ auto& tmp = var(in.op->getId())[in.index];
+ if (op.getBroadcast()) {
+ broadcasted.emplace_back(tmp, op.getOutputShape(0));
+ ins.emplace_back(broadcasted.back());
+ } else {
+ ins.emplace_back(tmp);
+ }
}
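+  // ins now holds either direct views of the inputs or stride-0 broadcast
+  // views, so the Fill below can index every input with output-shaped indices.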
float (*func)(float,float); // Another dirty hack
switch (op.getOpType()) {
default:
assert(false && "Unsupported Optype");
}
-
- var(op.getId()) = Fill<float>(op.getOutputShape(0), [&func, &ins, &op](const Index &id) {
- float acc = ins[0].at(id);
- for (size_t i = 1; i < ins.size() ; i++)
- acc = func(acc, ins[i].at(id));
- return acc;
- })();
+ var(op.getId()) = Fill<float>(
+ op.getOutputShape(0),
+ [&func, &ins](const Index& id) {
+ float acc = ins[0].at(id);
+ for (size_t i = 1; i < ins.size(); i++)
+ acc = func(acc, ins[i].at(id));
+ return acc;
+ })();
}
void NNInterpreter::visit(ops::DeConv2DOp& op) {
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Elementwise.h"
-//do not delete
-//used to force compilation of elementwise.h
void Serializer::visit(mir::ops::ElementwiseOp& op) {
_curOp->_paramStartOffset = _buffer.size();
  // The op type is known at code generation time, so it is not serialized.
- serializeT<int>((int32_t) op.getNumInputs());
+ serializeT<int>((int32_t)op.getNumInputs());
+ serializeT<int>((int32_t)op.getBroadcast());
+ serializeShape(op.getOutputShape(0));
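+  // Note: this layout must match the deserialization order in the soft
+  // backend's ElementWise: int32 input count, int32 broadcast flag, shape.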
}
void Serializer::visit(mir::ops::EluOp& op) {
BuildFrom(init_list);
}
+  // Grow this shape to the element-wise maximum of the two; used to
+  // accumulate the broadcast output shape of elementwise ops.
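+  // For example, growing {3, 1, 5, 6} by {3, 22, 5, 1} yields {3, 22, 5, 6}.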
+  void maxShape(const RuntimeShape& other) {
+    TFLITE_CHECK(other.DimensionsCount() == size_ && size_ == 4 &&
+                 "Elementwise shapes must be 4d");
+ for (size_t i = 0; i < 4; i++) {
+ dims_[i] = std::max(dims_[i], other.dims_[i]);
+ }
+ }
+
// Avoid using this constructor. We should be able to delete it when C++17
// rolls out.
RuntimeShape(RuntimeShape const& other) : size_(other.DimensionsCount()) {
return ArraySize(array1, index1);
}
+// Flat size calculation, checking that dimensions match with one or more other
+// arrays.
+inline int MatchingFlatSize(const RuntimeShape& shape,
+ const RuntimeShape& check_shape_0) {
+ TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount());
+ const int dims_count = shape.DimensionsCount();
+ for (int i = 0; i < dims_count; ++i) {
+ TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
+ }
+ return shape.FlatSize();
+}
+
+inline int MatchingFlatSize(const RuntimeShape& shape,
+ const RuntimeShape& check_shape_0,
+ const RuntimeShape& check_shape_1) {
+ TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount());
+ const int dims_count = shape.DimensionsCount();
+ for (int i = 0; i < dims_count; ++i) {
+ TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
+ }
+ return MatchingFlatSize(shape, check_shape_1);
+}
+
+inline int MatchingFlatSize(const RuntimeShape& shape,
+ const RuntimeShape& check_shape_0,
+ const RuntimeShape& check_shape_1,
+ const RuntimeShape& check_shape_2) {
+ TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount());
+ const int dims_count = shape.DimensionsCount();
+ for (int i = 0; i < dims_count; ++i) {
+ TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
+ }
+ return MatchingFlatSize(shape, check_shape_1, check_shape_2);
+}
+
template <int N>
inline int MatchingFlatSize(const Dims<N>& dims, const Dims<N>& check_dims_0) {
for (int i = 0; i < N; ++i) {
}
template <typename Scalar>
+VectorMap<Scalar> MapAsVector(Scalar* data, const size_t size) {
+ return VectorMap<Scalar>(data, size, 1);
+}
+
+template <typename Scalar>
using MatrixMap = typename std::conditional<
std::is_const<Scalar>::value,
Eigen::Map<const Eigen::Matrix<typename std::remove_const<Scalar>::type,
int8 perm_count;
int32 perm[4];
};
+
+// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
+// BROADCASTING.
+//
+// NdArrayDesc<N> describes the shape and memory layout of an N-dimensional
+// rectangular array of numbers.
+//
+// NdArrayDesc<N> is basically identical to Dims<N> defined in types.h.
+// However, as Dims<N> is to be deprecated, this class exists as an adaptor
+// to enable simple unoptimized implementations of element-wise broadcasting
+// operations.
+template <int N>
+struct NdArrayDesc {
+ // The "extent" of each dimension. Indices along dimension d must be in the
+ // half-open interval [0, extents[d]).
+ int extents[N];
+
+ // The number of *elements* (not bytes) between consecutive indices of each
+ // dimension.
+ int strides[N];
+};
+
+// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
+// BROADCASTING.
+//
+// Same as Offset(), except takes as NdArrayDesc<N> instead of Dims<N>.
+inline int SubscriptToIndex(const NdArrayDesc<4>& desc, int i0, int i1, int i2,
+ int i3) {
+ TFLITE_DCHECK(i0 >= 0 && i0 < desc.extents[0]);
+ TFLITE_DCHECK(i1 >= 0 && i1 < desc.extents[1]);
+ TFLITE_DCHECK(i2 >= 0 && i2 < desc.extents[2]);
+ TFLITE_DCHECK(i3 >= 0 && i3 < desc.extents[3]);
+ return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] +
+ i3 * desc.strides[3];
+}
+
+template <int N>
+inline void NdArrayDescsForElementwiseBroadcast(
+ const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
+ NdArrayDesc<N>* desc0_out, NdArrayDesc<N>* desc1_out) {
+ TFLITE_DCHECK(desc0_out != nullptr);
+ TFLITE_DCHECK(desc1_out != nullptr);
+
+ auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
+ auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);
+
+ // Copy dims to desc, calculating strides.
+ int desc0_stride = 1;
+ int desc1_stride = 1;
+ for (int i = N - 1; i >= 0; --i) {
+ desc0_out->extents[i] = extended_input0_shape.Dims(i);
+ desc0_out->strides[i] = desc0_stride;
+ desc0_stride *= extended_input0_shape.Dims(i);
+ desc1_out->extents[i] = extended_input1_shape.Dims(i);
+ desc1_out->strides[i] = desc1_stride;
+ desc1_stride *= extended_input1_shape.Dims(i);
+ }
+
+ // Walk over each dimension. If the extents are equal do nothing.
+ // Otherwise, set the desc with extent 1 to have extent equal to the other and
+ // stride 0.
+ for (int i = 0; i < N; ++i) {
+ const int extent0 = extended_input0_shape.Dims(i);
+ const int extent1 = extended_input1_shape.Dims(i);
+ if (extent0 != extent1) {
+ if (extent0 == 1) {
+ desc0_out->strides[i] = 0;
+ desc0_out->extents[i] = extent1;
+ } else {
+ TFLITE_DCHECK_EQ(extent1, 1);
+ desc1_out->strides[i] = 0;
+ desc1_out->extents[i] = extent0;
+ }
+ }
+ }
+}
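+
+// Example: inputs of shape {2, 1} and {1, 3} extend to {1, 1, 2, 1} and
+// {1, 1, 1, 3}; both descriptors end up with extents {1, 1, 2, 3}, desc0
+// with stride 0 on the last axis and desc1 with stride 0 on the third, so
+// SubscriptToIndex can walk both inputs over the full {1, 1, 2, 3} output.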
limitations under the License.
==============================================================================*/
+// TODO(ycling): Refactoring. Remove BroadcastLogical and use the more
+// generalized and efficient BroadcastBinaryFunction.
+//
+// Also appears to duplicate MinimumMaximum.
+//
+// R: Result type. T1: Input 1 type. T2: Input 2 type.
+template <typename R, typename T1, typename T2>
+inline void BroadcastBinaryFunction4DSlow(
+ const RuntimeShape& unextended_input1_shape, const T1* input1_data,
+ const RuntimeShape& unextended_input2_shape, const T2* input2_data,
+ const RuntimeShape& unextended_output_shape, R* output_data,
+    R (*func)(T1, T2)) {
+ TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+ const RuntimeShape output_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
+ unextended_input2_shape, &desc1, &desc2);
+
+ for (int b = 0; b < output_shape.Dims(0); ++b) {
+ for (int y = 0; y < output_shape.Dims(1); ++y) {
+ for (int x = 0; x < output_shape.Dims(2); ++x) {
+ for (int c = 0; c < output_shape.Dims(3); ++c) {
+ auto out_idx = Offset(output_shape, b, y, x, c);
+ auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+ auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+ auto in1_val = input1_data[in1_idx];
+ auto in2_val = input2_data[in2_idx];
+ output_data[out_idx] = func(in1_val, in2_val);
+ }
+ }
+ }
+ }
+}
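+
+// For example, Add::Call below passes a captureless lambda, which decays to
+// the R (*)(T1, T2) function-pointer parameter:
+//   BroadcastBinaryFunction4DSlow<float, float, float>(
+//       in1_shape, input1_data, in2_shape, input2_data,
+//       out_shape, output_data, [](float a, float b) { return a + b; });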
+
+// R: Result type. T1: Input 1 type. T2: Input 2 type.
+// TODO(renjieliu): Refactor other binary functions to use this one.
+template <typename R, typename T1, typename T2>
+inline void BinaryFunction(const RuntimeShape& input1_shape,
+ const T1* input1_data,
+ const RuntimeShape& input2_shape,
+ const T2* input2_data,
+ const RuntimeShape& output_shape, R* output_data,
+                           R (*func)(T1, T2)) {
+ const int flat_size =
+ MatchingFlatSize(input1_shape, input2_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i) {
+ output_data[i] = func(input1_data[i], input2_data[i]);
+ }
+}
+
struct Add {
static inline void Add_(const float* input1_data, const float* input2_data,
- float* output_data, const int size) {
+ float* output_data, const int size) {
int i = 0;
#ifdef USE_NEON
-
for (; i <= size - 16; i += 16) {
auto a10 = vld1q_f32(input1_data + i);
auto a11 = vld1q_f32(input1_data + i + 4);
}
}
- static inline void Call(const float* input1_data, const float* input2_data,
- float* output_data, Dims<4> dims) {
- Add_(input1_data, input2_data, output_data, FlatSize(dims));
+ static inline void Call(
+ const float* input1_data, RuntimeShape in1_shape,
+ const float* input2_data, RuntimeShape in2_shape,
+ float* output_data, RuntimeShape out_shape,
+ bool needsBroadcast) {
+ if (needsBroadcast) {
+ BroadcastBinaryFunction4DSlow<float, float, float>(
+ in1_shape, input1_data,
+ in2_shape, input2_data,
+ out_shape, output_data,
+ [](float a, float b) { return a + b; }
+ );
+ } else {
+ Add_(input1_data, input2_data, output_data, out_shape.FlatSize());
+ }
}
};
struct Max {
- static inline void Call(const float* input1_data, const float* input2_data,
- float* output_data, Dims<4> dims) {
- auto output = MapAsVector(output_data, dims);
- output = output.cwiseMax( MapAsVector(input2_data, dims) );
+ static inline void Call(
+ const float* input1_data, RuntimeShape in1_shape,
+ const float* input2_data, RuntimeShape in2_shape,
+ float* output_data, RuntimeShape out_shape,
+      bool needsBroadcast) {
+ if (needsBroadcast) {
+ BroadcastBinaryFunction4DSlow<float, float, float>(
+ in1_shape, input1_data,
+ in2_shape, input2_data,
+ out_shape, output_data,
+ [](float a, float b) { return std::max(a, b); }
+ );
+ } else {
+ auto output = MapAsVector(output_data, out_shape.FlatSize());
+ output = output.cwiseMax(MapAsVector(input2_data, out_shape.FlatSize()));
+ }
}
};
struct Mul {
- static inline void Call(const float* input1_data, const float* input2_data,
- float* output_data, Dims<4> dims) {
- Mul_(input1_data, input2_data, output_data, FlatSize(dims));
+ static inline void Call(const float* input1_data, RuntimeShape in1_shape,
+ const float* input2_data, RuntimeShape in2_shape,
+ float* output_data, RuntimeShape out_shape,
+                          bool needsBroadcast) {
+ if (needsBroadcast) {
+ BroadcastBinaryFunction4DSlow<float, float, float>(
+ in1_shape, input1_data,
+ in2_shape, input2_data,
+ out_shape, output_data,
+ [](float a, float b) { return a * b; });
+ } else {
+ Mul_(input1_data, input2_data, output_data, out_shape.FlatSize());
+ }
}
static inline void Mul_(const float* input1_data, const float* input2_data,
- float* output_data, const int size) {
+ float* output_data, const int size) {
int i = 0;
#ifdef USE_NEON
//TODO maybe move to a separate file since everything else here is extracted from TF Lite
//23.11.2018
struct Div {
- static inline void Call(const float* input1_data, const float* input2_data,
- float* output_data, Dims<4> dims) {
- auto output = MapAsVector(output_data, dims);
- output = output.cwiseQuotient(MapAsVector(input2_data, dims));
+ static inline void Call(
+ const float* input1_data, RuntimeShape in1_shape,
+ const float* input2_data, RuntimeShape in2_shape,
+ float* output_data, RuntimeShape out_shape,
+ bool needsBroadcast) {
+ if (needsBroadcast) {
+ BroadcastBinaryFunction4DSlow<float, float, float>(
+ in1_shape, input1_data,
+ in2_shape, input2_data,
+ out_shape, output_data,
+ [](float a, float b) { return a / b; }
+ );
+ } else {
+ auto output = MapAsVector(output_data, out_shape.FlatSize());
+ output = output.cwiseQuotient(MapAsVector(input2_data, out_shape.FlatSize()));
+ }
}
-};
+};
\ No newline at end of file
void ElementWise(Tensor &out,
const char *params, const Args &...inputs) {
const float *input[] = {inputs.getData()...};
+ RuntimeShape in_shapes[] = {shapeToRuntimeShape(inputs.getShape())...};
- auto ins = std::tie(inputs...);
-
- const Dims<4> out_d = shapeToDims(std::get<0>(ins).getShape());
const int32_t num_inputs = deserializeT<int32_t>(params);
+  const bool needs_broadcast = deserializeT<int32_t>(params) != 0;
+ const Shape out_shape = deserializeShape(params);
- out.reShape(std::get<0>(ins).getShape());
+ out.reShape(out_shape);
out.fillData(input[0]);
- for (int32_t i = 1; i < num_inputs; ++i) {
- F::Call(out.getData(), input[i], out.getData(), out_d);
+ const auto out_rt = shapeToRuntimeShape(out_shape);
+ if (!needs_broadcast) {
+ for (int32_t i = 1; i < num_inputs; ++i) {
+ F::Call(out.getData(), out_rt,
+ input[i], in_shapes[i],
+ out.getData(), out_rt,
+              false);
+ }
+ } else {
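+    // Broadcasting is applied pairwise, left to right: the result accumulated
+    // so far is combined with the next input. This assumes each pairwise
+    // broadcast already spans the final output shape, as in the bundled
+    // add2bc/mul3bc/div3bc tests.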
+ auto running_shape = RuntimeShape::ExtendedShape(4, in_shapes[0]);
+ for (int32_t i = 1; i < num_inputs; ++i) {
+      // Snapshot the accumulated result, since F::Call reads the left-hand
+      // input while writing into the same out.getData() buffer.
+      std::vector<float> inp_tmp(static_cast<size_t>(running_shape.FlatSize()));
+      memcpy(inp_tmp.data(), out.getData(), inp_tmp.size() * sizeof(float));
+      F::Call(inp_tmp.data(), running_shape,
+              input[i], in_shapes[i],
+              out.getData(), out_rt,
+              true);
+      // Grow the running shape to the broadcast of all inputs seen so far.
+      running_shape.maxShape(RuntimeShape::ExtendedShape(4, in_shapes[i]));
+ }
}
}
// TODO refactor tflite's code for this op
#include "core/modelIR/Graph.h"
#include "core/modelIR/operations/ReshapeOp.h"
+#include "core/modelIR/operations/ElementwiseOp.h"
#include "core/modelIR/operations/ResizeOp.h"
#include "core/modelIR/operations/SqueezeOp.h"
#include "core/modelIR/operations/ReduceFOp.h"
ASSERT_EQ(sq1->getOutputShape(0), expected_shape);
}
+TEST(ShapeInferenceTest, ElementwiseBC) {
+ Graph g;
+
+ Shape input_shape{1, 10, 10, 1};
+ Shape input2_shape{1, 1, 10, 10};
+ auto input = g.create<ops::VariableOp>("input1", input_shape);
+ auto input2 = g.create<ops::VariableOp>("input2", input2_shape);
+
+ auto add = g.create<ops::ElementwiseOp>("add_1",
+ std::vector<IODescriptor>{input->getOutput(0),
+ input2->getOutput(0)},
+ ops::ElementwiseOp::OpType::add);
+
+ ASSERT_EQ(add->getOutputShape(0), Shape({1, 10, 10, 10}));
+}
+
TEST(ShapeInferenceTest, SqueezeTestSpecificDims) {
Graph g;
}
}
+TEST(cpp_operations_test, add2bc) {
+ for (int numDims = 2; numDims <= 4; ++numDims) {
+ // test prerequisites
+ vector<int> shape_data1{3, 44, 5, 1};
+ vector<int> shape_data2{3, 1, 5, 6};
+ shape_data1.resize(numDims);
+ shape_data2.resize(numDims);
+ vector<Tensor> input_a_tensors(2);
+ vector<unique_ptr<mir::TensorVariant>> input_n_tensors(2);
+ fillTensors(input_n_tensors[0], input_a_tensors[0], shape_data1, 1.0f);
+ fillTensors(input_n_tensors[1], input_a_tensors[1], shape_data2, 2.0f);
+ auto op_generator = [](mir::Graph& g, const std::vector<mir::IODescriptor>& inputs) {
+ return g.create<mir::ops::ElementwiseOp>("y", inputs, mir::ops::ElementwiseOp::OpType::add);
+ };
+
+ createAndRunTestGraph(op_generator, ElementWise<Add, Tensor, Tensor>, input_n_tensors,
+ input_a_tensors[0],
+ input_a_tensors[1]);
+ }
+}
+
+TEST(cpp_operations_test, mul3bc) {
+ for (int numDims = 2; numDims <= 4; ++numDims) {
+ // test prerequisites
+ vector<int> shape_data1{3, 22, 5, 1};
+ vector<int> shape_data2{3, 1, 5, 6};
+ vector<int> shape_data3{1, 22, 1, 6};
+ shape_data1.resize(numDims);
+ shape_data2.resize(numDims);
+ shape_data3.resize(numDims);
+ vector<Tensor> input_a_tensors(3);
+ vector<unique_ptr<mir::TensorVariant>> input_n_tensors(3);
+ fillTensors(input_n_tensors[0], input_a_tensors[0], shape_data1, 1.0f);
+ fillTensors(input_n_tensors[1], input_a_tensors[1], shape_data2, 2.0f);
+ fillTensors(input_n_tensors[2], input_a_tensors[2], shape_data3, 3.0f);
+ auto opGenerator = [](mir::Graph& g, const std::vector<mir::IODescriptor>& inputs) {
+ return g.create<mir::ops::ElementwiseOp>("y", inputs, mir::ops::ElementwiseOp::OpType::mul);
+ };
+
+ createAndRunTestGraph(opGenerator, ElementWise<Mul, Tensor, Tensor, Tensor>, input_n_tensors,
+ input_a_tensors[0],
+ input_a_tensors[1], input_a_tensors[2]);
+ }
+}
+
+TEST(cpp_operations_test, div3bc) {
+ for (int numDims = 2; numDims <= 4; ++numDims) {
+ // test prerequisites
+ vector<int> shape_data1{3, 22, 5, 1};
+ vector<int> shape_data2{3, 1, 5, 6};
+ vector<int> shape_data3{1, 22, 1, 6};
+ shape_data1.resize(numDims);
+ shape_data2.resize(numDims);
+ shape_data3.resize(numDims);
+ vector<Tensor> input_a_tensors(3);
+ vector<unique_ptr<mir::TensorVariant>> input_n_tensors(3);
+ fillTensors(input_n_tensors[0], input_a_tensors[0], shape_data1, 5.0f);
+ fillTensors(input_n_tensors[1], input_a_tensors[1], shape_data2, 2.0f);
+ fillTensors(input_n_tensors[2], input_a_tensors[2], shape_data3, 3.0f);
+ auto opGenerator = [](mir::Graph& g, const std::vector<mir::IODescriptor>& inputs) {
+ return g.create<mir::ops::ElementwiseOp>("y", inputs, mir::ops::ElementwiseOp::OpType::div);
+ };
+
+ createAndRunTestGraph(
+ opGenerator, ElementWise<Div, Tensor, Tensor, Tensor>,
+ input_n_tensors,
+ input_a_tensors[0],
+ input_a_tensors[1],
+ input_a_tensors[2]
+ );
+ }
+}
+
TEST(cpp_operations_test, add2) {
for (int numDims = 2; numDims <= 4; ++numDims) {
return g.create<mir::ops::ElementwiseOp>("y", inputs, mir::ops::ElementwiseOp::OpType::add);
};
- createAndRunTestGraph(op_generator, ElementWise<Add,Tensor,Tensor>, input_n_tensors, input_a_tensors[0],
+ createAndRunTestGraph(op_generator, ElementWise<Add, Tensor, Tensor>, input_n_tensors,
+ input_a_tensors[0],
input_a_tensors[1]);
}
}
return g.create<mir::ops::ElementwiseOp>("y", inputs, mir::ops::ElementwiseOp::OpType::mul);
};
- createAndRunTestGraph(opGenerator, ElementWise<Mul,Tensor,Tensor,Tensor>, input_n_tensors, input_a_tensors[0],
+ createAndRunTestGraph(opGenerator, ElementWise<Mul, Tensor, Tensor, Tensor>, input_n_tensors,
+ input_a_tensors[0],
input_a_tensors[1], input_a_tensors[2]);
}
}
return g.create<mir::ops::ElementwiseOp>("y", inputs, mir::ops::ElementwiseOp::OpType::max);
};
- createAndRunTestGraph(opGenerator, ElementWise<Max,Tensor,Tensor,Tensor,Tensor>, input_n_tensors, input_a_tensors[0],
+ createAndRunTestGraph(opGenerator, ElementWise<Max, Tensor, Tensor, Tensor, Tensor>,
+ input_n_tensors, input_a_tensors[0],
input_a_tensors[1], input_a_tensors[2], input_a_tensors[3]);
}
}
for (iT inputC = 1; inputC <= 3; ++inputC)
for (iT outputC = 1; outputC <= 3; ++outputC)
for (iT strideH = 1; strideH <= 3; ++strideH)
- for (iT strideW = 1; strideW <= 3; ++strideW)
- {
+ for (iT strideW = 1; strideW <= 3; ++strideW) {
vector<int> inputShapeData{1, 5, 7, static_cast<int>(inputC)}; // NHWC
mir::Shape kernelShape{kernelH, kernelW, inputC, outputC}; // HWCN
mir::Shape strides{strideH, strideW};
for (iT channels = 1; channels <= 3; ++channels)
for (iT strideW = 1; strideW <= 3; ++strideW)
for (iT strideH = 1; strideH <= 3; ++strideH)
- for (iT multiplier = 1; multiplier <= 2; ++multiplier)
- {
+ for (iT multiplier = 1; multiplier <= 2; ++multiplier) {
vector<int> inputShapeData{1, 5, 7, static_cast<int>(channels)}; // NHWC
mir::Shape kernelShape{kernelH, kernelW, channels, multiplier}; // HWCN
mir::Shape strides{strideH, strideW};
for (iT windowW = 1; windowW <= 3; ++windowW)
for (iT channels = 1; channels <= 2; ++channels)
for (iT strideH = 1; strideH <= 3; ++strideH)
- for (iT strideW = 1; strideW <= 3; ++strideW)
- {
+ for (iT strideW = 1; strideW <= 3; ++strideW) {
vector<int> shapeData{1, 5, 7, static_cast<int>(channels)};
mir::Shape windowShape{windowH, windowW};
mir::Shape strides{strideH, strideW};
TEST(cpp_operations_test, maxpool)
{
vector<irOps::PoolOp::BorderType> borderTypes{
- irOps::PoolOp::BorderType::EMPTY
- };
+ irOps::PoolOp::BorderType::EMPTY
+ };
genericPoolTest<mir::ops::PoolOp::PoolingType::MAX>(maxPool, borderTypes);
}
TEST(cpp_operations_test, avgpool)
{
vector<irOps::PoolOp::BorderType> borderTypes{
- irOps::PoolOp::BorderType::EMPTY,
- irOps::PoolOp::BorderType::ZEROFILLED
- };
+ irOps::PoolOp::BorderType::EMPTY,
+ irOps::PoolOp::BorderType::ZEROFILLED
+ };
genericPoolTest<mir::ops::PoolOp::PoolingType::AVG>(avgPool, borderTypes);
}
TEST(cpp_operations_test, relu)
{
// test prerequisites
- vector<int> shapeData{2,3,4,5};
+ vector<int> shapeData{2, 3, 4, 5};
Tensor aInputTensor;
vector<unique_ptr<mir::TensorVariant>> inputNTensors(1);
fillTensors(inputNTensors[0], aInputTensor, shapeData, 1.0f);
TEST(cpp_operations_test, elu) {
// test prerequisites
- vector<int> shape_data{2,3,4,5};
+ vector<int> shape_data{2, 3, 4, 5};
Tensor a_input_tensor;
vector<unique_ptr<mir::TensorVariant>> input_n_tensors(1);
fillTensors(input_n_tensors[0], a_input_tensor, shape_data, 1.0f);
TEST(cpp_operations_test, tanh) {
// test prerequisites
- vector<int> shape_data{2,3,4,5};
+ vector<int> shape_data{2, 3, 4, 5};
Tensor a_input_tensor;
vector<unique_ptr<mir::TensorVariant>> input_n_tensors(1);
fillTensors(input_n_tensors[0], a_input_tensor, shape_data, 1.0f);
TEST(cpp_operations_test, softmax)
{
// iterate over number of dimensions in tensor
- for (int numDims = 1; numDims <= 4; ++numDims)
- {
+ for (int numDims = 1; numDims <= 4; ++numDims) {
// test prerequisites
vector<int> shapeData{2, 3, 4, 5};
shapeData.resize(numDims);
TEST(cpp_operations_test, reshape)
{
// test prerequisites
- vector<int> inputShapeData{2,3,4,5};
- vector<int> outputShapeData{1,120};
+ vector<int> inputShapeData{2, 3, 4, 5};
+ vector<int> outputShapeData{1, 120};
mir::Shape nOutputShape;
fillNShape(nOutputShape, outputShapeData);
Tensor aInputTensor;