Rename `ReduceFOp` to `ReduceOp` and `ReduceN` to `Reduce`.
Speed up the `ReduceOp` implementation in the interpreter.
Reformat according to the coding style.
Remove unused files.
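
For reviewers, a minimal usage sketch after the rename, mirroring the updated shape-inference test in this patch. The `ReduceOp.h` and `Shape.h` header paths come from the includes touched below; the `Graph.h` and `InputOp.h` paths are assumed here for a self-contained example:

```cpp
#include "mir/Graph.h"        // assumed path for mir::Graph
#include "mir/Shape.h"
#include "mir/ops/InputOp.h"  // assumed path for ops::InputOp
#include "mir/ops/ReduceOp.h"

#include <cstdint>
#include <vector>

// Build a mean reduction over axes 1 and 3, dropping the reduced
// dimensions (keep_dims = false), exactly as in the updated test.
void buildReduce(mir::Graph& g) {
  auto input = g.create<mir::ops::InputOp>("input", mir::Shape{10, 2, 10, 9});
  auto reduce = g.create<mir::ops::ReduceOp>(
      "reduce", input->getOutput(0), std::vector<int32_t>{1, 3},
      /*keep_dims=*/false, mir::ops::ReduceOp::FuncType::mean);
  (void)reduce;  // the interpreter now evaluates this via Reduce<float>(input, op)()
}
```

The call matches the new constructor signature, `ReduceOp(Output* arg, std::vector<int32_t> reduce_dims, bool keep_dims, FuncType func_type)`.
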
Signed-off-by: Sergei Barannikov <s.barannikov@samsung.com>
void visit(ops::OutputOp& op) override;
void visit(ops::PadOp& op) override;
void visit(ops::PoolOp& op) override;
- void visit(ops::ReduceFOp& op) override;
+ void visit(ops::ReduceOp& op) override;
void visit(ops::ReluOp& op) override;
void visit(ops::ReshapeOp& op) override;
void visit(ops::ResizeOp& op) override;
namespace mir {
namespace ops {
-class ReduceFOp : public Operation {
+class ReduceOp : public Operation {
public:
enum class FuncType {
mean, //TODO add other reducers
* @param keep_dims whether to keep the original rank
* @param func_type function to reduce the tensor with (should be associative)
*/
- ReduceFOp(Output* arg,
+ ReduceOp(Output* arg,
std::vector<int32_t> reduce_dims,
bool keep_dims,
FuncType func_type)
- : Operation(Type::reduceF, {arg}), _reduceDims(std::move(reduce_dims)), _keepDims(keep_dims),
+ : Operation(Type::reduce, {arg}), _reduceDims(std::move(reduce_dims)), _keepDims(keep_dims),
_funcType(func_type) {
// Infer output shapes.
};
Operation* copyWithInputs(const std::vector<Output*>& inputs) override {
- return new ReduceFOp(inputs[0], _reduceDims, _keepDims, _funcType);
+ return new ReduceOp(inputs[0], _reduceDims, _keepDims, _funcType);
}
- const std::vector<int32_t>& getReductionDims() { return _reduceDims; };
+ const std::vector<int32_t>& getReductionDims() const { return _reduceDims; };
bool getKeepDims() const { return _keepDims; };
HANDLE_OP(output, OutputOp)
HANDLE_OP(pad, PadOp)
HANDLE_OP(pool, PoolOp)
-HANDLE_OP(reduceF, ReduceFOp)
+HANDLE_OP(reduce, ReduceOp)
HANDLE_OP(ReLU, ReluOp)
HANDLE_OP(reshape, ReshapeOp)
HANDLE_OP(resizeIm, ResizeOp)
#include "mir/ops/OutputOp.h"
#include "mir/ops/PadOp.h"
#include "mir/ops/PoolOp.h"
-#include "mir/ops/ReduceFOp.h"
+#include "mir/ops/ReduceOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ReshapeOp.h"
#include "mir/ops/ResizeOp.h"
dotBuilder.updateWithOp(&op, node_info);
}
-void IrDotDumper::visit(ops::ReduceFOp& op) {
- static const std::map<ops::ReduceFOp::FuncType, const char*> types{
- {ops::ReduceFOp::FuncType::mean, "mean"}
+void IrDotDumper::visit(ops::ReduceOp& op) {
+ static const std::map<ops::ReduceOp::FuncType, const char*> types{
+ {ops::ReduceOp::FuncType::mean, "mean"}
};
- auto node_info = DotIrNodeInfo().withType("ReduceFOp", op.getName())
+ auto node_info = DotIrNodeInfo().withType("ReduceOp", op.getName())
.withInShapes(getInputShapes(op))
.withOutShapes(getOutputShapes(op))
.withShape("Reduction dims", Shape(op.getReductionDims())) // appropriated shape to dims
#include "mir/ops/OutputOp.h"
#include "mir/ops/PadOp.h"
#include "mir/ops/PoolOp.h"
-#include "mir/ops/ReduceFOp.h"
+#include "mir/ops/ReduceOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ReshapeOp.h"
#include "mir/ops/ResizeOp.h"
#include "mir/ops/ElementwiseOp.h"
#include "mir/ops/ResizeOp.h"
#include "mir/ops/SqueezeOp.h"
-#include "mir/ops/ReduceFOp.h"
+#include "mir/ops/ReduceOp.h"
#include "mir/Shape.h"
#include <vector>
auto input = g.create<ops::InputOp>("input", Shape{10, 2, 10, 9});
- auto n = g.create<ops::ReduceFOp>("reduce", input->getOutput(0), std::vector<int32_t>{1, 3},
- false, ops::ReduceFOp::FuncType::mean);
+ auto n = g.create<ops::ReduceOp>("reduce", input->getOutput(0), std::vector<int32_t>{1, 3},
+ false, ops::ReduceOp::FuncType::mean);
ASSERT_EQ(resultShape, n->getOutputShape(0));
}
#include "mir/ops/LeakyReluOp.h"
#include "mir/ops/PadOp.h"
#include "mir/ops/PoolOp.h"
-#include "mir/ops/ReduceFOp.h"
+#include "mir/ops/ReduceOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ReshapeOp.h"
#include "mir/ops/ResizeOp.h"
void visit(ops::GemmOp &op) override { assert(false && "GemmOp"); }
void visit(ops::LeakyReluOp &op) override { assert(false && "LeakyReluOp"); }
void visit(ops::PadOp &op) override { assert(false && "PadOp"); }
- void visit(ops::ReduceFOp &op) override { assert(false && "ReduceFOp"); }
+ void visit(ops::ReduceOp &op) override { assert(false && "ReduceOp"); }
void visit(ops::ReluOp &op) override { assert(false && "ReluOp"); }
void visit(ops::ResizeOp &op) override { assert(false && "ResizeOp"); }
void visit(ops::ScaleOp &op) override { assert(false && "ScaleOp"); }
void visit(mir::ops::OutputOp& op) override;
void visit(mir::ops::PadOp& op) override;
void visit(mir::ops::PoolOp& op) override;
- void visit(mir::ops::ReduceFOp& op) override;
+ void visit(mir::ops::ReduceOp& op) override;
void visit(mir::ops::ReluOp& op) override;
void visit(mir::ops::ReshapeOp& op) override;
void visit(mir::ops::ResizeOp& op) override;
#include "mir/ops/OutputOp.h"
#include "mir/ops/PadOp.h"
#include "mir/ops/PoolOp.h"
-#include "mir/ops/ReduceFOp.h"
+#include "mir/ops/ReduceOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ReshapeOp.h"
#include "mir/ops/ResizeOp.h"
throw AclCppException("Unimplemented operation: Resize");
}
-void AclCppOpGenerator::visit(mir::ops::ReduceFOp& /*op*/) {
- throw AclCppException("Unimplemented operation: ReduceFOp");
+void AclCppOpGenerator::visit(mir::ops::ReduceOp& /*op*/) {
+ throw AclCppException("Unimplemented operation: ReduceOp");
}
void AclCppOpGenerator::genTranspose(const std::shared_ptr<nnc::ArtifactId>& input,
void visit(mir::ops::OutputOp& op) override;
void visit(mir::ops::PadOp& op) override;
void visit(mir::ops::PoolOp& op) override;
- void visit(mir::ops::ReduceFOp& op) override;
+ void visit(mir::ops::ReduceOp& op) override;
void visit(mir::ops::ReluOp& op) override;
void visit(mir::ops::ReshapeOp& op) override;
void visit(mir::ops::ResizeOp& op) override;
#include "mir/ops/LeakyReluOp.h"
#include "mir/ops/PadOp.h"
#include "mir/ops/PoolOp.h"
-#include "mir/ops/ReduceFOp.h"
+#include "mir/ops/ReduceOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ResizeOp.h"
#include "mir/ops/ScaleOp.h"
setOutputTensors(op, std::move(outputs));
}
-void NNInterpreter::visit(ops::ReduceFOp& op) {
+void NNInterpreter::visit(ops::ReduceOp& op) {
auto inputs = getInputTensors(op);
-
- assert(op.getFuncType() == ops::ReduceFOp::FuncType::mean);
-
- // should always be an integer in a float
- // NOLINTNEXTLINE(bugprone-integer-division)
- const float reduction_area = op.getInputShape(0).numElements() /
- op.getOutputShape(0).numElements();
-
- auto tmp = ReduceN<float>(op.getInputShape(0), op.getOutputShape(0), inputs[0],
- op.getReductionDims(),
- [](float running_sum, float item) { return running_sum + item; })();
- Tensor<float> out_t(tmp[0]); // for numerical stability
- auto outputs = Fill<float>(op.getOutputShape(0), [&out_t, reduction_area](const Index& id) {
- return out_t.at(id) / reduction_area;
- })();
+ auto outputs = Reduce<float>(inputs[0], op)();
setOutputTensors(op, std::move(outputs));
}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "BatchNorm.h"
-
-//Stub to ensure BatchNorm.h is compiled
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Concat.h"
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Dropout.h"
-
-//Stub to ensure Dropout.h is compiled
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Fill.h"
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Gemm.h"
-//Do not remove
-//Used to force compile Gemm.h
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Reduce.h"
* limitations under the License.
*/
-#ifndef _NNC_CORE_BACKEND_INTERPRETER_REDUCE_IMPL_
-#define _NNC_CORE_BACKEND_INTERPRETER_REDUCE_IMPL_
+#ifndef _NNC_CORE_BACKEND_INTERPRETER_REDUCE_
+#define _NNC_CORE_BACKEND_INTERPRETER_REDUCE_
-#include <functional>
-
-#include "mir/Shape.h"
+#include "OperationImpl.h"
+#include "mir/ops/ReduceOp.h"
#include "mir/Tensor.h"
#include "mir/ShapeRange.h"
-#include "OperationImpl.h"
-#include "Fill.h"
-
namespace nnc {
template<typename T>
-class ReduceN : public OperationImpl<T> {
+class Reduce : public OperationImpl<T> {
public:
- /**
- * @brief Reduces a tensor to output shape
- * @param inputShape
- * @param outputShape
- * @param input Stores the values
- * @param reductionDims vector of dims to reduce to 1
- * @param reduceFunc function to reduce the tensor with (should be associative)
- */
- ReduceN(const mir::Shape& inputShape, const mir::Shape& outputShape,
- const mir::TensorVariant& input,
- std::vector<int32_t> reductionDims, std::function<T(const T&, const T&)> reduceFunc)
- : _inShape(inputShape), _outputShape(outputShape), _input(input),
- _reductionDims(reductionDims),
- _reduceFunc(reduceFunc) {
- if (inputShape.rank() == outputShape.rank()) {
- for (auto axis: reductionDims) {
- assert(outputShape.dim(axis) == 1);
- }
- _keepDims = true;
- }
- }
+ Reduce(const mir::TensorVariant& input, const mir::ops::ReduceOp& op)
+ : _input(input), _op(op) {}
std::vector<mir::TensorVariant> operator()() override {
- auto res = this->allocate_tensor(_outputShape);
- mir::Tensor<T> res_accesor(res);
-
- mir::Index out_id;
- out_id.resize(_outputShape.rank());
- for (const mir::Index& input_id : mir::ShapeRange(_inShape)) {
- int32_t out_idx_id = 0;
-
- // This mask contains true for axis indexes that should be reduced
- // for example, if we want to reduce 1 and 3 axes, with total number of dims 4
- // mask will contain: [false, true, false, true]
- std::vector<bool> reduce_axis_mask(_inShape.rank(), false);
- for (auto axis: _reductionDims)
- reduce_axis_mask[axis] = true;
+ const auto& input_shape = _op.getInputShape(0);
+ const auto& output_shape = _op.getOutputShape(0);
+ const auto& reduction_dims = _op.getReductionDims();
+ const bool keep_dims = _op.getKeepDims();
+
+ assert(_op.getFuncType() == mir::ops::ReduceOp::FuncType::mean);
+ const auto reductor = [](T result, T x) { return result + x; };
+
+ auto res = OperationImpl<T>::allocate_tensor(output_shape);
+ mir::Tensor<T> res_accessor(res);
+
+  // This mask contains `true` for the axes that should be reduced. For example, reducing
+  // axes 1 and 3 out of 4 total axes yields the mask [false, true, false, true].
+ std::vector<bool> reduce_axis_mask(input_shape.rank(), false);
+ for (auto axis : reduction_dims) {
+ reduce_axis_mask[axis] = true;
+ }
- // change out id to point to the correct cell
- if (_keepDims) {
- for (int32_t d = 0; d < _inShape.rank(); ++d)
- out_id.at(out_idx_id++) = reduce_axis_mask[d] ? 0 : input_id.at(d);
- } else {
- for (int32_t d = 0; d < _inShape.rank(); ++d) {
- if (reduce_axis_mask[d])
- continue;
- out_id.at(out_idx_id++) = input_id.at(d);
+ mir::Index out_index;
+ out_index.resize(output_shape.rank());
+ for (const mir::Index& in_index : mir::ShapeRange(input_shape)) {
+ int32_t out_index_dim = 0;
+ for (int32_t d = 0; d < input_shape.rank(); ++d) {
+ if (keep_dims) {
+ out_index.at(out_index_dim++) = reduce_axis_mask[d] ? 0 : in_index.at(d);
+ } else {
+ if (!reduce_axis_mask[d]) {
+ out_index.at(out_index_dim++) = in_index.at(d);
+ }
}
}
- res_accesor.at(out_id) = _reduceFunc(res_accesor.at(out_id), _input.at(input_id));
+ res_accessor.at(out_index) = reductor(res_accessor.at(out_index), _input.at(in_index));
+ }
+
+ const int32_t reduction_factor = input_shape.numElements() / output_shape.numElements();
+
+ for (const auto& index : mir::ShapeRange(output_shape)) {
+ res_accessor.at(index) /= reduction_factor;
}
return {res};
}
private:
-
- const mir::Shape& _inShape;
- const mir::Shape& _outputShape;
const mir::Tensor<T> _input;
- const std::vector<int32_t> _reductionDims;
- const std::function<T(T, T)> _reduceFunc;
- bool _keepDims = false;
+ const mir::ops::ReduceOp& _op;
};
-
} // namespace nnc
-#endif //_NNC_CORE_BACKEND_INTERPRETER_REDUCE_IMPL_
+#endif //_NNC_CORE_BACKEND_INTERPRETER_REDUCE_
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Reshape.h"
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Softmax.h"
-
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cmath>
-
-#include "mir/ShapeRange.h"
-
-#include "conv_FFT.h"
-#include "common.h"
-
-namespace nnc
-{
-
-using namespace mir;
-using namespace mir::ops;
-
-// Mostly compatible with tensorflow implementation
-// Assuming input is in NHWC format with batch omitted( [in_height, in_width, in_channels] )
-// Kernel is in [filter_height, filter_width, in_channels, out_channels]
-// Refer to https://www.tensorflow.org/api_docs/python/tf/nn/conv2d for info
-std::vector<TensorVariant> Conv2D_FFT::operator()()
-{
- Index pads({_op.getPaddingBefore().at(0), _op.getPaddingBefore().at(1)});
- //
- // 1. Pad input (currently only with clamp to zero, maybe clamp to edge and wrap later)
- auto inputPadded = pad_input(pads);
- uint64_t spectreSize = inputPadded.size();
-
- Shape paddedShape = _input.getShape();
- int rank = paddedShape.rank();
- for (int i = 0; i < rank; i++)
- {
- paddedShape.dim(i) += 2 * pads.at(i);
- }
-
- // Correct pads to properly index the output
- if (pads.at(0) == 0)
- {
- pads.at(0) = _kernel.getShape().dim(0) / 2;
- }
- if (pads.at(1) == 0)
- {
- pads.at(1) = _kernel.getShape().dim(1) / 2;
- }
-
- // 2. Unpack kernels (for separate output channels) and pad them with zeroes to match padded input shape
- auto results = unpack_and_pad_kernels(paddedShape, spectreSize);
-
- // 3. FFT input and kernels
- fft_CT(inputPadded.data(), spectreSize);
- for (auto &kernel : results)
- {
- fft_CT(kernel.data(), spectreSize);
- }
-
- // 4. Elementwise production
- elementwise_product(inputPadded, results);
-
- // 5. IFFT input and match output shape
-
- auto res = ifft(results, paddedShape, _out_shape, _strides, pads);
-
- return {res};
-}
-
-Conv2D_FFT::Conv2D_FFT(const TensorVariant& input,
- const TensorVariant& kernel,
- const Conv2DOp& op)
- : _input(input), _kernel(kernel), _strides(op.getStrides()),
- _out_shape(op.getOutputShape(0)), _op(op)
-{
- // Same assertions as in Conv2D
- assert(_op.getInputShape(0).rank() == 3);
- assert(input.getShape().rank() == 3);
- assert(_kernel.getShape().rank() == 4);
- assert(_strides.dim(2) == 1);
-}
-
-std::vector<FFT_complex> Conv2D_FFT::pad_input(const Index &pads)
-{
- // Calculate new shape: just add paddings
- int32_t height = _input.getShape().dim(0),
- width = _input.getShape().dim(1);
-
- Shape newShape = _input.getShape();
- int rank = newShape.rank();
-
- uint64_t paddedSize = 1;
- for (int i = 0; i < rank; i++)
- {
- newShape.dim(i) += 2 * pads.at(i);
- paddedSize *= newShape.dim(i);
- }
-
- uint64_t spectreSize = 1;
- while (spectreSize < paddedSize)
- {
- spectreSize *= 2;
- }
-
- std::vector<FFT_complex> res(spectreSize, FFT_complex(0.0f, 0.0f));
-
- ShapeRange outRange(newShape);
- uint64_t i = 0;
- Index unpaddedIndex;
- for (auto outIdx : outRange)
- {
- // Fill paddings with zeroes
- if (outIdx.at(0) < pads.at(0) ||
- outIdx.at(1) < pads.at(1) ||
- outIdx.at(0) >= (pads.at(0) + height) ||
- outIdx.at(1) >= (pads.at(1) + width))
- {
- //res[i] = FFT_complex(0.0f, 0.0f);
- // Already done by vector constructor
- }
- // Copy values from input
- else
- {
- unpaddedIndex = outIdx;
- unpaddedIndex.at(0) -= pads.at(0);
- unpaddedIndex.at(1) -= pads.at(1);
- res[i] = FFT_complex(_input.at(unpaddedIndex), 0.0f);
- }
- i++;
- }
-
- return res;
-}
-
-std::vector<std::vector<FFT_complex>> Conv2D_FFT::unpack_and_pad_kernels(const Shape &paddedInputShape, const uint64_t spectreSize)
-{
- const Shape &kShape = _kernel.getShape();
- int32_t numKernels = kShape.dim(3);
-
- // Vector to store results to
- std::vector<std::vector<FFT_complex>> paddedKernels;
- for (int32_t n = 0; n < numKernels; n++) {
- std::vector<FFT_complex> one_kernel(spectreSize, FFT_complex(0.0f, 0.0f));
- paddedKernels.push_back(one_kernel);
- }
- // Unpack kernels
- int64_t shift = kShape.dim(2) - 1 + kShape.dim(2) *
- ((kShape.dim(0) - 1) / 2 * paddedInputShape.dim(1) + (kShape.dim(1) - 1) / 2) ;
- ShapeRange kernelRange(kShape);
- for (auto &kIdx: kernelRange)
- {
- if (kIdx.at(0) < kShape.dim(0) &&
- kIdx.at(1) < kShape.dim(1) &&
- kIdx.at(2) < kShape.dim(2))
- {
- Index kernelIdx = kIdx;
- // The resulting kernel is mirrored and shifted to make output elements correspond to elements of original tensor
- int64_t shifted_index = (static_cast<int64_t>(spectreSize) - shift + kernelIdx.at(2) + paddedInputShape.dim(2) *
- (kernelIdx.at(1) + paddedInputShape.dim(1) * kernelIdx.at(0))) % spectreSize;
- kernelIdx.at(0) = kShape.dim(0) - kernelIdx.at(0) - 1;
- kernelIdx.at(1) = kShape.dim(1) - kernelIdx.at(1) - 1;
- kernelIdx.at(2) = kShape.dim(2) - kernelIdx.at(2) - 1;
-
- paddedKernels[kernelIdx.at(3)][shifted_index] = _kernel.at(kernelIdx);
- }
- }
-
-
- return paddedKernels;
-}
-
-void Conv2D_FFT::elementwise_product(const std::vector<FFT_complex> &input,
- std::vector<std::vector<FFT_complex>> &kernels)
-{
- size_t size = input.size();
- for (auto &kernel : kernels)
- {
- for (size_t i = 0; i < size; i++)
- {
- kernel[i] *= input[i];
- }
- }
-}
-
-
-TensorVariant Conv2D_FFT::ifft(std::vector<std::vector<FFT_complex>> &spectres,
- const Shape &inShape,
- const Shape &outShape,
- const Shape &strides,
- const Index &paddings)
-{
- // TODO: maybe add some asserts()
-
- // Perform inverse FFT
- for (auto &result : spectres)
- {
- ifft_CT(result.data(), result.size());
- }
-
- // Allocate tensor
- TensorVariant res = allocate_tensor(outShape);
- auto resAccessor = Tensor<float>(res);
-
- // Move our results to it
- ShapeRange outRange(outShape);
- int32_t width = inShape.dim(1);
- // We have to multiply by number of channels, because
- // only every first of three elements corresponds to
- // correct convolution by channels, the rest are
- // results of shifted convolution
- int32_t inChannels = inShape.dim(2);
- for (auto &outIdx : outRange)
- {
- resAccessor.at(outIdx) = spectres[outIdx.at(2)][inChannels * ((outIdx.at(0) * strides.dim(0) + paddings.at(0)) * width +
- outIdx.at(1) * strides.dim(1) + paddings.at(1))].real();
- }
-
- return res;
-}
-
-void Conv2D_FFT::separate (FFT_complex* array,
- const uint64_t elements)
-{
- const uint64_t half_size = elements / 2;
-
- // Temporary heap to store odd elements.
- auto tmp = new FFT_complex[half_size];
- for (uint64_t i = 0; i < half_size; i++) {
- tmp[i] = array[i * 2 + 1];
- }
-
- // Copy even elements
- for (uint64_t i = 0; i < half_size; i++) {
- array[i] = array[i * 2];
- }
- // Copy odd elements
- for (uint64_t i = 0; i < half_size; i++) {
- array[i + half_size] = tmp[i];
- }
-
- delete[] tmp;
-}
-
-void Conv2D_FFT::fft_CT(FFT_complex* array,
- const uint64_t elements)
-{
- if (elements > 1) {
- separate(array, elements);
- fft_CT(array, elements / 2);
- fft_CT(array + elements / 2, elements / 2);
- for(size_t i = 0; i < elements / 2; i++) {
- FFT_complex even = array[i];
- FFT_complex odd = array[i + elements / 2];
- FFT_complex twiddle = exp(FFT_complex(0, -2. * M_PI * i / elements));
-
- array[i] = even + twiddle * odd;
- array[i + elements / 2] = even - twiddle * odd;
- }
- }
-}
-
-// TODO: using paddings and strides we can theoretically reduce number of elements calculated
-void Conv2D_FFT::ifft_CT(FFT_complex* array,
- const uint64_t elements)
-{
- if (elements > 1) {
- separate(array, elements);
- ifft_CT(array, elements / 2);
- ifft_CT(array + elements / 2, elements / 2);
- for(size_t i = 0; i < elements / 2; i++) {
- FFT_complex even = array[i];
- FFT_complex odd = array[i + elements / 2];
- FFT_complex twiddle = exp(FFT_complex(0, 2. * M_PI * i / elements));
-
- array[i] = (even + twiddle * odd) / FFT_complex(2.0f, 0.0f);
- array[i + elements / 2] = (even - twiddle * odd) / FFT_complex(2.0f, 0.0f);
- }
- }
-}
-
-} // namespace nnc
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-//
-// This implementation of 2D convolution
-// uses Fast Fourier Transform to speed up
-// computation on larger matrices.
-//
-// For principles of work refer to:
-// - Convolution theorem
-// - Discrete Fourier Transform
-// - Fast Fourier Transform
-// - https://arxiv.org/abs/1312.5851
-//
-// This implementation is for testing purposes and
-// speeding up the interpreter. After we decide on
-// CG IR, FG IR and code generation this will be
-// implemented as optimization pass.
-//
-//
-// No implementation yet, so the interfaces
-// of some methods are subject to change.
-//
-
-
-#ifndef _NNC_CORE_BACKEND_INTERPRETER_CONV2D_FFT_IMPL_
-#define _NNC_CORE_BACKEND_INTERPRETER_CONV2D_FFT_IMPL_
-
-#include "OperationImpl.h"
-#include "mir/ops/Conv2DOp.h"
-#include "mir/Tensor.h"
-#include <complex>
-
-namespace nnc
-{
-
-using FFT_complex = std::complex<float>;
-
-
-class Conv2D_FFT : public OperationImpl<float>
-{
-public:
- Conv2D_FFT(const mir::TensorVariant& input,
- const mir::TensorVariant& kernel,
- const mir::ops::Conv2DOp& op);
-
- std::vector<mir::TensorVariant> operator()() override;
-
-protected:
- ///
- /// Pad input (with zeroes) according to selected padding type (paddings are calculated in ShapeInference)
- ///
- std::vector<FFT_complex> pad_input(const mir::Index &pads);
-
- ///
- /// Unpack kernels for each out_channel and pad them with zeroes to input size
- ///
- std::vector<std::vector<FFT_complex>> unpack_and_pad_kernels(const mir::Shape &paddedInputShape,
- uint64_t spectreSize);
-
- ///
- /// This function performs elementwise product of input by each kernel
- /// and writes result back to kernels vector.
- ///
- void elementwise_product(const std::vector<FFT_complex> &input,
- std::vector<std::vector<FFT_complex>> &kernels);
- ///
- /// Perform Inverse Fast Fourier transform on elementwise products results. Return result of the convolution.
- ///
- mir::TensorVariant ifft(std::vector<std::vector<FFT_complex>> &spectres,
- const mir::Shape &inShape,
- const mir::Shape &outShape,
- const mir::Shape &strides,
- const mir::Index &paddings);
-
- ///
- /// Separate even/odd elements to lower/upper halves of array respectively.
- /// This allows in-place computation.
- ///
- void separate(FFT_complex* array, uint64_t elements);
-
- ///
- /// Concrete in-place implementation of FFT
- /// (using Cooley-Tukey algorithm, hence "_CT" suffixes)
- ///
- void fft_CT(FFT_complex* array, uint64_t elements);
-
- ///
- /// Concrete in-place implementation of inverse FFT
- ///
- void ifft_CT(FFT_complex* array, uint64_t elements);
-
-private:
- const mir::Tensor<float> _input;
- mir::Tensor<float> _kernel;
- const mir::Shape _strides;
- const mir::Shape &_out_shape;
- const mir::ops::Conv2DOp &_op;
-};
-
-} // namespace nnc
-
-#endif //_NNC_CORE_BACKEND_INTERPRETER_CONV2D_FFT_IMPL_
#include "mir/ops/OutputOp.h"
#include "mir/ops/PadOp.h"
#include "mir/ops/PoolOp.h"
-#include "mir/ops/ReduceFOp.h"
+#include "mir/ops/ReduceOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ReshapeOp.h"
#include "mir/ops/ResizeOp.h"
appendOperationToInference(&op, "pad");
}
-void ModelAnalyzer::visit(mir::ops::ReduceFOp& op) {
+void ModelAnalyzer::visit(mir::ops::ReduceOp& op) {
switch (op.getFuncType()) {
- case mir::ops::ReduceFOp::FuncType::mean:
+ case mir::ops::ReduceOp::FuncType::mean:
appendOperationToInference(&op, "reduceMean");
break;
default:
void visit(mir::ops::OutputOp& op) override;
void visit(mir::ops::PadOp& op) override;
void visit(mir::ops::PoolOp& op) override;
- void visit(mir::ops::ReduceFOp& op) override;
+ void visit(mir::ops::ReduceOp& op) override;
void visit(mir::ops::ReluOp& op) override;
void visit(mir::ops::ReshapeOp& op) override;
void visit(mir::ops::ResizeOp& op) override;
#include "mir/ops/LeakyReluOp.h"
#include "mir/ops/PadOp.h"
#include "mir/ops/PoolOp.h"
-#include "mir/ops/ReduceFOp.h"
+#include "mir/ops/ReduceOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ReshapeOp.h"
#include "mir/ops/ResizeOp.h"
serializeShape(op.getOutputShape(0));
}
-void Serializer::visit(mir::ops::ReduceFOp& op) {
+void Serializer::visit(mir::ops::ReduceOp& op) {
_curOp->paramStartOffset = _buffer.size();
serializeShape(Shape(op.getReductionDims())); // reuse shape serialization
serializeT<int32_t>(op.getKeepDims());
void visit(mir::ops::OutputOp& op) override;
void visit(mir::ops::PadOp& op) override;
void visit(mir::ops::PoolOp& op) override;
- void visit(mir::ops::ReduceFOp& op) override;
+ void visit(mir::ops::ReduceOp& op) override;
void visit(mir::ops::ReluOp& op) override;
void visit(mir::ops::ReshapeOp& op) override;
void visit(mir::ops::ResizeOp& op) override;
#include "mir/ops/LeakyReluOp.h"
#include "mir/ops/PadOp.h"
#include "mir/ops/PoolOp.h"
-#include "mir/ops/ReduceFOp.h"
+#include "mir/ops/ReduceOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ReshapeOp.h"
#include "mir/ops/ResizeOp.h"
mir::Tensor<int32_t> axes_tensor(extractTensor(inputs.at(1)));
std::vector<int32_t> axes = convertIntTensorToVector<int32_t>(axes_tensor);
- auto result = createOp<ops::ReduceFOp>(input, axes, opts->keep_dims(),
- ops::ReduceFOp::FuncType::mean);
+ auto result = createOp<ops::ReduceOp>(input, axes, opts->keep_dims(),
+ ops::ReduceOp::FuncType::mean);
return {result->getOutput(0)};
}
#include "mir/Shape.h"
#include "mir/ops/CommonProps.h"
-#include "mir/ops/ReduceFOp.h"
+#include "mir/ops/ReduceOp.h"
#include "mir/ops/ElementwiseOp.h"
#include "schema_generated.h"
#include "mir/ops/OutputOp.h"
#include "mir/ops/PadOp.h"
#include "mir/ops/PoolOp.h"
-#include "mir/ops/ReduceFOp.h"
+#include "mir/ops/ReduceOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ReshapeOp.h"
#include "mir/ops/ScaleOp.h"
#include "mir/ops/OutputOp.h"
#include "mir/ops/PadOp.h"
#include "mir/ops/PoolOp.h"
-#include "mir/ops/ReduceFOp.h"
+#include "mir/ops/ReduceOp.h"
#include "mir/ops/ReluOp.h"
#include "mir/ops/ReshapeOp.h"
#include "mir/ops/ResizeOp.h"
fillTensors(input_ntensors[0], input_atensor, input_shape_data, 1.0f);
auto op_generator = [&axis_list, keep_dims](
mir::Graph& g, const std::vector<mir::Operation::Output*>& inputs) {
- auto op = g.create<mir::ops::ReduceFOp>(
+ auto op = g.create<mir::ops::ReduceOp>(
"y", inputs[0], axis_list, keep_dims,
- mir::ops::ReduceFOp::FuncType::mean);
+ mir::ops::ReduceOp::FuncType::mean);
return op;
};