auto& strides = getStrides();
auto input_rank = input_shape.rank();
- assert(input_rank == 3);
+ assert(input_rank == 4);
assert(kernel_shape.rank() == 4);
- assert(kernel_shape.dim(3) == input_shape.dim(2));
+ assert(kernel_shape.dim(3) == input_shape.dim(3));
Shape output_shape;
output_shape.resize(input_rank);
+ // Strides carry spatial dims only (no batch entry), hence the dim(d - 1) indexing below.
switch (_paddingType) {
case ops::PaddingType::Same:
- for (int32_t d = 0; d < input_rank; ++d)
- output_shape.dim(d) = input_shape.dim(d) * strides.dim(d) - strides.dim(d) + 1;
+ for (int d = 1; d < 3; d++)
+ output_shape.dim(d) = input_shape.dim(d) * strides.dim(d - 1) - strides.dim(d - 1) + 1;
break;
case ops::PaddingType::Valid:
- for (int32_t d = 0; d < input_rank; ++d)
- output_shape.dim(d) =
- input_shape.dim(d) * strides.dim(d) + kernel_shape.dim(d) - strides.dim(d);
+ for (int d = 1; d < 3; d++) {
+ output_shape.dim(d) = input_shape.dim(d) * strides.dim(d - 1) +
+ kernel_shape.dim(d - 1) - strides.dim(d - 1);
+ }
break;
case ops::PaddingType::Custom:
- for (int32_t d = 0; d < input_rank - 1; ++d)
- output_shape.dim(d) =
- input_shape.dim(d) * strides.dim(d) + kernel_shape.dim(d) - strides.dim(d) -
- 2 * getPadding(d);
+ // Spatial dims d = 1..2; strides/kernel/padding carry no batch dim, so index with d - 1.
+ // NOTE: keep padding per-dimension (getPadding(d - 1)), matching the original
+ // getPadding(d) semantics — using getPadding(0) for both dims would break
+ // asymmetric padding (pad_h != pad_w).
+ for (int d = 1; d < 3; d++) {
+ output_shape.dim(d) = input_shape.dim(d) * strides.dim(d - 1) +
+ kernel_shape.dim(d - 1) - strides.dim(d - 1) - 2 * getPadding(d - 1);
+ }
break;
default: {
assert(false && "invalid padding type");
}
}
-
+ output_shape.dim(0) = input_shape.dim(0);
output_shape.dim(-1) = kernel_shape.dim(-2);
setOutputShape(0, output_shape);
}
using namespace mir::ops;
std::vector<nnc::mir::TensorVariant> nnc::DeConv2D::operator()() {
- auto res = allocate_tensor(_out_shape);
+ Shape out_shape = _out_shape;
+ auto res = allocate_tensor(out_shape);
Tensor<float> res_accesor(res);
Index pads({_op.getPadding(0), _op.getPadding(1), 0});
- Shape out_shape = res_accesor.getShape();
- out_shape.dim(2) = 1;
+ out_shape.dim(3) = 1;
ShapeRange out_range(out_shape);
- const Shape& in_shape = _input.getShape();
- ShapeRange in_range(_input.getShape());
-
+ Shape in_shape = _input.getShape();
+ ShapeRange in_range(in_shape);
std::shared_ptr<TensorVariant> tr_kernel;
const std::shared_ptr<const mir::TensorVariant> kernel_ptr(
// flag that keeps info on whether the current input element is from input
// or is from dilation by stride
bool is_from_input = true;
- for (int32_t d = 0; d < input_idx.rank()-1; ++d) {
- const auto num = (out_idx.at(d) - kernel.getShape().dim(d) + pads.at(d) +1 + kernel_idx.at(d) );
- const auto div_res = num / _strides.dim(d);
- const auto rem = num % _strides.dim(d);
+ for (int32_t d = 1; d < input_idx.rank() - 1; ++d) {
+ const auto num = (out_idx.at(d) - kernel.getShape().dim(d - 1) + pads.at(d - 1) + 1 +
+ kernel_idx.at(d - 1));
+ const auto div_res = num / _strides.dim(d - 1);
+ const auto rem = num % _strides.dim(d - 1);
is_from_input = is_from_input && rem == 0;
if (rem != 0) break;
input_idx.at(d) = div_res;
}
- input_idx.at(2) = kernel_idx.at(2);
+ // batch is same as output's
+ input_idx.at(0) = out_idx.at(0);
+ // channel index - same as kernel's
+ input_idx.at(3) = kernel_idx.at(2);
// rotate kernel 180 deg around last axis
// by index transform
- for (int32_t d = 0; d < input_idx.rank()-1; ++d) {
+ for (int32_t d = 0; d < 2; ++d) {
kernel_idx.at(d) = kernel.getShape().dim(d) - kernel_idx.at(d) -1;
}
: _input(input), _kernel(op.getKernel()), _strides(op.getStrides()),
_padding(op.getPaddingType()), _out_shape(op.getOutputShape(0)), _op(op) {
- assert(_op.getInputShape(0).rank() == 3);
- assert(input.getShape().rank() == 3);
- assert(_kernel.getShape().rank() == 4);
- const auto& ks = _kernel.getShape();
- const auto& is = input.getShape();
+ assert(_op.getInputShape(0).rank() == 4);
+ const auto& kernel_shape = _kernel.getShape();
+ const auto& inp_shape = input.getShape();
+ assert(inp_shape.rank() == 4);
+ assert(kernel_shape.rank() == 4);
assert(_strides.dim(2) == 1);
assert(_op.getPadding(2) == 0);
- // kernel shape is [hw"oc""ic"]; input's - [hw"ic"]
- assert(ks.dim(3) == is.dim(2));
+ // kernel shape is [hw"oc""ic"]; input's - ["batch"hw"ic"]
+ assert(kernel_shape.dim(3) == inp_shape.dim(3));
}
} // namespace nnc
void convTransposed2d(Tensor &out, const char *params, const Tensor &in) {
const float *input = in.getData();
- RuntimeShape input_shape = shapeToRuntimeShapePad4(in.getShape());
+ RuntimeShape input_shape = shapeToRuntimeShape(in.getShape());
KernelRT kernel = deserializeKernelRT(params);
Shape strides = deserializeShape(params);
// pads type. unused for now
out.reShape(out_s);
- RuntimeShape out_shape = shapeToRuntimeShapePad4(out_s);
+ RuntimeShape out_shape = shapeToRuntimeShape(out_s);
const short stride_w = strides[1];
const short stride_h = strides[0];
const short pad_w = pads[1];
const short pad_h = pads[0];
- const int kw = kernel.shape.Dims(2);
- const int kh = kernel.shape.Dims(1);
+ const int ker_width = kernel.shape.Dims(2);
+ const int ker_height = kernel.shape.Dims(1);
- RuntimeShape im2col_shape = RuntimeShape({1,1,(int) (out_s[0]*out_s[1]),
- input_shape.Dims(3)*kw*kh});
+ RuntimeShape im2col_shape = RuntimeShape({
+ (int)out_s[0],
+ (int)out_s[1],
+ (int)out_s[2],
+ // input depth (channels) * kernel width * kernel height
+ input_shape.Dims(3) * ker_width * ker_height
+ });
const auto convPara = ConvParams({PaddingType::kSame,
PaddingValues({pad_w,pad_h}), stride_w, stride_h});
for (iT outputC = 1; outputC <= 3; ++outputC)
for (iT strideH = 1; strideH <= 3; ++strideH)
for (iT strideW = 1; strideW <= 3; ++strideW) {
- vector<int> inputShapeData{9, 3, static_cast<int>(inputC)}; // HWC
+ vector<int> inputShapeData{3, 9, 3, static_cast<int>(inputC)}; // NHWC
mir::Shape kernelShape{kernelH, kernelW, outputC, inputC};
mir::Shape strides{strideH, strideW, 1};
vector<unique_ptr<mir::TensorVariant>> inputNTensors(1);
fillTensors(inputNTensors[0], aInputTensor, inputShapeData, 1.0f);
auto padT = mir::ops::PaddingType::Same;
mir::TensorVariant kernel = createNTensor(kernelShape, 1.0f);
- auto opGenerator = [kernel, strides, padT](mir::Graph &g,
- const std::vector<mir::IODescriptor>& inputs) {
+ auto opGenerator = [kernel, strides, padT](
+ mir::Graph& g, const std::vector<mir::IODescriptor>& inputs) {
+
return g.create<mir::ops::DeConv2DOp>("y", inputs[0], kernel, strides, padT);
};