Kernel bias = deserializeKernel(params);
out.reShape(in.getShape());
- out.fillData(in.getData());
+ out.fillData(in.getData(), in.getShape().getNumElems());
AddBiasAndEvalActivationFunction(bias.data, bias.dims, out.getData(), shapeToDims(out.getShape()));
}
// These operations (add, mul, max) take multiple tensors as input: at least 2, likely fewer than 7.
// The parameter pack generalizes the implementation to any number of inputs.
-template <typename F, class ...Args>
-void ElementWise(Tensor &out,
- const char *params, const Args &...inputs) {
- const float *input[] = {inputs.getData()...};
- RuntimeShape in_shapes[] = {shapeToRuntimeShape(inputs.getShape())...};
+template <typename F, typename ...Args>
+void ElementWise(Tensor &out, const char* params, const Args& ...inputs) {
+ static_assert(sizeof...(inputs) >= 2, "ElementWise op must have >= 2 inputs");
+
+ const float* input[] = {inputs.getData()...};
+ Shape in_shapes[] = {inputs.getShape()...};
+ RuntimeShape in_runtime_shapes[] = {shapeToRuntimeShape(inputs.getShape())...};
- const int32_t num_inputs = sizeof(input) / sizeof(const float*);
+ const int32_t num_inputs = sizeof...(inputs);
const bool needs_broadcast = (bool)deserializeT<int32_t>(params);
const Shape out_shape = deserializeShape(params);
out.reShape(out_shape);
- out.fillData(input[0]);
+ out.fillData(input[0], in_shapes[0].getNumElems());
const auto out_rt = shapeToRuntimeShape(out_shape);
if (!needs_broadcast) {
for (int32_t i = 1; i < num_inputs; ++i) {
F::Call(out.getData(), out_rt,
- input[i], in_shapes[i],
+ input[i], in_runtime_shapes[i],
out.getData(), out_rt,
- 0);
+ false);
}
} else {
- auto running_shape = RuntimeShape::ExtendedShape(4, in_shapes[0]);
+ auto running_shape = RuntimeShape::ExtendedShape(4, in_runtime_shapes[0]);
+ std::vector<float> inp_tmp(out_shape.getNumElems());
+
for (int32_t i = 1; i < num_inputs; ++i) {
- float* inp_tmp = new float[running_shape.FlatSize()];
- memcpy(inp_tmp, out.getData(), (size_t)running_shape.FlatSize() * 4);
- F::Call(inp_tmp, running_shape,
- input[i], in_shapes[i],
+ assert(running_shape.FlatSize() <= out_shape.getNumElems());
+
+ std::copy(out.getData(), out.getData() + running_shape.FlatSize(), inp_tmp.begin());
+
+ F::Call(inp_tmp.data(), running_shape,
+ input[i], in_runtime_shapes[i],
out.getData(), out_rt,
- 1);
+ true);
// This modifies the running shape
- running_shape.maxShape(RuntimeShape::ExtendedShape(4, in_shapes[i]));
+ running_shape.maxShape(RuntimeShape::ExtendedShape(4, in_runtime_shapes[i]));
}
}
assert(out_s.getNumElems() == in.getShape().getNumElems());
out.reShape(out_s);
- out.fillData(in.getData());
+ out.fillData(in.getData(), in.getShape().getNumElems());
}
void reduceMean(Tensor& out, const char* params, const Tensor& in) {