class GemmOp : public Operation {
public:
- GemmOp(IODescriptor arg, const IODescriptor b, const IODescriptor c) :
- Operation(Type::gemmOp, {arg, b, c}) {
+ GemmOp(IODescriptor a, IODescriptor b, IODescriptor c) :
+ Operation(Type::gemmOp, {a, b, c}) {
inferOutputShapes();
}
} // namespace ops
} // namespace mir
} // namespace nnc
-
#endif //_NNC_CORE_IR_MODEL_GEMM_OP_H_
void visit(ops::Conv2DOp& op) override;
void visit(ops::DeConv2DOp& op) override;
void visit(ops::DepthwiseConv2DOp& op) override;
- void visit(ops::GemmOp& op) override;
void visit(ops::DropoutOp& op) override;
void visit(ops::ElementwiseOp& op) override;
void visit(ops::EluOp& op) override;
void visit(ops::FullyConnectedOp& op) override;
void visit(ops::GatherOp& op) override;
+ void visit(ops::GemmOp& op) override;
void visit(ops::LeakyReluOp& op) override;
void visit(ops::PadOp& op) override;
void visit(ops::PoolOp& op) override;
if (do_it || all) {
auto last_idx = shape.rank() - 1;
for (auto idx : ShapeRange(shape)) {
- if (!idx.at(last_idx))
+ if (!(idx.at(last_idx) % 15))
std::cout << "\n";
dumpIndex(idx);
if (tensor.getDataType() == DTYPE::FLOAT32)
// Check nodes
const auto& outputs = g->collectOutputs();
-
+#if 0
+ interpreter.dump(*outputs[0], true);
+#endif
+
for (auto& out : outputs) {
auto outputNode = interpreter.getResult(out);
- if (outputNode.empty()) {
+ if (outputNode.empty())
throw PassException("No value for output node <" + out->getName() + ">");
- } else {
- std::cout << "Output node <" + out->getName() + "> found" << std::endl;
- }
}
bool is_several_outs = (outputs.size() > 1);
// Interpreter kernel for GemmOp: computes out = A*B + C. alpha/beta scaling is
// not handled here — presumably folded in by the ONNX importer (TODO confirm).
template<typename T>
class Gemm : public OperationImpl<T> {
public:
- Gemm(const mir::TensorVariant& a, const mir::TensorVariant& b, const mir::TensorVariant& c,
- mir::ops::GemmOp& op) : _op(op), _tensor_a(a), _tensor_b(b), _tensor_c(c) {}
+ // Wraps the three inputs (A, B, C) and keeps a reference to the op node
+ // for shape queries during execution.
+ Gemm(const mir::TensorVariant& a, const mir::TensorVariant& b,
+ const mir::TensorVariant& c, mir::ops::GemmOp& op) :
+ _op(op), _tensor_a(a), _tensor_b(b), _tensor_c(c) {}
 std::vector<mir::TensorVariant> operator()() override {
 mir::TensorVariant res = OperationImpl<T>::allocate_tensor(_op.getOutputShape(0));
 mir::Tensor<T> accessor(res);
 mir::ShapeRange out_range(res.getShape());
-// mir::Tensor<T> tensor_b(_b);
 auto b_shape = _tensor_b.getShape();
 int32_t b_rank = b_shape.rank();
 // We'd like to broadcast Tensor C to the output shape
 // Output must be a matrix; C either matches its rank or is a rank-1 vector
 // over a single-row output.
 assert(_op.getOutputShape(0).rank() == 2);
 assert((_op.getOutputShape(0).rank() == _op.getInputShape(2).rank()) ||
- ((_op.getInputShape(2).rank() == 1) && (_op.getOutputShape(0).dim(0) == 1)));
+ ((_op.getInputShape(2).rank() == 1) &&
+ (_op.getOutputShape(0).dim(0) == 1)));
+ // Per the comment above, this TensorVariant ctor appears to re-view C with
+ // the output shape (broadcast) — verify against TensorVariant's contract.
 auto t = mir::TensorVariant (_tensor_c, _op.getOutputShape(0));
 mir::Tensor<T> tensor_c(t);
 // (hunk context elided: remainder of operator() and the access specifier
 // for the members below are not part of this patch view)
 mir::ops::GemmOp& _op;
 mir::Tensor<T> _tensor_a;
 mir::Tensor<T> _tensor_b;
- const mir::TensorVariant _tensor_c;
+ // NOTE(review): const dropped, presumably so the broadcasting TensorVariant
+ // ctor above can accept it — confirm this is the reason.
+ mir::TensorVariant _tensor_c;
};
} // namespace nnc
assert (outputs.size());
// FIXME: it should be done properly via the given graph outputs
_graphOutputs.assign(outputs.begin(), outputs.end());
+#if 0
dump(input_nodes, outputs, onnx_node);
+#endif
}
// set graph outputs
// TODO: it should be done with onnx graph outputs
bool trans_a = found ? ivalue : 0;
std::tie (found, ivalue) = getIntAttribute(onnx_node, "transB");
bool trans_b = found ? ivalue : 0;
+ std::tie (found, ivalue) = getIntAttribute(onnx_node, "broadcast");
+ bool broadcast = found ? ivalue : 0;
std::tie (found, fvalue) = getFloatAttribute(onnx_node, "alpha");
float alpha = found ? fvalue : 1.0;
std::tie (found, fvalue) = getFloatAttribute(onnx_node, "beta");
//
auto input_c = inputs[2]->getOutput(0);
auto beta_tensor = createTensor(beta, input_c.op->getOutputShape(0));
+ // TODO: check 'broadcast' attribute here
if ((mult_a_b.rank() == 2) && (input_c.op->getOutputShape(0).rank() == 1)) {
beta_tensor = TensorVariant(beta_tensor, mult_a_b);
}
// NHWC -> NCHW
return createOp<ops::TransposeOp>(arg, std::vector<std::size_t>{0, 3, 1, 2});
}
-
} // namespace nnc
#include "cpp_pad.generated.h"
#include "cpp_transpose.generated.h"
#include "cpp_gather.generated.h"
+#include "cpp_gemm.generated.h"
namespace nnc
{
out.write(cpp_slice, sizeof(cpp_slice));
out.write(cpp_elementwise, sizeof(cpp_elementwise));
out.write(cpp_elu, sizeof(cpp_elu));
+ out.write(cpp_gemm, sizeof(cpp_gemm));
out.write(cpp_tanh, sizeof(cpp_tanh));
out.write(cpp_pad, sizeof(cpp_pad));
out.write(cpp_sqrt, sizeof(cpp_sqrt));
#include "core/modelIR/operations/BiasAddOp.h"
#include "core/modelIR/operations/CappedReluOp.h"
#include "core/modelIR/operations/ConcatOp.h"
+#include <core/modelIR/operations/ConstantOp.h>
#include "core/modelIR/operations/Conv2DOp.h"
#include "core/modelIR/operations/Deconv2DOp.h"
#include "core/modelIR/operations/DepthwiseConv2DOp.h"
vector<size_t> node_input_tensors;
for (const IODescriptor &d: op->getPrevNodes()) {
size_t idx = d.index;
- Operation *op = d.op;
- assert(_opToDescr.find(op) != _opToDescr.end());
- const OpDescr &descr = *_opToDescr[op];
+ Operation *prev_op = d.op;
+ assert(_opToDescr.find(prev_op) != _opToDescr.end());
+ const OpDescr &descr = *_opToDescr[prev_op];
const size_t &inTid = descr._outputs[idx];
node_input_tensors.push_back(inTid);
}
}
void ModelAnalyzer::visit(ops::GemmOp& op) {
- addOpDescr(&op, "gemm");
+ // The descriptor name must match the generated soft-backend entry point,
+ // which is emitted as gemmOp(); "gemm" is the low-level kernel, not the
+ // callable wrapper.
+ addOpDescr(&op, "gemmOp");
}
void ModelAnalyzer::visit(ops::CappedReluOp& op) {
void Serializer::visit(ops::GemmOp& op) {
 _curOp->_paramStartOffset = _buffer.size();
+ // The only parameter the runtime needs is the output shape: gemmOp()
+ // deserializes it to reShape the output tensor before calling the kernel.
+ serializeShape(op.getOutputShape(0));
}
void Serializer::visit(ops::CappedReluOp& op) {
limitations under the License.
==============================================================================*/
-inline void gemm(const float* tensor_a_data, const Dims<4>& tensor_a_dims,
- const float* tensor_b_data, const Dims<4>& tensor_b_dims,
- const float* tensor_c_data, const Dims<4>& tensor_c_dims,
- float* output_data, const Dims<4>& output_dims) {
- const auto tensor_a_map =
- MapAsMatrixWithFirstDimAsRows(tensor_a_data, tensor_a_dims);
- const auto tensor_b_map =
- MapAsMatrixWithFirstDimAsRows(tensor_b_data, tensor_b_dims);
- const auto tensor_c_map =
- MapAsMatrixWithFirstDimAsRows(tensor_c_data, tensor_c_dims);
+// Computes out = A*B + C on float buffers.
+// NOTE(review): input_c is indexed over the full output element count below,
+// so C must already have been broadcast to the output shape by the caller —
+// a rank-1 bias passed directly would read past its buffer. TODO confirm.
+inline void gemm(const float* input_a, const Dims<4>& input_a_dims,
+ const float* input_b, const Dims<4>& input_b_dims,
+ const float* input_c, const Dims<4>& input_c_dims,
+ float* output_data, const Dims<4>& out_dims) {
+ const auto input_matrix_a_map =
+ MapAsMatrixWithFirstDimAsRows(input_a, input_a_dims);
+ const auto input_matrix_b_map =
+ MapAsMatrixWithFirstDimAsRows(input_b, input_b_dims);
+ // input_matrix_c_map is currently unused — C is added element-wise below.
+ const auto input_matrix_c_map =
+ MapAsMatrixWithFirstDimAsRows(input_c, input_c_dims);
 auto output_matrix_map =
- MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
- Gemm(tensor_a_map, tensor_b_map, &output_matrix_map);
- auto size = tensor_a_dims.sizes[0] * tensor_a_dims.sizes[1] *
- tensor_a_dims.sizes[2] * tensor_a_dims.sizes[3];
- for (int i = 0; i < size; i++) {
- output_data[i] = output_data[i] + tensor_c_data[i];
- }
+ MapAsMatrixWithFirstDimAsRows(output_data, out_dims);
+
+ // NOTE(review): operands are passed as (B, A) — presumably because the
+ // matrix maps are transposed views, making Gemm(B, A) produce A*B in the
+ // output layout; verify against the Gemm wrapper's expected layout.
+ Gemm(input_matrix_b_map, input_matrix_a_map, &output_matrix_map);
+
+ // Element count now comes from the OUTPUT dims; the old code used A's dims,
+ // which is wrong whenever A and the output differ in shape.
+ int len = out_dims.sizes[0] * out_dims.sizes[1] *
+ out_dims.sizes[2] * out_dims.sizes[3];
+ for (int i = 0; i < len; i++)
+ output_data[i] += input_c[i];
}
out.getData(), shapeToDims(out_s));
}
+// Soft-backend entry point for GemmOp.
+// params holds the serialized output Shape (written by
+// Serializer::visit(ops::GemmOp&)); the output tensor is reshaped to it and
+// the three inputs are forwarded to the gemm() kernel.
+void gemmOp(Tensor &out, const char *params, const Tensor &tensor_a, const Tensor &tensor_b, const Tensor &tensor_c) {
+ Shape out_s = deserializeShape(params);
+ out.reShape(out_s);
+
+ gemm(tensor_a.getData(), shapeToDims(tensor_a.getShape()),
+ tensor_b.getData(), shapeToDims(tensor_b.getShape()),
+ tensor_c.getData(), shapeToDims(tensor_c.getShape()),
+ out.getData(), shapeToDims(out_s));
+}
/**
* @brief Resize assuming tflite axis order (NHWC)
*/
#include "code_snippets/cpp_elu.def"
#include "code_snippets/cpp_fully_connected.def"
#include "code_snippets/cpp_gather.def"
+#include "code_snippets/cpp_gemm.def"
#include "code_snippets/cpp_sigmoid.def"
#include "code_snippets/cpp_pad.def"
#include "code_snippets/cpp_pool.def"