From: 박종현/동작제어Lab(SR)/Senior Engineer/삼성전자
Date: Thu, 12 Apr 2018 00:07:23 +0000 (+0900)
Subject: Implement naive feature map and kernel update (#585)
X-Git-Tag: 0.1~313
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d67e778c04f0e03cb46f21aea4ef02e245255526;p=platform%2Fcore%2Fml%2Fnnfw.git

Implement naive feature map and kernel update (#585)

* Implement naive feature map and kernel update

This commit implements an unoptimized, naive offset calculation
(which is easy to debug).

Signed-off-by: Jonghyun Park

* Remove unused variables
---
diff --git a/include/util/feature/TextFormatter.h b/include/util/feature/TextFormatter.h
new file mode 100644
index 0000000..ef1b6b6
--- /dev/null
+++ b/include/util/feature/TextFormatter.h
@@ -0,0 +1,68 @@
+#ifndef __NNFW_UTIL_FEATURE_TEXT_FORMATTER_H__
+#define __NNFW_UTIL_FEATURE_TEXT_FORMATTER_H__
+
+#include "util/feature/Shape.h"
+#include "util/feature/Reader.h"
+
+#include <ostream>
+#include <iomanip>
+#include <limits>
+
+namespace nnfw
+{
+namespace util
+{
+namespace feature
+{
+
+template<typename T> class TextFormatter
+{
+public:
+  TextFormatter(const Shape &shape, const Reader<T> &data)
+    : _shape(shape), _data(data)
+  {
+    // DO NOTHING
+  }
+
+public:
+  const Shape &shape(void) const { return _shape; }
+  const Reader<T> &data(void) const { return _data; }
+
+private:
+  const Shape &_shape;
+  const Reader<T> &_data;
+};
+
+template<typename T>
+std::ostream &operator<<(std::ostream &os, const TextFormatter<T> &fmt)
+{
+  const auto &shape = fmt.shape();
+
+  for (uint32_t ch = 0; ch < shape.C; ++ch)
+  {
+    os << "  Channel " << ch << ":" << std::endl;
+    for (uint32_t row = 0; row < shape.H; ++row)
+    {
+      os << "    ";
+      for (uint32_t col = 0; col < shape.W; ++col)
+      {
+        const auto value = fmt.data().at(ch, row, col);
+        os << std::right;
+        os << std::fixed;
+        os << std::setw(std::numeric_limits<T>::digits10 + 2);
+        os << std::setprecision(5);
+        os << value;
+        os << " ";
+      }
+      os << std::endl;
+    }
+  }
+
+  return os;
+}
+
+} // namespace feature
+} // namespace util
+} // namespace nnfw
+
+#endif // __NNFW_UTIL_FEATURE_TEXT_FORMATTER_H__
diff --git a/src/kernel/acl/src/IO_accessor.cpp b/src/kernel/acl/src/IO_accessor.cpp
index 400fd83..e209d98 100644
--- a/src/kernel/acl/src/IO_accessor.cpp
+++ b/src/kernel/acl/src/IO_accessor.cpp
@@ -1,5 +1,7 @@
 #include "IO_accessor.h"
 
+#include <cassert>
+
 namespace nnfw {
 namespace kernel {
 namespace acl {
@@ -45,6 +47,47 @@ static uint32_t getOffsetNCHW(const android::nn::Shape& shape, const arm_compute
   return offset;
 }
 
+static uint32_t getElementOffset(const android::nn::Shape& shape,
+                                 uint32_t ch, uint32_t row, uint32_t col)
+{
+  assert(getSizeOfDimension(shape, 0) == 1);
+  assert(shape.dimensions.size() == 4);
+
+  // TODO Optimize this!
+  const uint32_t W = getSizeOfDimension(shape, 2);
+  const uint32_t C = getSizeOfDimension(shape, 3);
+
+  int offset = 0;
+
+  // NNAPI uses NHWC ordering
+  offset += row * W * C;
+  offset += col * C;
+  offset += ch;
+
+  return offset;
+}
+
+static uint32_t getElementOffset(const android::nn::Shape& shape,
+                                 uint32_t nth, uint32_t ch, uint32_t row, uint32_t col)
+{
+  assert(shape.dimensions.size() == 4);
+
+  // TODO Optimize this!
+  const uint32_t H = getSizeOfDimension(shape, 1);
+  const uint32_t W = getSizeOfDimension(shape, 2);
+  const uint32_t C = getSizeOfDimension(shape, 3);
+
+  int offset = 0;
+
+  // NNAPI uses NHWC ordering
+  offset += nth * H * W * C;
+  offset += row * W * C;
+  offset += col * C;
+  offset += ch;
+
+  return offset;
+}
+
 bool InputAccessor::access_tensor(arm_compute::ITensor &tensor)
 {
   arm_compute::Window window;
@@ -52,7 +95,12 @@ bool InputAccessor::access_tensor(arm_compute::ITensor &tensor)
 
   execute_window_loop(window, [&](const arm_compute::Coordinates& id)
   {
-    uint32_t offset = getOffsetNCHW(_inputShape, id);
+    const uint32_t ch = id[2];
+    const uint32_t row = id[1];
+    const uint32_t col = id[0];
+
+    uint32_t offset = getElementOffset(_inputShape, ch, row, col);
+
     *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = *(_inputData + offset);
   });
 
@@ -66,7 +114,13 @@ bool WeightAccessor::access_tensor(arm_compute::ITensor &tensor)
 
   execute_window_loop(window, [&](const arm_compute::Coordinates& id)
   {
-    uint32_t offset = getOffsetNCHW(_filterShape, id);
+    const uint32_t nth = id[3];
+    const uint32_t ch = id[2];
+    const uint32_t row = id[1];
+    const uint32_t col = id[0];
+
+    uint32_t offset = getElementOffset(_filterShape, nth, ch, row, col);
+
     *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = *(_filterData + offset);
   });
 
@@ -94,7 +148,12 @@ bool OutputAccessor::access_tensor(arm_compute::ITensor &tensor)
 
   execute_window_loop(window, [&](const arm_compute::Coordinates& id)
  {
-    uint32_t offset = getOffsetNCHW(_outputShape, id);
+    const uint32_t ch = id[2];
+    const uint32_t row = id[1];
+    const uint32_t col = id[0];
+
+    uint32_t offset = getElementOffset(_outputShape, ch, row, col);
+
     *(_outputData + offset) = *reinterpret_cast<float *>(tensor.ptr_to_element(id));
   });
 
diff --git a/src/kernel/acl/src/cl/Conv2D.cpp b/src/kernel/acl/src/cl/Conv2D.cpp
index 849aba0..d8afe36 100644
--- a/src/kernel/acl/src/cl/Conv2D.cpp
+++ b/src/kernel/acl/src/cl/Conv2D.cpp
@@ -9,6 +9,118 @@ #include
+#include "util/feature/Shape.h"
+#include "util/feature/Reader.h"
+#include "util/feature/TextFormatter.h"
+
+namespace nnfw
+{
+namespace support
+{
+namespace nnapi
+{
+namespace feature
+{
+
+template<typename T> class Reader;
+
+template<> class Reader<float> : public nnfw::util::feature::Reader<float>
+{
+public:
+  Reader(const nnfw::util::feature::Shape &shape, const float *base)
+    : _shape{shape}, _base{base}
+  {
+    // DO NOTHING
+  }
+
+public:
+  float at(uint32_t ch, uint32_t row, uint32_t col) const override
+  {
+    return *(_base + getElementOffset(ch, row, col));
+  }
+
+private:
+  uint32_t getElementOffset(uint32_t ch, uint32_t row, uint32_t col) const
+  {
+    uint32_t res = 0;
+
+    // NNAPI assumes NHWC ordering for feature maps
+    res += row * _shape.W * _shape.C;
+    res += col * _shape.C;
+    res += ch;
+
+    return res;
+  }
+
+private:
+  nnfw::util::feature::Shape _shape;
+  const float *_base;
+};
+
+nnfw::util::feature::Shape asFeatureShape(const android::nn::Shape& shape)
+{
+  // NNAPI assumes the following ordering:
+  //
+  //  dim(0) -> N
+  //  dim(1) -> H
+  //  dim(2) -> W
+  //  dim(3) -> C
+  //
+  int32_t c = android::nn::getSizeOfDimension(shape, 3);
+  int32_t h = android::nn::getSizeOfDimension(shape, 1);
+  int32_t w = android::nn::getSizeOfDimension(shape, 2);
+
+  assert(android::nn::getSizeOfDimension(shape, 0) == 1);
+
+  return nnfw::util::feature::Shape{c, h, w};
+}
+
+} // namespace feature
+} // namespace nnapi
+} // namespace support
+} // namespace nnfw
+
+namespace nnfw
+{
+namespace support
+{
+namespace acl
+{
+namespace feature
+{
+
+template<typename T> class Reader;
+
+template<> class Reader<float> final : public nnfw::util::feature::Reader<float>
+{
+public:
+  Reader(arm_compute::ITensor *tensor) : _tensor{tensor}
+  {
+    assert(tensor->info()->data_type() == arm_compute::DataType::F32);
+  }
+
+public:
+  float at(uint32_t ch, uint32_t row, uint32_t col) const override
+  {
+    return *ptr_to_element(ch, row, col);
+  }
+
+private:
+  float *ptr_to_element(uint32_t ch, uint32_t row, uint32_t col) const
+  {
+    // ARM Compute uses CHW ordering
+    return reinterpret_cast<float *>(_tensor->ptr_to_element(arm_compute::Coordinates{col, row, ch}));
+  }
+
+private:
+  arm_compute::ITensor *_tensor;
+};
+
+} // namespace feature
+} // namespace acl
+} // namespace support
+} // namespace nnfw
+
 namespace nnfw {
 namespace kernel {
 namespace acl {
@@ -77,6 +189,28 @@ void NCHW2NHWC(const float* nchw, float* nhwc, const android::nn::Shape& shape)
   }
 }
 
+class Flag
+{
+public:
+  Flag(const std::string &k) : _flag{false}
+  {
+    auto env = std::getenv(k.c_str());
+
+    if (env)
+    {
+      _flag = (std::atoi(env) > 0);
+    }
+  }
+
+public:
+  bool value(void) const { return _flag; }
+
+private:
+  bool _flag;
+};
+
+static Flag verbose{"CONV2D_VERBOSE"};
+
 bool convFloat32(const float* inputData, const android::nn::Shape& inputShape,
                  const float* filterData, const android::nn::Shape& filterShape,
                  const float* biasData, const android::nn::Shape& biasShape,
@@ -131,6 +265,21 @@ bool convFloat32(const float* inputData, const android::nn::Shape& inputShape,
   TensorAccess(bias.ref(), biasData, biasShape);
   TensorAccess(filter.ref(), filterData, filterShape);
 
+  if (verbose.value())
+  {
+    input.ref().map();
+    auto ifm_shape = nnfw::support::nnapi::feature::asFeatureShape(inputShape);
+    nnfw::support::nnapi::feature::Reader<float> nnapi_ifm_reader{ifm_shape, inputData};
+    nnfw::support::acl::feature::Reader<float> acl_ifm_reader{input.ptr()};
+
+    std::cout << "NNAPI IFM:" << std::endl;
+    std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, nnapi_ifm_reader} << std::endl;
+
+    std::cout << "ARM Compute IFM:" << std::endl;
+    std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, acl_ifm_reader} << std::endl;
+    input.ref().unmap();
+  }
+
   for (const auto &fn : fns)
   {
     fn->run();
@@ -138,12 +287,7 @@ bool convFloat32(const float* inputData, const android::nn::Shape& inputShape,
 
   arm_compute::CLScheduler::get().sync();
 
-  // TODO put conversion inside OutputAccessor
-  uint32_t numItems = getNumItems(outputShape);
-  float* outputDataT = new float[numItems];
-  TensorAccess(output.ref(), outputDataT, outputShape);
-  NCHW2NHWC(outputDataT, outputData, outputShape);
-  delete [] outputDataT;
+  TensorAccess(output.ref(), outputData, outputShape);
 
   return true;
 }
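
For reference, a minimal standalone sketch of the NHWC offset rule that getElementOffset above relies on. The helper name nhwc_offset and the 1x2x3x4 shape are illustrative assumptions only, not part of the patch or of the nnfw codebase.

#include <cassert>
#include <cstdint>

// NHWC offset: channels of one pixel are contiguous, then columns,
// then rows, then batches (the same formula as getElementOffset above).
static uint32_t nhwc_offset(uint32_t nth, uint32_t row, uint32_t col, uint32_t ch,
                            uint32_t H, uint32_t W, uint32_t C)
{
  return nth * H * W * C + row * W * C + col * C + ch;
}

int main()
{
  // For a 1x2x3x4 (NHWC) tensor, element (n=0, h=1, w=2, c=3) is the last one.
  assert(nhwc_offset(0, 1, 2, 3, 2, 3, 4) == 2 * 3 * 4 - 1);
  return 0;
}

ARM Compute, by contrast, hands the window coordinates back in CHW order (id[0] = col, id[1] = row, id[2] = ch, id[3] = batch), which is why the accessors above remap the coordinates before computing the NHWC offset into the NNAPI buffer.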