tests/InferenceTestImage.cpp

   1 //
   2 // Copyright © 2017 Arm Ltd. All rights reserved.
   3 // SPDX-License-Identifier: MIT
   4 //
   5 #include "InferenceTestImage.hpp"
   6
   7 #include <boost/core/ignore_unused.hpp>
   8 #include <boost/format.hpp>
   9 #include <boost/core/ignore_unused.hpp>
  10 #include <boost/numeric/conversion/cast.hpp>
  11
  12 #include <array>
  13
  14 #define STB_IMAGE_IMPLEMENTATION
  15 #include <stb/stb_image.h>
  16
  17 #define STB_IMAGE_RESIZE_IMPLEMENTATION
  18 #include <stb/stb_image_resize.h>
  19
  20 #define STB_IMAGE_WRITE_IMPLEMENTATION
  21 #include <stb/stb_image_write.h>
  22
  23 namespace
  24 {
  25
  26 unsigned int GetImageChannelIndex(ImageChannelLayout channelLayout, ImageChannel channel)
  27 {
  28     switch (channelLayout)
  29     {
  30     case ImageChannelLayout::Rgb:
  31         return static_cast<unsigned int>(channel);
  32     case ImageChannelLayout::Bgr:
  33         return 2u - static_cast<unsigned int>(channel);
  34     default:
  35         throw UnknownImageChannelLayout(boost::str(boost::format("Unknown layout %1%")
  36             % static_cast<int>(channelLayout)));
  37     }
  38 }
  39
  40 inline float Lerp(float a, float b, float w)
  41 {
  42     return w * b + (1.f - w) * a;
  43 }
  44
  45 inline void PutData(std::vector<float> & data,
  46                     const unsigned int width,
  47                     const unsigned int x,
  48                     const unsigned int y,
  49                     const unsigned int c,
  50                     float value)
  51 {
  52     data[(3*((y*width)+x)) + c] = value;
  53 }
  54
  55 std::vector<float> ResizeBilinearAndNormalize(const InferenceTestImage & image,
  56                                               const unsigned int outputWidth,
  57                                               const unsigned int outputHeight,
  58                                               const float scale,
  59                                               const std::array<float, 3>& mean,
  60                                               const std::array<float, 3>& stddev)
  61 {
  62     std::vector<float> out;
  63     out.resize(outputWidth * outputHeight * 3);
  64
  65     // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
  66     // image is projected into the input image to figure out the interpolants and weights. Note that this
  67     // will yield different results than if projecting the centre of output texels.
  68
  69     const unsigned int inputWidth = image.GetWidth();
  70     const unsigned int inputHeight = image.GetHeight();
  71
  72     // How much to scale pixel coordinates in the output image to get the corresponding pixel coordinates
  73     // in the input image.
  74     const float scaleY = boost::numeric_cast<float>(inputHeight) / boost::numeric_cast<float>(outputHeight);
  75     const float scaleX = boost::numeric_cast<float>(inputWidth) / boost::numeric_cast<float>(outputWidth);
  76
  77     uint8_t rgb_x0y0[3];
  78     uint8_t rgb_x1y0[3];
  79     uint8_t rgb_x0y1[3];
  80     uint8_t rgb_x1y1[3];
  81
  82     for (unsigned int y = 0; y < outputHeight; ++y)
  83     {
  84         // Corresponding real-valued height coordinate in input image.
  85         const float iy = boost::numeric_cast<float>(y) * scaleY;
  86
  87         // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
  88         const float fiy = floorf(iy);
  89         const unsigned int y0 = boost::numeric_cast<unsigned int>(fiy);
  90
  91         // Interpolation weight (range [0,1])
  92         const float yw = iy - fiy;
  93
  94         for (unsigned int x = 0; x < outputWidth; ++x)
  95         {
  96             // Real-valued and discrete width coordinates in input image.
  97             const float ix = boost::numeric_cast<float>(x) * scaleX;
  98             const float fix = floorf(ix);
  99             const unsigned int x0 = boost::numeric_cast<unsigned int>(fix);
 100
 101             // Interpolation weight (range [0,1]).
 102             const float xw = ix - fix;
 103
 104             // Discrete width/height coordinates of texels below and to the right of (x0, y0).
 105             const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u);
 106             const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u);
 107
 108             std::tie(rgb_x0y0[0], rgb_x0y0[1], rgb_x0y0[2]) = image.GetPixelAs3Channels(x0, y0);
 109             std::tie(rgb_x1y0[0], rgb_x1y0[1], rgb_x1y0[2]) = image.GetPixelAs3Channels(x1, y0);
 110             std::tie(rgb_x0y1[0], rgb_x0y1[1], rgb_x0y1[2]) = image.GetPixelAs3Channels(x0, y1);
 111             std::tie(rgb_x1y1[0], rgb_x1y1[1], rgb_x1y1[2]) = image.GetPixelAs3Channels(x1, y1);
 112
 113             for (unsigned c=0; c<3; ++c)
 114             {
 115                 const float ly0 = Lerp(float(rgb_x0y0[c]), float(rgb_x1y0[c]), xw);
 116                 const float ly1 = Lerp(float(rgb_x0y1[c]), float(rgb_x1y1[c]), xw);
 117                 const float l = Lerp(ly0, ly1, yw);
 118                 PutData(out, outputWidth, x, y, c, ((l / scale) - mean[c]) / stddev[c]);
 119             }
 120         }
 121     }
 122     return out;
 123 }
 124
 125 } // namespace
 126
 127 InferenceTestImage::InferenceTestImage(char const* filePath)
 128  : m_Width(0u)
 129  , m_Height(0u)
 130  , m_NumChannels(0u)
 131 {
 132     int width;
 133     int height;
 134     int channels;
 135
 136     using StbImageDataPtr = std::unique_ptr<unsigned char, decltype(&stbi_image_free)>;
 137     StbImageDataPtr stbData(stbi_load(filePath, &width, &height, &channels, 0), &stbi_image_free);
 138
 139     if (stbData == nullptr)
 140     {
 141         throw InferenceTestImageLoadFailed(boost::str(boost::format("Could not load the image at %1%") % filePath));
 142     }
 143
 144     if (width == 0 || height == 0)
 145     {
 146         throw InferenceTestImageLoadFailed(boost::str(boost::format("Could not load empty image at %1%") % filePath));
 147     }
 148
 149     m_Width = boost::numeric_cast<unsigned int>(width);
 150     m_Height = boost::numeric_cast<unsigned int>(height);
 151     m_NumChannels = boost::numeric_cast<unsigned int>(channels);
 152
 153     const unsigned int sizeInBytes = GetSizeInBytes();
 154     m_Data.resize(sizeInBytes);
 155     memcpy(m_Data.data(), stbData.get(), sizeInBytes);
 156 }
 157
 158 std::tuple<uint8_t, uint8_t, uint8_t> InferenceTestImage::GetPixelAs3Channels(unsigned int x, unsigned int y) const
 159 {
 160     if (x >= m_Width || y >= m_Height)
 161     {
 162         throw InferenceTestImageOutOfBoundsAccess(boost::str(boost::format("Attempted out of bounds image access. "
 163             "Requested (%1%, %2%). Maximum valid coordinates (%3%, %4%).") % x % y % (m_Width - 1) % (m_Height - 1)));
 164     }
 165
 166     const unsigned int pixelOffset = x * GetNumChannels() + y * GetWidth() * GetNumChannels();
 167     const uint8_t* const pixelData = m_Data.data() + pixelOffset;
 168     BOOST_ASSERT(pixelData <= (m_Data.data() + GetSizeInBytes()));
 169
 170     std::array<uint8_t, 3> outPixelData;
 171     outPixelData.fill(0);
 172
 173     const unsigned int maxChannelsInPixel = std::min(GetNumChannels(), static_cast<unsigned int>(outPixelData.size()));
 174     for (unsigned int c = 0; c < maxChannelsInPixel; ++c)
 175     {
 176         outPixelData[c] = pixelData[c];
 177     }
 178
 179     return std::make_tuple(outPixelData[0], outPixelData[1], outPixelData[2]);
 180 }
 181
 182
 183 void InferenceTestImage::StbResize(InferenceTestImage& im, const unsigned int newWidth, const unsigned int newHeight)
 184 {
 185     std::vector<uint8_t> newData;
 186     newData.resize(newWidth * newHeight * im.GetNumChannels() * im.GetSingleElementSizeInBytes());
 187
 188     // boost::numeric_cast<>() is used for user-provided data (protecting about overflows).
 189     // static_cast<> is ok for internal data (assumes that, when internal data was originally provided by a user,
 190     // a boost::numeric_cast<>() handled the conversion).
 191     const int nW = boost::numeric_cast<int>(newWidth);
 192     const int nH = boost::numeric_cast<int>(newHeight);
 193
 194     const int w = static_cast<int>(im.GetWidth());
 195     const int h = static_cast<int>(im.GetHeight());
 196     const int numChannels = static_cast<int>(im.GetNumChannels());
 197
 198     const int res = stbir_resize_uint8(im.m_Data.data(), w, h, 0, newData.data(), nW, nH, 0, numChannels);
 199     if (res == 0)
 200     {
 201         throw InferenceTestImageResizeFailed("The resizing operation failed");
 202     }
 203
 204     im.m_Data.swap(newData);
 205     im.m_Width = newWidth;
 206     im.m_Height = newHeight;
 207 }
 208
 209 std::vector<float> InferenceTestImage::Resize(unsigned int newWidth,
 210                                               unsigned int newHeight,
 211                                               const armnn::CheckLocation& location,
 212                                               const ResizingMethods meth,
 213                                               const std::array<float, 3>& mean,
 214                                               const std::array<float, 3>& stddev,
 215                                               const float scale)
 216 {
 217     std::vector<float> out;
 218     if (newWidth == 0 || newHeight == 0)
 219     {
 220         throw InferenceTestImageResizeFailed(boost::str(boost::format("None of the dimensions passed to a resize "
 221             "operation can be zero. Requested width: %1%. Requested height: %2%.") % newWidth % newHeight));
 222     }
 223
 224     switch (meth) {
 225         case ResizingMethods::STB:
 226         {
 227             StbResize(*this, newWidth, newHeight);
 228             break;
 229         }
 230         case ResizingMethods::BilinearAndNormalized:
 231         {
 232             out = ResizeBilinearAndNormalize(*this, newWidth, newHeight, scale, mean, stddev);
 233             break;
 234         }
 235         default:
 236             throw InferenceTestImageResizeFailed(boost::str(
 237                 boost::format("Unknown resizing method asked ArmNN only supports {STB, BilinearAndNormalized} %1%")
 238                               % location.AsString()));
 239     }
 240     return out;
 241 }
 242
 243 void InferenceTestImage::Write(WriteFormat format, const char* filePath) const
 244 {
 245     const int w = static_cast<int>(GetWidth());
 246     const int h = static_cast<int>(GetHeight());
 247     const int numChannels = static_cast<int>(GetNumChannels());
 248     int res = 0;
 249
 250     switch (format)
 251     {
 252     case WriteFormat::Png:
 253         {
 254             res = stbi_write_png(filePath, w, h, numChannels, m_Data.data(), 0);
 255             break;
 256         }
 257     case WriteFormat::Bmp:
 258         {
 259             res = stbi_write_bmp(filePath, w, h, numChannels, m_Data.data());
 260             break;
 261         }
 262     case WriteFormat::Tga:
 263         {
 264             res = stbi_write_tga(filePath, w, h, numChannels, m_Data.data());
 265             break;
 266         }
 267     default:
 268         throw InferenceTestImageWriteFailed(boost::str(boost::format("Unknown format %1%")
 269             % static_cast<int>(format)));
 270     }
 271
 272     if (res == 0)
 273     {
 274         throw InferenceTestImageWriteFailed(boost::str(boost::format("An error occurred when writing to file %1%")
 275             % filePath));
 276     }
 277 }
 278
 279 template <typename TProcessValueCallable>
 280 std::vector<float> GetImageDataInArmNnLayoutAsFloats(ImageChannelLayout channelLayout,
 281     const InferenceTestImage& image,
 282     TProcessValueCallable processValue)
 283 {
 284     const unsigned int h = image.GetHeight();
 285     const unsigned int w = image.GetWidth();
 286
 287     std::vector<float> imageData;
 288     imageData.resize(h * w * 3);
 289
 290     for (unsigned int j = 0; j < h; ++j)
 291     {
 292         for (unsigned int i = 0; i < w; ++i)
 293         {
 294             uint8_t r, g, b;
 295             std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
 296
 297             // ArmNN order: C, H, W
 298             const unsigned int rDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::R) * h * w + j * w + i;
 299             const unsigned int gDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::G) * h * w + j * w + i;
 300             const unsigned int bDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::B) * h * w + j * w + i;
 301
 302             imageData[rDstIndex] = processValue(ImageChannel::R, float(r));
 303             imageData[gDstIndex] = processValue(ImageChannel::G, float(g));
 304             imageData[bDstIndex] = processValue(ImageChannel::B, float(b));
 305         }
 306     }
 307
 308     return imageData;
 309 }
 310
 311 std::vector<float> GetImageDataInArmNnLayoutAsNormalizedFloats(ImageChannelLayout layout,
 312     const InferenceTestImage& image)
 313 {
 314     return GetImageDataInArmNnLayoutAsFloats(layout, image,
 315         [](ImageChannel channel, float value)
 316         {
 317             boost::ignore_unused(channel);
 318             return value / 255.f;
 319         });
 320 }
 321
 322 std::vector<float> GetImageDataInArmNnLayoutAsFloatsSubtractingMean(ImageChannelLayout layout,
 323     const InferenceTestImage& image,
 324     const std::array<float, 3>& mean)
 325 {
 326     return GetImageDataInArmNnLayoutAsFloats(layout, image,
 327         [layout, &mean](ImageChannel channel, float value)
 328         {
 329             const unsigned int channelIndex = GetImageChannelIndex(layout, channel);
 330             return value - mean[channelIndex];
 331         });
 332 }
 333
 334 std::vector<float> GetImageDataAsNormalizedFloats(ImageChannelLayout layout,
 335                                                   const InferenceTestImage& image)
 336 {
 337     std::vector<float> imageData;
 338     const unsigned int h = image.GetHeight();
 339     const unsigned int w = image.GetWidth();
 340
 341     const unsigned int rDstIndex = GetImageChannelIndex(layout, ImageChannel::R);
 342     const unsigned int gDstIndex = GetImageChannelIndex(layout, ImageChannel::G);
 343     const unsigned int bDstIndex = GetImageChannelIndex(layout, ImageChannel::B);
 344
 345     imageData.resize(h * w * 3);
 346     unsigned int offset = 0;
 347
 348     for (unsigned int j = 0; j < h; ++j)
 349     {
 350         for (unsigned int i = 0; i < w; ++i)
 351         {
 352             uint8_t r, g, b;
 353             std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
 354
 355             imageData[offset+rDstIndex] = float(r) / 255.0f;
 356             imageData[offset+gDstIndex] = float(g) / 255.0f;
 357             imageData[offset+bDstIndex] = float(b) / 255.0f;
 358             offset += 3;
 359         }
 360     }
 361
 362     return imageData;
 363 }