tests/InferenceTestImage.cpp

   1 //
   2 // Copyright © 2017 Arm Ltd. All rights reserved.
   3 // See LICENSE file in the project root for full license information.
   4 //
   5 #include "InferenceTestImage.hpp"
   6
   7 #include <boost/core/ignore_unused.hpp>
   8 #include <boost/format.hpp>
   9 #include <boost/core/ignore_unused.hpp>
  10 #include <boost/numeric/conversion/cast.hpp>
  11
  12 #include <array>
  13
  14 #define STB_IMAGE_IMPLEMENTATION
  15 #include <stb_image.h>
  16
  17 #define STB_IMAGE_RESIZE_IMPLEMENTATION
  18 #include <stb_image_resize.h>
  19
  20 #define STB_IMAGE_WRITE_IMPLEMENTATION
  21 #include <stb_image_write.h>
  22
  23 namespace
  24 {
  25
  26 unsigned int GetImageChannelIndex(ImageChannelLayout channelLayout, ImageChannel channel)
  27 {
  28     switch (channelLayout)
  29     {
  30     case ImageChannelLayout::Rgb:
  31         return static_cast<unsigned int>(channel);
  32     case ImageChannelLayout::Bgr:
  33         return 2u - static_cast<unsigned int>(channel);
  34     default:
  35         throw UnknownImageChannelLayout(boost::str(boost::format("Unknown layout %1%")
  36             % static_cast<int>(channelLayout)));
  37     }
  38 }
  39
  40 inline float Lerp(float a, float b, float w)
  41 {
  42     return w * b + (1.f - w) * a;
  43 }
  44
  45 inline void PutData(std::vector<float> & data,
  46                     const unsigned int width,
  47                     const unsigned int x,
  48                     const unsigned int y,
  49                     const unsigned int c,
  50                     float value)
  51 {
  52     data[(3*((y*width)+x)) + c] = value;
  53 }
  54
  55 std::vector<float> ResizeBilinearAndNormalize(const InferenceTestImage & image,
  56                                               const unsigned int outputWidth,
  57                                               const unsigned int outputHeight,
  58                                               const std::array<float, 3>& mean,
  59                                               const std::array<float, 3>& stddev)
  60 {
  61     std::vector<float> out;
  62     out.resize(outputWidth * outputHeight * 3);
  63
  64     // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
  65     // image is projected into the input image to figure out the interpolants and weights. Note that this
  66     // will yield different results than if projecting the centre of output texels.
  67
  68     const unsigned int inputWidth = image.GetWidth();
  69     const unsigned int inputHeight = image.GetHeight();
  70
  71     // How much to scale pixel coordinates in the output image to get the corresponding pixel coordinates
  72     // in the input image.
  73     const float scaleY = boost::numeric_cast<float>(inputHeight) / boost::numeric_cast<float>(outputHeight);
  74     const float scaleX = boost::numeric_cast<float>(inputWidth) / boost::numeric_cast<float>(outputWidth);
  75
  76     uint8_t rgb_x0y0[3];
  77     uint8_t rgb_x1y0[3];
  78     uint8_t rgb_x0y1[3];
  79     uint8_t rgb_x1y1[3];
  80
  81     for (unsigned int y = 0; y < outputHeight; ++y)
  82     {
  83         // Corresponding real-valued height coordinate in input image.
  84         const float iy = boost::numeric_cast<float>(y) * scaleY;
  85
  86         // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
  87         const float fiy = floorf(iy);
  88         const unsigned int y0 = boost::numeric_cast<unsigned int>(fiy);
  89
  90         // Interpolation weight (range [0,1])
  91         const float yw = iy - fiy;
  92
  93         for (unsigned int x = 0; x < outputWidth; ++x)
  94         {
  95             // Real-valued and discrete width coordinates in input image.
  96             const float ix = boost::numeric_cast<float>(x) * scaleX;
  97             const float fix = floorf(ix);
  98             const unsigned int x0 = boost::numeric_cast<unsigned int>(fix);
  99
 100             // Interpolation weight (range [0,1]).
 101             const float xw = ix - fix;
 102
 103             // Discrete width/height coordinates of texels below and to the right of (x0, y0).
 104             const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u);
 105             const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u);
 106
 107             std::tie(rgb_x0y0[0], rgb_x0y0[1], rgb_x0y0[2]) = image.GetPixelAs3Channels(x0, y0);
 108             std::tie(rgb_x1y0[0], rgb_x1y0[1], rgb_x1y0[2]) = image.GetPixelAs3Channels(x1, y0);
 109             std::tie(rgb_x0y1[0], rgb_x0y1[1], rgb_x0y1[2]) = image.GetPixelAs3Channels(x0, y1);
 110             std::tie(rgb_x1y1[0], rgb_x1y1[1], rgb_x1y1[2]) = image.GetPixelAs3Channels(x1, y1);
 111
 112             for (unsigned c=0; c<3; ++c)
 113             {
 114                 const float ly0 = Lerp(float(rgb_x0y0[c]), float(rgb_x1y0[c]), xw);
 115                 const float ly1 = Lerp(float(rgb_x0y1[c]), float(rgb_x1y1[c]), xw);
 116                 const float l = Lerp(ly0, ly1, yw);
 117                 PutData(out, outputWidth, x, y, c, ((l/255.0f) - mean[c])/stddev[c]);
 118             }
 119         }
 120     }
 121     return out;
 122 }
 123
 124 } // namespace
 125
 126 InferenceTestImage::InferenceTestImage(char const* filePath)
 127  : m_Width(0u)
 128  , m_Height(0u)
 129  , m_NumChannels(0u)
 130 {
 131     int width;
 132     int height;
 133     int channels;
 134
 135     using StbImageDataPtr = std::unique_ptr<unsigned char, decltype(&stbi_image_free)>;
 136     StbImageDataPtr stbData(stbi_load(filePath, &width, &height, &channels, 0), &stbi_image_free);
 137
 138     if (stbData == nullptr)
 139     {
 140         throw InferenceTestImageLoadFailed(boost::str(boost::format("Could not load the image at %1%") % filePath));
 141     }
 142
 143     if (width == 0 || height == 0)
 144     {
 145         throw InferenceTestImageLoadFailed(boost::str(boost::format("Could not load empty image at %1%") % filePath));
 146     }
 147
 148     m_Width = boost::numeric_cast<unsigned int>(width);
 149     m_Height = boost::numeric_cast<unsigned int>(height);
 150     m_NumChannels = boost::numeric_cast<unsigned int>(channels);
 151
 152     const unsigned int sizeInBytes = GetSizeInBytes();
 153     m_Data.resize(sizeInBytes);
 154     memcpy(m_Data.data(), stbData.get(), sizeInBytes);
 155 }
 156
 157 std::tuple<uint8_t, uint8_t, uint8_t> InferenceTestImage::GetPixelAs3Channels(unsigned int x, unsigned int y) const
 158 {
 159     if (x >= m_Width || y >= m_Height)
 160     {
 161         throw InferenceTestImageOutOfBoundsAccess(boost::str(boost::format("Attempted out of bounds image access. "
 162             "Requested (%1%, %2%). Maximum valid coordinates (%3%, %4%).") % x % y % (m_Width - 1) % (m_Height - 1)));
 163     }
 164
 165     const unsigned int pixelOffset = x * GetNumChannels() + y * GetWidth() * GetNumChannels();
 166     const uint8_t* const pixelData = m_Data.data() + pixelOffset;
 167     BOOST_ASSERT(pixelData <= (m_Data.data() + GetSizeInBytes()));
 168
 169     std::array<uint8_t, 3> outPixelData;
 170     outPixelData.fill(0);
 171
 172     const unsigned int maxChannelsInPixel = std::min(GetNumChannels(), static_cast<unsigned int>(outPixelData.size()));
 173     for (unsigned int c = 0; c < maxChannelsInPixel; ++c)
 174     {
 175         outPixelData[c] = pixelData[c];
 176     }
 177
 178     return std::make_tuple(outPixelData[0], outPixelData[1], outPixelData[2]);
 179 }
 180
 181
 182 void InferenceTestImage::StbResize(InferenceTestImage& im, const unsigned int newWidth, const unsigned int newHeight)
 183 {
 184     std::vector<uint8_t> newData;
 185     newData.resize(newWidth * newHeight * im.GetNumChannels() * im.GetSingleElementSizeInBytes());
 186
 187     // boost::numeric_cast<>() is used for user-provided data (protecting about overflows).
 188     // static_cast<> is ok for internal data (assumes that, when internal data was originally provided by a user,
 189     // a boost::numeric_cast<>() handled the conversion).
 190     const int nW = boost::numeric_cast<int>(newWidth);
 191     const int nH = boost::numeric_cast<int>(newHeight);
 192
 193     const int w = static_cast<int>(im.GetWidth());
 194     const int h = static_cast<int>(im.GetHeight());
 195     const int numChannels = static_cast<int>(im.GetNumChannels());
 196
 197     const int res = stbir_resize_uint8(im.m_Data.data(), w, h, 0, newData.data(), nW, nH, 0, numChannels);
 198     if (res == 0)
 199     {
 200         throw InferenceTestImageResizeFailed("The resizing operation failed");
 201     }
 202
 203     im.m_Data.swap(newData);
 204     im.m_Width = newWidth;
 205     im.m_Height = newHeight;
 206 }
 207
 208 std::vector<float> InferenceTestImage::Resize(unsigned int newWidth,
 209                                               unsigned int newHeight,
 210                                               const armnn::CheckLocation& location,
 211                                               const ResizingMethods meth,
 212                                               const std::array<float, 3>& mean,
 213                                               const std::array<float, 3>& stddev)
 214 {
 215     std::vector<float> out;
 216     if (newWidth == 0 || newHeight == 0)
 217     {
 218         throw InferenceTestImageResizeFailed(boost::str(boost::format("None of the dimensions passed to a resize "
 219             "operation can be zero. Requested width: %1%. Requested height: %2%.") % newWidth % newHeight));
 220     }
 221
 222     if (newWidth == m_Width && newHeight == m_Height)
 223     {
 224         // Nothing to do.
 225         return out;
 226     }
 227
 228     switch (meth) {
 229         case ResizingMethods::STB:
 230         {
 231             StbResize(*this, newWidth, newHeight);
 232             break;
 233         }
 234         case ResizingMethods::BilinearAndNormalized:
 235         {
 236             out = ResizeBilinearAndNormalize(*this, newWidth, newHeight, mean, stddev);
 237             break;
 238         }
 239         default:
 240             throw InferenceTestImageResizeFailed(boost::str(
 241                 boost::format("Unknown resizing method asked ArmNN only supports {STB, BilinearAndNormalized} %1%")
 242                               % location.AsString()));
 243     }
 244     return out;
 245 }
 246
 247 void InferenceTestImage::Write(WriteFormat format, const char* filePath) const
 248 {
 249     const int w = static_cast<int>(GetWidth());
 250     const int h = static_cast<int>(GetHeight());
 251     const int numChannels = static_cast<int>(GetNumChannels());
 252     int res = 0;
 253
 254     switch (format)
 255     {
 256     case WriteFormat::Png:
 257         {
 258             res = stbi_write_png(filePath, w, h, numChannels, m_Data.data(), 0);
 259             break;
 260         }
 261     case WriteFormat::Bmp:
 262         {
 263             res = stbi_write_bmp(filePath, w, h, numChannels, m_Data.data());
 264             break;
 265         }
 266     case WriteFormat::Tga:
 267         {
 268             res = stbi_write_tga(filePath, w, h, numChannels, m_Data.data());
 269             break;
 270         }
 271     default:
 272         throw InferenceTestImageWriteFailed(boost::str(boost::format("Unknown format %1%")
 273             % static_cast<int>(format)));
 274     }
 275
 276     if (res == 0)
 277     {
 278         throw InferenceTestImageWriteFailed(boost::str(boost::format("An error occurred when writing to file %1%")
 279             % filePath));
 280     }
 281 }
 282
 283 template <typename TProcessValueCallable>
 284 std::vector<float> GetImageDataInArmNnLayoutAsFloats(ImageChannelLayout channelLayout,
 285     const InferenceTestImage& image,
 286     TProcessValueCallable processValue)
 287 {
 288     const unsigned int h = image.GetHeight();
 289     const unsigned int w = image.GetWidth();
 290
 291     std::vector<float> imageData;
 292     imageData.resize(h * w * 3);
 293
 294     for (unsigned int j = 0; j < h; ++j)
 295     {
 296         for (unsigned int i = 0; i < w; ++i)
 297         {
 298             uint8_t r, g, b;
 299             std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
 300
 301             // ArmNN order: C, H, W
 302             const unsigned int rDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::R) * h * w + j * w + i;
 303             const unsigned int gDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::G) * h * w + j * w + i;
 304             const unsigned int bDstIndex = GetImageChannelIndex(channelLayout, ImageChannel::B) * h * w + j * w + i;
 305
 306             imageData[rDstIndex] = processValue(ImageChannel::R, float(r));
 307             imageData[gDstIndex] = processValue(ImageChannel::G, float(g));
 308             imageData[bDstIndex] = processValue(ImageChannel::B, float(b));
 309         }
 310     }
 311
 312     return imageData;
 313 }
 314
 315 std::vector<float> GetImageDataInArmNnLayoutAsNormalizedFloats(ImageChannelLayout layout,
 316     const InferenceTestImage& image)
 317 {
 318     return GetImageDataInArmNnLayoutAsFloats(layout, image,
 319         [](ImageChannel channel, float value)
 320         {
 321             boost::ignore_unused(channel);
 322             return value / 255.f;
 323         });
 324 }
 325
 326 std::vector<float> GetImageDataInArmNnLayoutAsFloatsSubtractingMean(ImageChannelLayout layout,
 327     const InferenceTestImage& image,
 328     const std::array<float, 3>& mean)
 329 {
 330     return GetImageDataInArmNnLayoutAsFloats(layout, image,
 331         [layout, &mean](ImageChannel channel, float value)
 332         {
 333             const unsigned int channelIndex = GetImageChannelIndex(layout, channel);
 334             return value - mean[channelIndex];
 335         });
 336 }
 337
 338 std::vector<float> GetImageDataAsNormalizedFloats(ImageChannelLayout layout,
 339                                                   const InferenceTestImage& image)
 340 {
 341     std::vector<float> imageData;
 342     const unsigned int h = image.GetHeight();
 343     const unsigned int w = image.GetWidth();
 344
 345     const unsigned int rDstIndex = GetImageChannelIndex(layout, ImageChannel::R);
 346     const unsigned int gDstIndex = GetImageChannelIndex(layout, ImageChannel::G);
 347     const unsigned int bDstIndex = GetImageChannelIndex(layout, ImageChannel::B);
 348
 349     imageData.resize(h * w * 3);
 350     unsigned int offset = 0;
 351
 352     for (unsigned int j = 0; j < h; ++j)
 353     {
 354         for (unsigned int i = 0; i < w; ++i)
 355         {
 356             uint8_t r, g, b;
 357             std::tie(r, g, b) = image.GetPixelAs3Channels(i, j);
 358
 359             imageData[offset+rDstIndex] = float(r) / 255.0f;
 360             imageData[offset+gDstIndex] = float(g) / 255.0f;
 361             imageData[offset+bDstIndex] = float(b) / 255.0f;
 362             offset += 3;
 363         }
 364     }
 365
 366     return imageData;
 367 }