dali/internal/imaging/common/image-operations.cpp

   1 /*
   2  * Copyright (c) 2022 Samsung Electronics Co., Ltd.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  *
  16  */
  17
  18 #include <dali/internal/imaging/common/image-operations.h>
  19
  20 // EXTERNAL INCLUDES
  21 #include <dali/devel-api/adaptor-framework/image-loading.h>
  22 #include <dali/integration-api/debug.h>
  23 #include <dali/public-api/common/dali-vector.h>
  24 #include <dali/public-api/math/vector2.h>
  25 #include <stddef.h>
  26 #include <third-party/resampler/resampler.h>
  27 #include <cmath>
  28 #include <cstring>
  29 #include <limits>
  30 #include <memory>
  31
  32 // INTERNAL INCLUDES
  33
  34 namespace Dali
  35 {
  36 namespace Internal
  37 {
  38 namespace Platform
  39 {
  40 namespace
  41 {
  42 // The BORDER_FILL_VALUE is a single byte value that is used for horizontal and vertical borders.
  43 // A value of 0x00 gives us transparency for pixel buffers with an alpha channel, or black otherwise.
  44 // We can optionally use a Vector4 color here, but at reduced fill speed.
  45 const uint8_t BORDER_FILL_VALUE(0x00);
  46 // A maximum size limit for newly created bitmaps. ( 1u << 16 ) - 1 is chosen as we are using 16bit words for dimensions.
  47 const unsigned int MAXIMUM_TARGET_BITMAP_SIZE((1u << 16) - 1);
  48
  49 // Constants used by the ImageResampler.
  50 const float DEFAULT_SOURCE_GAMMA = 1.75f; ///< Default source gamma value used in the Resampler() function. Partial gamma correction looks better on mips. Set to 1.0 to disable gamma correction.
  51 const float FILTER_SCALE         = 1.f;   ///< Default filter scale value used in the Resampler() function. Filter scale - values < 1.0 cause aliasing, but create sharper looking mips.
  52
  53 const float RAD_135 = Math::PI_2 + Math::PI_4; ///< 135 degrees in radians;
  54 const float RAD_225 = RAD_135 + Math::PI_2;    ///< 225 degrees in radians;
  55 const float RAD_270 = 3.f * Math::PI_2;        ///< 270 degrees in radians;
  56 const float RAD_315 = RAD_225 + Math::PI_2;    ///< 315 degrees in radians;
  57
  58 using Integration::Bitmap;
  59 using Integration::BitmapPtr;
  60 typedef uint8_t PixelBuffer;
  61
  62 /**
  63  * @brief 4 byte pixel structure.
  64  */
  65 struct Pixel4Bytes
  66 {
  67   uint8_t r;
  68   uint8_t g;
  69   uint8_t b;
  70   uint8_t a;
  71 } __attribute__((packed, aligned(4))); //< Tell the compiler it is okay to use a single 32 bit load.
  72
  73 /**
  74  * @brief RGB888 pixel structure.
  75  */
  76 struct Pixel3Bytes
  77 {
  78   uint8_t r;
  79   uint8_t g;
  80   uint8_t b;
  81 } __attribute__((packed, aligned(1)));
  82
  83 /**
  84  * @brief RGB565 pixel typedefed from a short.
  85  *
  86  * Access fields by manual shifting and masking.
  87  */
  88 typedef uint16_t PixelRGB565;
  89
  90 /**
  91  * @brief a Pixel composed of two independent byte components.
  92  */
  93 struct Pixel2Bytes
  94 {
  95   uint8_t l;
  96   uint8_t a;
  97 } __attribute__((packed, aligned(2))); //< Tell the compiler it is okay to use a single 16 bit load.
  98
  99 #if defined(DEBUG_ENABLED)
 100 /**
 101  * Disable logging of image operations or make it verbose from the commandline
 102  * as follows (e.g., for dali demo app):
 103  * <code>
 104  * LOG_IMAGE_OPERATIONS=0 dali-demo #< off
 105  * LOG_IMAGE_OPERATIONS=3 dali-demo #< on, verbose
 106  * </code>
 107  */
 108 Debug::Filter* gImageOpsLogFilter = Debug::Filter::New(Debug::NoLogging, false, "LOG_IMAGE_OPERATIONS");
 109 #endif
 110
 111 /** @return The greatest even number less than or equal to the argument. */
 112 inline unsigned int EvenDown(const unsigned int a)
 113 {
 114   const unsigned int evened = a & ~1u;
 115   return evened;
 116 }
 117
 118 /**
 119  * @brief Log bad parameters.
 120  */
 121 void ValidateScalingParameters(const unsigned int inputWidth,
 122                                const unsigned int inputHeight,
 123                                const unsigned int desiredWidth,
 124                                const unsigned int desiredHeight)
 125 {
 126   if(desiredWidth > inputWidth || desiredHeight > inputHeight)
 127   {
 128     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Upscaling not supported (%u, %u -> %u, %u).\n", inputWidth, inputHeight, desiredWidth, desiredHeight);
 129   }
 130
 131   if(desiredWidth == 0u || desiredHeight == 0u)
 132   {
 133     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Downscaling to a zero-area target is pointless.\n");
 134   }
 135
 136   if(inputWidth == 0u || inputHeight == 0u)
 137   {
 138     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Zero area images cannot be scaled\n");
 139   }
 140 }
 141
 142 /**
 143  * @brief Do debug assertions common to all scanline halving functions.
 144  * @note Inline and in anon namespace so should boil away in release builds.
 145  */
 146 inline void DebugAssertScanlineParameters(const uint8_t* const pixels, const unsigned int width)
 147 {
 148   DALI_ASSERT_DEBUG(pixels && "Null pointer.");
 149   DALI_ASSERT_DEBUG(width > 1u && "Can't average fewer than two pixels.");
 150   DALI_ASSERT_DEBUG(width < 131072u && "Unusually wide image: are you sure you meant to pass that value in?");
 151 }
 152
 153 /**
 154  * @brief Assertions on params to functions averaging pairs of scanlines.
 155  * @note Inline as intended to boil away in release.
 156  */
 157 inline void DebugAssertDualScanlineParameters(const uint8_t* const scanline1,
 158                                               const uint8_t* const scanline2,
 159                                               uint8_t* const       outputScanline,
 160                                               const size_t         widthInComponents)
 161 {
 162   DALI_ASSERT_DEBUG(scanline1 && "Null pointer.");
 163   DALI_ASSERT_DEBUG(scanline2 && "Null pointer.");
 164   DALI_ASSERT_DEBUG(outputScanline && "Null pointer.");
 165   DALI_ASSERT_DEBUG(((scanline1 >= scanline2 + widthInComponents) || (scanline2 >= scanline1 + widthInComponents)) && "Scanlines alias.");
 166   DALI_ASSERT_DEBUG(((outputScanline >= (scanline2 + widthInComponents)) || (scanline2 >= (scanline1 + widthInComponents))) && "Scanline 2 aliases output.");
 167 }
 168
 169 /**
 170  * @brief Converts a scaling mode to the definition of which dimensions matter when box filtering as a part of that mode.
 171  */
 172 BoxDimensionTest DimensionTestForScalingMode(FittingMode::Type fittingMode)
 173 {
 174   BoxDimensionTest dimensionTest;
 175   dimensionTest = BoxDimensionTestEither;
 176
 177   switch(fittingMode)
 178   {
 179     // Shrink to fit attempts to make one or zero dimensions smaller than the
 180     // desired dimensions and one or two dimensions exactly the same as the desired
 181     // ones, so as long as one dimension is larger than the desired size, box
 182     // filtering can continue even if the second dimension is smaller than the
 183     // desired dimensions:
 184     case FittingMode::SHRINK_TO_FIT:
 185     {
 186       dimensionTest = BoxDimensionTestEither;
 187       break;
 188     }
 189     // Scale to fill mode keeps both dimensions at least as large as desired:
 190     case FittingMode::SCALE_TO_FILL:
 191     {
 192       dimensionTest = BoxDimensionTestBoth;
 193       break;
 194     }
 195     // Y dimension is irrelevant when downscaling in FIT_WIDTH mode:
 196     case FittingMode::FIT_WIDTH:
 197     {
 198       dimensionTest = BoxDimensionTestX;
 199       break;
 200     }
 201     // X Dimension is ignored by definition in FIT_HEIGHT mode:
 202     case FittingMode::FIT_HEIGHT:
 203     {
 204       dimensionTest = BoxDimensionTestY;
 205       break;
 206     }
 207   }
 208
 209   return dimensionTest;
 210 }
 211
 212 /**
 213  * @brief Work out the dimensions for a uniform scaling of the input to map it
 214  * into the target while effecting ShinkToFit scaling mode.
 215  */
 216 ImageDimensions FitForShrinkToFit(ImageDimensions target, ImageDimensions source)
 217 {
 218   // Scale the input by the least extreme of the two dimensions:
 219   const float widthScale  = target.GetX() / float(source.GetX());
 220   const float heightScale = target.GetY() / float(source.GetY());
 221   const float scale       = widthScale < heightScale ? widthScale : heightScale;
 222
 223   // Do no scaling at all if the result would increase area:
 224   if(scale >= 1.0f)
 225   {
 226     return source;
 227   }
 228
 229   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 230 }
 231
 232 /**
 233  * @brief Work out the dimensions for a uniform scaling of the input to map it
 234  * into the target while effecting SCALE_TO_FILL scaling mode.
 235  * @note An image scaled into the output dimensions will need either top and
 236  * bottom or left and right to be cropped away unless the source was pre-cropped
 237  * to match the destination aspect ratio.
 238  */
 239 ImageDimensions FitForScaleToFill(ImageDimensions target, ImageDimensions source)
 240 {
 241   DALI_ASSERT_DEBUG(source.GetX() > 0 && source.GetY() > 0 && "Zero-area rectangles should not be passed-in");
 242   // Scale the input by the least extreme of the two dimensions:
 243   const float widthScale  = target.GetX() / float(source.GetX());
 244   const float heightScale = target.GetY() / float(source.GetY());
 245   const float scale       = widthScale > heightScale ? widthScale : heightScale;
 246
 247   // Do no scaling at all if the result would increase area:
 248   if(scale >= 1.0f)
 249   {
 250     return source;
 251   }
 252
 253   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 254 }
 255
 256 /**
 257  * @brief Work out the dimensions for a uniform scaling of the input to map it
 258  * into the target while effecting FIT_WIDTH scaling mode.
 259  */
 260 ImageDimensions FitForFitWidth(ImageDimensions target, ImageDimensions source)
 261 {
 262   DALI_ASSERT_DEBUG(source.GetX() > 0 && "Cant fit a zero-dimension rectangle.");
 263   const float scale = target.GetX() / float(source.GetX());
 264
 265   // Do no scaling at all if the result would increase area:
 266   if(scale >= 1.0f)
 267   {
 268     return source;
 269   }
 270   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 271 }
 272
 273 /**
 274  * @brief Work out the dimensions for a uniform scaling of the input to map it
 275  * into the target while effecting FIT_HEIGHT scaling mode.
 276  */
 277 ImageDimensions FitForFitHeight(ImageDimensions target, ImageDimensions source)
 278 {
 279   DALI_ASSERT_DEBUG(source.GetY() > 0 && "Cant fit a zero-dimension rectangle.");
 280   const float scale = target.GetY() / float(source.GetY());
 281
 282   // Do no scaling at all if the result would increase area:
 283   if(scale >= 1.0f)
 284   {
 285     return source;
 286   }
 287
 288   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 289 }
 290
 291 /**
 292  * @brief Generate the rectangle to use as the target of a pixel sampling pass
 293  * (e.g., nearest or linear).
 294  */
 295 ImageDimensions FitToScalingMode(ImageDimensions requestedSize, ImageDimensions sourceSize, FittingMode::Type fittingMode)
 296 {
 297   ImageDimensions fitDimensions;
 298   switch(fittingMode)
 299   {
 300     case FittingMode::SHRINK_TO_FIT:
 301     {
 302       fitDimensions = FitForShrinkToFit(requestedSize, sourceSize);
 303       break;
 304     }
 305     case FittingMode::SCALE_TO_FILL:
 306     {
 307       fitDimensions = FitForScaleToFill(requestedSize, sourceSize);
 308       break;
 309     }
 310     case FittingMode::FIT_WIDTH:
 311     {
 312       fitDimensions = FitForFitWidth(requestedSize, sourceSize);
 313       break;
 314     }
 315     case FittingMode::FIT_HEIGHT:
 316     {
 317       fitDimensions = FitForFitHeight(requestedSize, sourceSize);
 318       break;
 319     }
 320   }
 321
 322   return fitDimensions;
 323 }
 324
 325 /**
 326  * @brief Calculate the number of lines on the X and Y axis that need to be
 327  * either added or removed with repect to the specified fitting mode.
 328  * (e.g., nearest or linear).
 329  * @param[in]     sourceSize      The size of the source image
 330  * @param[in]     fittingMode     The fitting mode to use
 331  * @param[in/out] requestedSize   The target size that the image will be fitted to.
 332  *                                If the source image is smaller than the requested size, the source is not scaled up.
 333  *                                So we reduce the target size while keeping aspect by lowering resolution.
 334  * @param[out]    scanlinesToCrop The number of scanlines to remove from the image (can be negative to represent Y borders required)
 335  * @param[out]    columnsToCrop   The number of columns to remove from the image (can be negative to represent X borders required)
 336  */
 337 void CalculateBordersFromFittingMode(ImageDimensions sourceSize, FittingMode::Type fittingMode, ImageDimensions& requestedSize, int& scanlinesToCrop, int& columnsToCrop)
 338 {
 339   const int   sourceWidth(static_cast<int>(sourceSize.GetWidth()));
 340   const int   sourceHeight(static_cast<int>(sourceSize.GetHeight()));
 341   const float targetAspect(static_cast<float>(requestedSize.GetWidth()) / static_cast<float>(requestedSize.GetHeight()));
 342   int         finalWidth  = 0;
 343   int         finalHeight = 0;
 344
 345   switch(fittingMode)
 346   {
 347     case FittingMode::FIT_WIDTH:
 348     {
 349       finalWidth  = sourceWidth;
 350       finalHeight = static_cast<float>(sourceWidth) / targetAspect;
 351
 352       columnsToCrop   = 0;
 353       scanlinesToCrop = -(finalHeight - sourceHeight);
 354       break;
 355     }
 356
 357     case FittingMode::FIT_HEIGHT:
 358     {
 359       finalWidth  = static_cast<float>(sourceHeight) * targetAspect;
 360       finalHeight = sourceHeight;
 361
 362       columnsToCrop   = -(finalWidth - sourceWidth);
 363       scanlinesToCrop = 0;
 364       break;
 365     }
 366
 367     case FittingMode::SHRINK_TO_FIT:
 368     {
 369       const float sourceAspect(static_cast<float>(sourceWidth) / static_cast<float>(sourceHeight));
 370       if(sourceAspect > targetAspect)
 371       {
 372         finalWidth  = sourceWidth;
 373         finalHeight = static_cast<float>(sourceWidth) / targetAspect;
 374
 375         columnsToCrop   = 0;
 376         scanlinesToCrop = -(finalHeight - sourceHeight);
 377       }
 378       else
 379       {
 380         finalWidth  = static_cast<float>(sourceHeight) * targetAspect;
 381         finalHeight = sourceHeight;
 382
 383         columnsToCrop   = -(finalWidth - sourceWidth);
 384         scanlinesToCrop = 0;
 385       }
 386       break;
 387     }
 388
 389     case FittingMode::SCALE_TO_FILL:
 390     {
 391       const float sourceAspect(static_cast<float>(sourceWidth) / static_cast<float>(sourceHeight));
 392       if(sourceAspect > targetAspect)
 393       {
 394         finalWidth  = static_cast<float>(sourceHeight) * targetAspect;
 395         finalHeight = sourceHeight;
 396
 397         columnsToCrop   = -(finalWidth - sourceWidth);
 398         scanlinesToCrop = 0;
 399       }
 400       else
 401       {
 402         finalWidth  = sourceWidth;
 403         finalHeight = static_cast<float>(sourceWidth) / targetAspect;
 404
 405         columnsToCrop   = 0;
 406         scanlinesToCrop = -(finalHeight - sourceHeight);
 407       }
 408       break;
 409     }
 410   }
 411
 412   requestedSize.SetWidth(finalWidth);
 413   requestedSize.SetHeight(finalHeight);
 414 }
 415
 416 /**
 417  * @brief Construct a pixel buffer object from a copy of the pixel array passed in.
 418  */
 419 Dali::Devel::PixelBuffer MakePixelBuffer(const uint8_t* const pixels, Pixel::Format pixelFormat, unsigned int width, unsigned int height)
 420 {
 421   DALI_ASSERT_DEBUG(pixels && "Null bitmap buffer to copy.");
 422
 423   // Allocate a pixel buffer to hold the image passed in:
 424   auto newBitmap = Dali::Devel::PixelBuffer::New(width, height, pixelFormat);
 425
 426   // Copy over the pixels from the downscaled image that was generated in-place in the pixel buffer of the input bitmap:
 427   memcpy(newBitmap.GetBuffer(), pixels, width * height * Pixel::GetBytesPerPixel(pixelFormat));
 428   return newBitmap;
 429 }
 430
 431 /**
 432  * @brief Work out the desired width and height, accounting for zeros.
 433  *
 434  * @param[in] bitmapWidth Width of image before processing.
 435  * @param[in] bitmapHeight Height of image before processing.
 436  * @param[in] requestedWidth Width of area to scale image into. Can be zero.
 437  * @param[in] requestedHeight Height of area to scale image into. Can be zero.
 438  * @return Dimensions of area to scale image into after special rules are applied.
 439  */
 440 ImageDimensions CalculateDesiredDimensions(unsigned int bitmapWidth, unsigned int bitmapHeight, unsigned int requestedWidth, unsigned int requestedHeight)
 441 {
 442   unsigned int maxSize = Dali::GetMaxTextureSize();
 443
 444   // If no dimensions have been requested, default to the source ones:
 445   if(requestedWidth == 0 && requestedHeight == 0)
 446   {
 447     if(bitmapWidth <= maxSize && bitmapHeight <= maxSize)
 448     {
 449       return ImageDimensions(bitmapWidth, bitmapHeight);
 450     }
 451     else
 452     {
 453       // Calculate the size from the max texture size and the source image aspect ratio
 454       if(bitmapWidth > bitmapHeight)
 455       {
 456         return ImageDimensions(maxSize, bitmapHeight * maxSize / static_cast<float>(bitmapWidth) + 0.5f);
 457       }
 458       else
 459       {
 460         return ImageDimensions(bitmapWidth * maxSize / static_cast<float>(bitmapHeight) + 0.5f, maxSize);
 461       }
 462     }
 463   }
 464
 465   // If both dimensions have values requested, use them both:
 466   if(requestedWidth != 0 && requestedHeight != 0)
 467   {
 468     if(requestedWidth <= maxSize && requestedHeight <= maxSize)
 469     {
 470       return ImageDimensions(requestedWidth, requestedHeight);
 471     }
 472     else
 473     {
 474       // Calculate the size from the max texture size and the source image aspect ratio
 475       if(requestedWidth > requestedHeight)
 476       {
 477         return ImageDimensions(maxSize, requestedHeight * maxSize / static_cast<float>(requestedWidth) + 0.5f);
 478       }
 479       else
 480       {
 481         return ImageDimensions(requestedWidth * maxSize / static_cast<float>(requestedHeight) + 0.5f, maxSize);
 482       }
 483     }
 484   }
 485
 486   // Only one of the dimensions has been requested. Calculate the other from
 487   // the requested one and the source image aspect ratio:
 488   if(requestedWidth != 0)
 489   {
 490     requestedWidth = std::min(requestedWidth, maxSize);
 491     return ImageDimensions(requestedWidth, bitmapHeight / float(bitmapWidth) * requestedWidth + 0.5f);
 492   }
 493
 494   requestedHeight = std::min(requestedHeight, maxSize);
 495   return ImageDimensions(bitmapWidth / float(bitmapHeight) * requestedHeight + 0.5f, requestedHeight);
 496 }
 497
 498 /**
 499  * @brief Rotates the given buffer @p pixelsIn 90 degrees counter clockwise.
 500  *
 501  * @note It allocates memory for the returned @p pixelsOut buffer.
 502  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 503  * @note It may fail if malloc() fails to allocate memory.
 504  *
 505  * @param[in] pixelsIn The input buffer.
 506  * @param[in] widthIn The width of the input buffer.
 507  * @param[in] heightIn The height of the input buffer.
 508  * @param[in] strideIn The stride of the input buffer.
 509  * @param[in] pixelSize The size of the pixel.
 510  * @param[out] pixelsOut The rotated output buffer.
 511  * @param[out] widthOut The width of the output buffer.
 512  * @param[out] heightOut The height of the output buffer.
 513  *
 514  * @return Whether the rotation succeeded.
 515  */
 516 bool Rotate90(const uint8_t* const pixelsIn,
 517               unsigned int         widthIn,
 518               unsigned int         heightIn,
 519               unsigned int         strideIn,
 520               unsigned int         pixelSize,
 521               uint8_t*&            pixelsOut,
 522               unsigned int&        widthOut,
 523               unsigned int&        heightOut)
 524 {
 525   // The new size of the image.
 526   widthOut  = heightIn;
 527   heightOut = widthIn;
 528
 529   // Allocate memory for the rotated buffer.
 530   // Output buffer is tightly packed
 531   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
 532   if(nullptr == pixelsOut)
 533   {
 534     widthOut  = 0u;
 535     heightOut = 0u;
 536
 537     // Return if the memory allocations fails.
 538     return false;
 539   }
 540
 541   // Rotate the buffer.
 542   for(unsigned int y = 0u; y < heightIn; ++y)
 543   {
 544     const unsigned int srcLineIndex = y * strideIn;
 545     const unsigned int dstX         = y;
 546     for(unsigned int x = 0u; x < widthIn; ++x)
 547     {
 548       const unsigned int dstY     = heightOut - x - 1u;
 549       const unsigned int dstIndex = pixelSize * (dstY * widthOut + dstX);
 550       const unsigned int srcIndex = pixelSize * (srcLineIndex + x);
 551
 552       for(unsigned int channel = 0u; channel < pixelSize; ++channel)
 553       {
 554         *(pixelsOut + dstIndex + channel) = *(pixelsIn + srcIndex + channel);
 555       }
 556     }
 557   }
 558
 559   return true;
 560 }
 561
 562 /**
 563  * @brief Rotates the given buffer @p pixelsIn 180 degrees counter clockwise.
 564  *
 565  * @note It allocates memory for the returned @p pixelsOut buffer.
 566  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 567  * @note It may fail if malloc() fails to allocate memory.
 568  *
 569  * @param[in] pixelsIn The input buffer.
 570  * @param[in] widthIn The width of the input buffer.
 571  * @param[in] heightIn The height of the input buffer.
 572  * @param[in] strideIn The stride of the input buffer.
 573  * @param[in] pixelSize The size of the pixel.
 574  * @param[out] pixelsOut The rotated output buffer.
 575  *
 576  * @return Whether the rotation succeeded.
 577  */
 578 bool Rotate180(const uint8_t* const pixelsIn,
 579                unsigned int         widthIn,
 580                unsigned int         heightIn,
 581                unsigned int         strideIn,
 582                unsigned int         pixelSize,
 583                uint8_t*&            pixelsOut)
 584 {
 585   // Allocate memory for the rotated buffer.
 586   // Output buffer is tightly packed
 587   pixelsOut = static_cast<uint8_t*>(malloc(widthIn * heightIn * pixelSize));
 588   if(nullptr == pixelsOut)
 589   {
 590     // Return if the memory allocations fails.
 591     return false;
 592   }
 593
 594   // Rotate the buffer.
 595   for(unsigned int y = 0u; y < heightIn; ++y)
 596   {
 597     const unsigned int srcLineIndex = y * strideIn;
 598     const unsigned int dstY         = heightIn - y - 1u;
 599     for(unsigned int x = 0u; x < widthIn; ++x)
 600     {
 601       const unsigned int dstX     = widthIn - x - 1u;
 602       const unsigned int dstIndex = pixelSize * (dstY * widthIn + dstX);
 603       const unsigned int srcIndex = pixelSize * (srcLineIndex + x);
 604
 605       for(unsigned int channel = 0u; channel < pixelSize; ++channel)
 606       {
 607         *(pixelsOut + dstIndex + channel) = *(pixelsIn + srcIndex + channel);
 608       }
 609     }
 610   }
 611
 612   return true;
 613 }
 614
 615 /**
 616  * @brief Rotates the given buffer @p pixelsIn 270 degrees counter clockwise.
 617  *
 618  * @note It allocates memory for the returned @p pixelsOut buffer.
 619  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 620  * @note It may fail if malloc() fails to allocate memory.
 621  *
 622  * @param[in] pixelsIn The input buffer.
 623  * @param[in] widthIn The width of the input buffer.
 624  * @param[in] heightIn The height of the input buffer.
 625  * @param[in] strideIn The stride of the input buffer.
 626  * @param[in] pixelSize The size of the pixel.
 627  * @param[out] pixelsOut The rotated output buffer.
 628  * @param[out] widthOut The width of the output buffer.
 629  * @param[out] heightOut The height of the output buffer.
 630  *
 631  * @return Whether the rotation succeeded.
 632  */
 633 bool Rotate270(const uint8_t* const pixelsIn,
 634                unsigned int         widthIn,
 635                unsigned int         heightIn,
 636                unsigned int         strideIn,
 637                unsigned int         pixelSize,
 638                uint8_t*&            pixelsOut,
 639                unsigned int&        widthOut,
 640                unsigned int&        heightOut)
 641 {
 642   // The new size of the image.
 643   widthOut  = heightIn;
 644   heightOut = widthIn;
 645
 646   // Allocate memory for the rotated buffer.
 647   // Output buffer is tightly packed
 648   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
 649   if(nullptr == pixelsOut)
 650   {
 651     widthOut  = 0u;
 652     heightOut = 0u;
 653
 654     // Return if the memory allocations fails.
 655     return false;
 656   }
 657
 658   // Rotate the buffer.
 659   for(unsigned int y = 0u; y < heightIn; ++y)
 660   {
 661     const unsigned int srcLineIndex = y * strideIn;
 662     const unsigned int dstX         = widthOut - y - 1u;
 663     for(unsigned int x = 0u; x < widthIn; ++x)
 664     {
 665       const unsigned int dstY     = x;
 666       const unsigned int dstIndex = pixelSize * (dstY * widthOut + dstX);
 667       const unsigned int srcIndex = pixelSize * (srcLineIndex + x);
 668
 669       for(unsigned int channel = 0u; channel < pixelSize; ++channel)
 670       {
 671         *(pixelsOut + dstIndex + channel) = *(pixelsIn + srcIndex + channel);
 672       }
 673     }
 674   }
 675
 676   return true;
 677 }
 678
 679 /**
 680  * @brief Skews a row horizontally (with filtered weights)
 681  *
 682  * @note Limited to 45 degree skewing only.
 683  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 684  *
 685  * @param[in] srcBufferPtr Pointer to the input pixel buffer.
 686  * @param[in] srcWidth The width of the input pixel buffer.
 687  * @param[in] srcStride The stride of the input pixel buffer.
 688  * @param[in] pixelSize The size of the pixel.
 689  * @param[in,out] dstPixelBuffer Pointer to the output pixel buffer.
 690  * @param[in] dstWidth The width of the output pixel buffer.
 691  * @param[in] row The row index.
 692  * @param[in] offset The skew offset.
 693  * @param[in] weight The relative weight of right pixel.
 694  */
 695 void HorizontalSkew(const uint8_t* const srcBufferPtr,
 696                     int                  srcWidth,
 697                     int                  srcStride,
 698                     unsigned int         pixelSize,
 699                     uint8_t*&            dstBufferPtr,
 700                     int                  dstWidth,
 701                     unsigned int         row,
 702                     int                  offset,
 703                     float                weight)
 704 {
 705   if(offset > 0)
 706   {
 707     // Fill gap left of skew with background.
 708     memset(dstBufferPtr + row * pixelSize * dstWidth, 0u, pixelSize * offset);
 709   }
 710
 711   unsigned char oldLeft[4u] = {0u, 0u, 0u, 0u};
 712
 713   int i = 0;
 714   for(i = 0u; i < srcWidth; ++i)
 715   {
 716     // Loop through row pixels
 717     const unsigned int srcIndex = pixelSize * (row * srcStride + i);
 718
 719     unsigned char src[4u] = {0u, 0u, 0u, 0u};
 720     for(unsigned int channel = 0u; channel < pixelSize; ++channel)
 721     {
 722       src[channel] = *(srcBufferPtr + srcIndex + channel);
 723     }
 724
 725     // Calculate weights
 726     unsigned char left[4u] = {0u, 0u, 0u, 0u};
 727     for(unsigned int channel = 0u; channel < pixelSize; ++channel)
 728     {
 729       left[channel] = static_cast<unsigned char>(static_cast<float>(src[channel]) * weight);
 730
 731       // Update left over on source
 732       src[channel] -= (left[channel] - oldLeft[channel]);
 733     }
 734
 735     // Check boundaries
 736     if((i + offset >= 0) && (i + offset < dstWidth))
 737     {
 738       const unsigned int dstIndex = pixelSize * (row * dstWidth + i + offset);
 739
 740       for(unsigned int channel = 0u; channel < pixelSize; ++channel)
 741       {
 742         *(dstBufferPtr + dstIndex + channel) = src[channel];
 743       }
 744     }
 745
 746     // Save leftover for next pixel in scan
 747     for(unsigned int channel = 0u; channel < pixelSize; ++channel)
 748     {
 749       oldLeft[channel] = left[channel];
 750     }
 751   }
 752
 753   // Go to rightmost point of skew
 754   i += offset;
 755   if(i < dstWidth)
 756   {
 757     // If still in image bounds, put leftovers there
 758     const unsigned int dstIndex = pixelSize * (row * dstWidth + i);
 759
 760     for(unsigned int channel = 0u; channel < pixelSize; ++channel)
 761     {
 762       *(dstBufferPtr + dstIndex + channel) = oldLeft[channel];
 763     }
 764
 765     // Clear to the right of the skewed line with background
 766     ++i;
 767     memset(dstBufferPtr + pixelSize * (row * dstWidth + i), 0u, pixelSize * (dstWidth - i));
 768   }
 769 }
 770
 771 /**
 772  * @brief Skews a column vertically (with filtered weights)
 773  *
 774  * @note Limited to 45 degree skewing only.
 775  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 776  *
 777  * @param[in] srcBufferPtr Pointer to the input pixel buffer.
 778  * @param[in] srcWidth The width of the input pixel buffer.
 779  * @param[in] srcHeight The height of the input pixel buffer.
 780  * @param[in] srcStride The stride of the input pixel buffer.
 781  * @param[in] pixelSize The size of the pixel.
 * @param[in,out] dstBufferPtr Pointer to the output pixel buffer.
 783  * @param[in] dstWidth The width of the output pixel buffer.
 784  * @param[in] dstHeight The height of the output pixel buffer.
 785  * @param[in] column The column index.
 786  * @param[in] offset The skew offset.
 * @param[in] weight The relative weight of upper pixel.
 788  */
 789 void VerticalSkew(const uint8_t* const srcBufferPtr,
 790                   int                  srcWidth,
 791                   int                  srcHeight,
 792                   int                  srcStride,
 793                   unsigned int         pixelSize,
 794                   uint8_t*&            dstBufferPtr,
 795                   int                  dstWidth,
 796                   int                  dstHeight,
 797                   unsigned int         column,
 798                   int                  offset,
 799                   float                weight)
 800 {
 801   for(int i = 0; i < offset; ++i)
 802   {
 803     // Fill gap above skew with background
 804     const unsigned int dstIndex = pixelSize * (i * dstWidth + column);
 805
 806     for(unsigned int channel = 0u; channel < pixelSize; ++channel)
 807     {
 808       *(dstBufferPtr + dstIndex + channel) = 0u;
 809     }
 810   }
 811
 812   unsigned char oldLeft[4u] = {0u, 0u, 0u, 0u};
 813
 814   int yPos = 0;
 815   int i    = 0;
 816   for(i = 0; i < srcHeight; ++i)
 817   {
 818     // Loop through column pixels
 819     const unsigned int srcIndex = pixelSize * (i * srcStride + column);
 820
 821     unsigned char src[4u] = {0u, 0u, 0u, 0u};
 822     for(unsigned int channel = 0u; channel < pixelSize; ++channel)
 823     {
 824       src[channel] = *(srcBufferPtr + srcIndex + channel);
 825     }
 826
 827     yPos = i + offset;
 828
 829     // Calculate weights
 830     unsigned char left[4u] = {0u, 0u, 0u, 0u};
 831     for(unsigned int channel = 0u; channel < pixelSize; ++channel)
 832     {
 833       left[channel] = static_cast<unsigned char>(static_cast<float>(src[channel]) * weight);
 834       // Update left over on source
 835       src[channel] -= (left[channel] - oldLeft[channel]);
 836     }
 837
 838     // Check boundaries
 839     if((yPos >= 0) && (yPos < dstHeight))
 840     {
 841       const unsigned int dstIndex = pixelSize * (yPos * dstWidth + column);
 842
 843       for(unsigned int channel = 0u; channel < pixelSize; ++channel)
 844       {
 845         *(dstBufferPtr + dstIndex + channel) = src[channel];
 846       }
 847     }
 848
 849     // Save leftover for next pixel in scan
 850     for(unsigned int channel = 0u; channel < pixelSize; ++channel)
 851     {
 852       oldLeft[channel] = left[channel];
 853     }
 854   }
 855
 856   // Go to bottom point of skew
 857   i = yPos;
 858   if(i < dstHeight)
 859   {
 860     // If still in image bounds, put leftovers there
 861     const unsigned int dstIndex = pixelSize * (i * dstWidth + column);
 862
 863     for(unsigned int channel = 0u; channel < pixelSize; ++channel)
 864     {
 865       *(dstBufferPtr + dstIndex + channel) = oldLeft[channel];
 866     }
 867   }
 868
 869   while(++i < dstHeight)
 870   {
 871     // Clear below skewed line with background
 872     const unsigned int dstIndex = pixelSize * (i * dstWidth + column);
 873
 874     for(unsigned int channel = 0u; channel < pixelSize; ++channel)
 875     {
 876       *(dstBufferPtr + dstIndex + channel) = 0u;
 877     }
 878   }
 879 }
 880
 881 } // namespace
 882
 883 ImageDimensions CalculateDesiredDimensions(ImageDimensions rawDimensions, ImageDimensions requestedDimensions)
 884 {
 885   return CalculateDesiredDimensions(rawDimensions.GetWidth(), rawDimensions.GetHeight(), requestedDimensions.GetWidth(), requestedDimensions.GetHeight());
 886 }
 887
 888 /**
 889  * @brief Apply cropping and padding for specified fitting mode.
 890  *
 891  * Once the bitmap has been (optionally) downscaled to an appropriate size, this method performs alterations
 892  * based on the fitting mode.
 893  *
 894  * This will add vertical or horizontal borders if necessary.
 895  * Crop the source image data vertically or horizontally if necessary.
 896  * The aspect of the source image is preserved.
 * If the source image is smaller than the desired size, the algorithm will modify the newly created
 *   bitmap's dimensions to only be as large as necessary, as a memory saving optimization. This will cause
 899  *   GPU scaling to be performed at render time giving the same result with less texture traversal.
 900  *
 901  * @param[in] bitmap            The source pixel buffer to perform modifications on.
 902  * @param[in] desiredDimensions The target dimensions to aim to fill based on the fitting mode.
 903  * @param[in] fittingMode       The fitting mode to use.
 904  *
 905  * @return                      A new bitmap with the padding and cropping required for fitting mode applied.
 906  *                              If no modification is needed or possible, the passed in bitmap is returned.
 907  */
 908 Dali::Devel::PixelBuffer CropAndPadForFittingMode(Dali::Devel::PixelBuffer& bitmap, ImageDimensions desiredDimensions, FittingMode::Type fittingMode);
 909
 910 /**
 911  * @brief Adds horizontal or vertical borders to the source image.
 912  *
 913  * @param[in] targetPixels     The destination image pointer to draw the borders on.
 914  * @param[in] bytesPerPixel    The number of bytes per pixel of the target pixel buffer.
 915  * @param[in] targetDimensions The dimensions of the destination image.
 916  * @param[in] padDimensions    The columns and scanlines to pad with borders.
 917  */
 918 void AddBorders(PixelBuffer* targetPixels, const unsigned int bytesPerPixel, const ImageDimensions targetDimensions, const ImageDimensions padDimensions);
 919
 920 Dali::Devel::PixelBuffer ApplyAttributesToBitmap(Dali::Devel::PixelBuffer bitmap, ImageDimensions dimensions, FittingMode::Type fittingMode, SamplingMode::Type samplingMode)
 921 {
 922   if(bitmap)
 923   {
 924     // Calculate the desired box, accounting for a possible zero component:
 925     const ImageDimensions desiredDimensions = CalculateDesiredDimensions(bitmap.GetWidth(), bitmap.GetHeight(), dimensions.GetWidth(), dimensions.GetHeight());
 926
 927     // If a different size than the raw one has been requested, resize the image
 928     // maximally using a repeated box filter without making it smaller than the
 929     // requested size in either dimension:
 930     bitmap = DownscaleBitmap(bitmap, desiredDimensions, fittingMode, samplingMode);
 931
 932     // Cut the bitmap according to the desired width and height so that the
 933     // resulting bitmap has the same aspect ratio as the desired dimensions.
 934     // Add crop and add borders if necessary depending on fitting mode.
 935     if(bitmap)
 936     {
 937       bitmap = CropAndPadForFittingMode(bitmap, desiredDimensions, fittingMode);
 938     }
 939   }
 940
 941   return bitmap;
 942 }
 943
 944 Dali::Devel::PixelBuffer CropAndPadForFittingMode(Dali::Devel::PixelBuffer& bitmap, ImageDimensions desiredDimensions, FittingMode::Type fittingMode)
 945 {
 946   const unsigned int inputWidth  = bitmap.GetWidth();
 947   const unsigned int inputHeight = bitmap.GetHeight();
 948   const unsigned int inputStride = bitmap.GetStride();
 949
 950   if(desiredDimensions.GetWidth() < 1u || desiredDimensions.GetHeight() < 1u)
 951   {
 952     DALI_LOG_WARNING("Image scaling aborted as desired dimensions too small (%u, %u).\n", desiredDimensions.GetWidth(), desiredDimensions.GetHeight());
 953   }
 954   else if(inputWidth != desiredDimensions.GetWidth() || inputHeight != desiredDimensions.GetHeight())
 955   {
 956     // Calculate any padding or cropping that needs to be done based on the fitting mode.
 957     // Note: If the desired size is larger than the original image, the desired size will be
 958     // reduced while maintaining the aspect, in order to save unnecessary memory usage.
 959     int scanlinesToCrop = 0;
 960     int columnsToCrop   = 0;
 961
 962     CalculateBordersFromFittingMode(ImageDimensions(inputWidth, inputHeight), fittingMode, desiredDimensions, scanlinesToCrop, columnsToCrop);
 963
 964     unsigned int desiredWidth(desiredDimensions.GetWidth());
 965     unsigned int desiredHeight(desiredDimensions.GetHeight());
 966
 967     // Action the changes by making a new bitmap with the central part of the loaded one if required.
 968     if(scanlinesToCrop != 0 || columnsToCrop != 0)
 969     {
 970       // Split the adding and removing of scanlines and columns into separate variables,
 971       // so we can use one piece of generic code to action the changes.
 972       unsigned int scanlinesToPad = 0;
 973       unsigned int columnsToPad   = 0;
 974       if(scanlinesToCrop < 0)
 975       {
 976         scanlinesToPad  = -scanlinesToCrop;
 977         scanlinesToCrop = 0;
 978       }
 979       if(columnsToCrop < 0)
 980       {
 981         columnsToPad  = -columnsToCrop;
 982         columnsToCrop = 0;
 983       }
 984
 985       // If there is no filtering, then the final image size can become very large, exit if larger than maximum.
 986       if((desiredWidth > MAXIMUM_TARGET_BITMAP_SIZE) || (desiredHeight > MAXIMUM_TARGET_BITMAP_SIZE) ||
 987          (columnsToPad > MAXIMUM_TARGET_BITMAP_SIZE) || (scanlinesToPad > MAXIMUM_TARGET_BITMAP_SIZE))
 988       {
 989         DALI_LOG_WARNING("Image scaling aborted as final dimensions too large (%u, %u).\n", desiredWidth, desiredHeight);
 990         return bitmap;
 991       }
 992
 993       // Create new PixelBuffer with the desired size.
 994       const auto pixelFormat = bitmap.GetPixelFormat();
 995
 996       auto croppedBitmap = Devel::PixelBuffer::New(desiredWidth, desiredHeight, pixelFormat);
 997
 998       // Add some pre-calculated offsets to the bitmap pointers so this is not done within a loop.
 999       // The cropping is added to the source pointer, and the padding is added to the destination.
1000       const auto               bytesPerPixel      = Pixel::GetBytesPerPixel(pixelFormat);
1001       const PixelBuffer* const sourcePixels       = bitmap.GetBuffer() + ((((scanlinesToCrop / 2) * inputStride) + (columnsToCrop / 2)) * bytesPerPixel);
1002       PixelBuffer* const       targetPixels       = croppedBitmap.GetBuffer();
1003       PixelBuffer* const       targetPixelsActive = targetPixels + ((((scanlinesToPad / 2) * desiredWidth) + (columnsToPad / 2)) * bytesPerPixel);
1004       DALI_ASSERT_DEBUG(sourcePixels && targetPixels);
1005
1006       // Copy the image data to the new bitmap.
1007       // Optimize to a single memcpy if the left and right edges don't need a crop or a pad.
1008       unsigned int outputSpan(desiredWidth * bytesPerPixel);
1009       if(columnsToCrop == 0 && columnsToPad == 0 && inputStride == inputWidth)
1010       {
1011         memcpy(targetPixelsActive, sourcePixels, (desiredHeight - scanlinesToPad) * outputSpan);
1012       }
1013       else
1014       {
1015         // The width needs to change (due to either a crop or a pad), so we copy a scanline at a time.
1016         // Precalculate any constants to optimize the inner loop.
1017         const unsigned int inputSpan(inputStride * bytesPerPixel);
1018         const unsigned int copySpan((desiredWidth - columnsToPad) * bytesPerPixel);
1019         const unsigned int scanlinesToCopy(desiredHeight - scanlinesToPad);
1020
1021         for(unsigned int y = 0; y < scanlinesToCopy; ++y)
1022         {
1023           memcpy(&targetPixelsActive[y * outputSpan], &sourcePixels[y * inputSpan], copySpan);
1024         }
1025       }
1026
1027       // Add vertical or horizontal borders to the final image (if required).
1028       desiredDimensions.SetWidth(desiredWidth);
1029       desiredDimensions.SetHeight(desiredHeight);
1030       AddBorders(croppedBitmap.GetBuffer(), bytesPerPixel, desiredDimensions, ImageDimensions(columnsToPad, scanlinesToPad));
1031       // Overwrite the loaded bitmap with the cropped version
1032       bitmap = croppedBitmap;
1033     }
1034   }
1035
1036   return bitmap;
1037 }
1038
1039 void AddBorders(PixelBuffer* targetPixels, const unsigned int bytesPerPixel, const ImageDimensions targetDimensions, const ImageDimensions padDimensions)
1040 {
1041   // Assign ints for faster access.
1042   unsigned int desiredWidth(targetDimensions.GetWidth());
1043   unsigned int desiredHeight(targetDimensions.GetHeight());
1044   unsigned int columnsToPad(padDimensions.GetWidth());
1045   unsigned int scanlinesToPad(padDimensions.GetHeight());
1046   unsigned int outputSpan(desiredWidth * bytesPerPixel);
1047
1048   // Add letterboxing (symmetrical borders) if needed.
1049   if(scanlinesToPad > 0)
1050   {
1051     // Add a top border. Note: This is (deliberately) rounded down if padding is an odd number.
1052     memset(targetPixels, BORDER_FILL_VALUE, (scanlinesToPad / 2) * outputSpan);
1053
1054     // We subtract scanlinesToPad/2 from scanlinesToPad so that we have the correct
1055     // offset for odd numbers (as the top border is 1 pixel smaller in these cases.
1056     unsigned int bottomBorderHeight = scanlinesToPad - (scanlinesToPad / 2);
1057
1058     // Bottom border.
1059     memset(&targetPixels[(desiredHeight - bottomBorderHeight) * outputSpan], BORDER_FILL_VALUE, bottomBorderHeight * outputSpan);
1060   }
1061   else if(columnsToPad > 0)
1062   {
1063     // Add a left and right border.
1064     // Left:
1065     // Pre-calculate span size outside of loop.
1066     unsigned int leftBorderSpanWidth((columnsToPad / 2) * bytesPerPixel);
1067     for(unsigned int y = 0; y < desiredHeight; ++y)
1068     {
1069       memset(&targetPixels[y * outputSpan], BORDER_FILL_VALUE, leftBorderSpanWidth);
1070     }
1071
1072     // Right:
1073     // Pre-calculate the initial x offset as it is always the same for a small optimization.
1074     // We subtract columnsToPad/2 from columnsToPad so that we have the correct
1075     // offset for odd numbers (as the left border is 1 pixel smaller in these cases.
1076     unsigned int       rightBorderWidth = columnsToPad - (columnsToPad / 2);
1077     PixelBuffer* const destPixelsRightBorder(targetPixels + ((desiredWidth - rightBorderWidth) * bytesPerPixel));
1078     unsigned int       rightBorderSpanWidth = rightBorderWidth * bytesPerPixel;
1079
1080     for(unsigned int y = 0; y < desiredHeight; ++y)
1081     {
1082       memset(&destPixelsRightBorder[y * outputSpan], BORDER_FILL_VALUE, rightBorderSpanWidth);
1083     }
1084   }
1085 }
1086
1087 Dali::Devel::PixelBuffer DownscaleBitmap(Dali::Devel::PixelBuffer bitmap,
1088                                          ImageDimensions          desired,
1089                                          FittingMode::Type        fittingMode,
1090                                          SamplingMode::Type       samplingMode)
1091 {
1092   // Source dimensions as loaded from resources (e.g. filesystem):
1093   auto bitmapWidth  = bitmap.GetWidth();
1094   auto bitmapHeight = bitmap.GetHeight();
1095   auto bitmapStride = bitmap.GetStride();
1096   // Desired dimensions (the rectangle to fit the source image to):
1097   auto desiredWidth  = desired.GetWidth();
1098   auto desiredHeight = desired.GetHeight();
1099
1100   Dali::Devel::PixelBuffer outputBitmap{bitmap};
1101
1102   // If a different size than the raw one has been requested, resize the image:
1103   if(
1104     (desiredWidth > 0.0f) && (desiredHeight > 0.0f) &&
1105     ((desiredWidth < bitmapWidth) || (desiredHeight < bitmapHeight)))
1106   {
1107     auto pixelFormat = bitmap.GetPixelFormat();
1108
1109     // Do the fast power of 2 iterated box filter to get to roughly the right side if the filter mode requests that:
1110     unsigned int shrunkWidth = -1, shrunkHeight = -1, outStride = -1;
1111     DownscaleInPlacePow2(bitmap.GetBuffer(), pixelFormat, bitmapWidth, bitmapHeight, bitmapStride, desiredWidth, desiredHeight, fittingMode, samplingMode, shrunkWidth, shrunkHeight, outStride);
1112
1113     // Work out the dimensions of the downscaled bitmap, given the scaling mode and desired dimensions:
1114     const ImageDimensions filteredDimensions = FitToScalingMode(ImageDimensions(desiredWidth, desiredHeight), ImageDimensions(shrunkWidth, shrunkHeight), fittingMode);
1115     const unsigned int    filteredWidth      = filteredDimensions.GetWidth();
1116     const unsigned int    filteredHeight     = filteredDimensions.GetHeight();
1117
1118     // Run a filter to scale down the bitmap if it needs it:
1119     bool filtered = false;
1120     if(filteredWidth < shrunkWidth || filteredHeight < shrunkHeight)
1121     {
1122       if(samplingMode == SamplingMode::LINEAR || samplingMode == SamplingMode::BOX_THEN_LINEAR ||
1123          samplingMode == SamplingMode::NEAREST || samplingMode == SamplingMode::BOX_THEN_NEAREST)
1124       {
1125         outputBitmap = Dali::Devel::PixelBuffer::New(filteredWidth, filteredHeight, pixelFormat);
1126
1127         if(outputBitmap)
1128         {
1129           if(samplingMode == SamplingMode::LINEAR || samplingMode == SamplingMode::BOX_THEN_LINEAR)
1130           {
1131             LinearSample(bitmap.GetBuffer(), ImageDimensions(shrunkWidth, shrunkHeight), outStride, pixelFormat, outputBitmap.GetBuffer(), filteredDimensions);
1132           }
1133           else
1134           {
1135             PointSample(bitmap.GetBuffer(), shrunkWidth, shrunkHeight, outStride, pixelFormat, outputBitmap.GetBuffer(), filteredWidth, filteredHeight);
1136           }
1137           filtered = true;
1138         }
1139       }
1140     }
1141     // Copy out the 2^x downscaled, box-filtered pixels if no secondary filter (point or linear) was applied:
1142     if(filtered == false && (shrunkWidth < bitmapWidth || shrunkHeight < bitmapHeight))
1143     {
1144       // The buffer is downscaled and it is tightly packed. We don't need to set a stride.
1145       outputBitmap = MakePixelBuffer(bitmap.GetBuffer(), pixelFormat, shrunkWidth, shrunkHeight);
1146     }
1147   }
1148
1149   return outputBitmap;
1150 }
1151
1152 namespace
1153 {
1154 /**
1155  * @brief Returns whether to keep box filtering based on whether downscaled dimensions will overshoot the desired ones aty the next step.
1156  * @param test Which combination of the two dimensions matter for terminating the filtering.
1157  * @param scaledWidth The width of the current downscaled image.
1158  * @param scaledHeight The height of the current downscaled image.
1159  * @param desiredWidth The target width for the downscaling.
1160  * @param desiredHeight The target height for the downscaling.
1161  */
1162 bool ContinueScaling(BoxDimensionTest test, unsigned int scaledWidth, unsigned int scaledHeight, unsigned int desiredWidth, unsigned int desiredHeight)
1163 {
1164   bool               keepScaling = false;
1165   const unsigned int nextWidth   = scaledWidth >> 1u;
1166   const unsigned int nextHeight  = scaledHeight >> 1u;
1167
1168   if(nextWidth >= 1u && nextHeight >= 1u)
1169   {
1170     switch(test)
1171     {
1172       case BoxDimensionTestEither:
1173       {
1174         keepScaling = nextWidth >= desiredWidth || nextHeight >= desiredHeight;
1175         break;
1176       }
1177       case BoxDimensionTestBoth:
1178       {
1179         keepScaling = nextWidth >= desiredWidth && nextHeight >= desiredHeight;
1180         break;
1181       }
1182       case BoxDimensionTestX:
1183       {
1184         keepScaling = nextWidth >= desiredWidth;
1185         break;
1186       }
1187       case BoxDimensionTestY:
1188       {
1189         keepScaling = nextHeight >= desiredHeight;
1190         break;
1191       }
1192     }
1193   }
1194
1195   return keepScaling;
1196 }
1197
1198 /**
1199  * @brief A shared implementation of the overall iterative box filter
1200  * downscaling algorithm.
1201  *
1202  * Specialise this for particular pixel formats by supplying the number of bytes
1203  * per pixel and two functions: one for averaging pairs of neighbouring pixels
1204  * on a single scanline, and a second for averaging pixels at corresponding
1205  * positions on different scanlines.
1206  **/
1207 template<
1208   int BYTES_PER_PIXEL,
1209   void (*HalveScanlineInPlace)(unsigned char* const pixels, const unsigned int width),
1210   void (*AverageScanlines)(const unsigned char* const scanline1, const unsigned char* const __restrict__ scanline2, unsigned char* const outputScanline, const unsigned int width)>
1211 void DownscaleInPlacePow2Generic(unsigned char* const pixels,
1212                                  const unsigned int   inputWidth,
1213                                  const unsigned int   inputHeight,
1214                                  const unsigned int   inputStride,
1215                                  const unsigned int   desiredWidth,
1216                                  const unsigned int   desiredHeight,
1217                                  BoxDimensionTest     dimensionTest,
1218                                  unsigned&            outWidth,
1219                                  unsigned&            outHeight,
1220                                  unsigned&            outStride)
1221 {
1222   if(pixels == 0)
1223   {
1224     return;
1225   }
1226   ValidateScalingParameters(inputWidth, inputHeight, desiredWidth, desiredHeight);
1227
1228   // Scale the image until it would be smaller than desired, stopping if the
1229   // resulting height or width would be less than 1:
1230   unsigned int scaledWidth = inputWidth, scaledHeight = inputHeight, stride = inputStride;
1231   while(ContinueScaling(dimensionTest, scaledWidth, scaledHeight, desiredWidth, desiredHeight))
1232   {
1233     const unsigned int lastWidth  = scaledWidth;
1234     const unsigned int lastStride = stride;
1235     scaledWidth >>= 1u;
1236     scaledHeight >>= 1u;
1237     stride = scaledWidth;
1238
1239     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Scaling to %u\t%u.\n", scaledWidth, scaledHeight);
1240
1241     const unsigned int lastScanlinePair = scaledHeight - 1;
1242
1243     // Scale pairs of scanlines until any spare one at the end is dropped:
1244     for(unsigned int y = 0; y <= lastScanlinePair; ++y)
1245     {
1246       // Scale two scanlines horizontally:
1247       HalveScanlineInPlace(&pixels[y * 2 * lastStride * BYTES_PER_PIXEL], lastWidth);
1248       HalveScanlineInPlace(&pixels[(y * 2 + 1) * lastStride * BYTES_PER_PIXEL], lastWidth);
1249
1250       // Scale vertical pairs of pixels while the last two scanlines are still warm in
1251       // the CPU cache(s):
1252       // Note, better access patterns for cache-coherence are possible for very large
1253       // images but even a 4k wide RGB888 image will use just 24kB of cache (4k pixels
1254       // * 3 Bpp * 2 scanlines) for two scanlines on the first iteration.
1255       AverageScanlines(
1256         &pixels[y * 2 * lastStride * BYTES_PER_PIXEL],
1257         &pixels[(y * 2 + 1) * lastStride * BYTES_PER_PIXEL],
1258         &pixels[y * scaledWidth * BYTES_PER_PIXEL],
1259         scaledWidth);
1260     }
1261   }
1262
1263   ///@note: we could finish off with one of two mutually exclusive passes, one squashing horizontally as far as possible, and the other vertically, if we knew a following cpu point or bilinear filter would restore the desired aspect ratio.
1264   outWidth  = scaledWidth;
1265   outHeight = scaledHeight;
1266   outStride = stride;
1267 }
1268
1269 } // namespace
1270
1271 void HalveScanlineInPlaceRGB888(unsigned char* const pixels, const unsigned int width)
1272 {
1273   DebugAssertScanlineParameters(pixels, width);
1274
1275   const unsigned int lastPair = EvenDown(width - 2);
1276
1277   /**
1278    * @code
1279    *  for(unsigned int pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1280    * {
1281    *   // Load all the byte pixel components we need:
1282    *   const unsigned int c11 = pixels[pixel * 3];
1283    *   const unsigned int c12 = pixels[pixel * 3 + 1];
1284    *   const unsigned int c13 = pixels[pixel * 3 + 2];
1285    *   const unsigned int c21 = pixels[pixel * 3 + 3];
1286    *   const unsigned int c22 = pixels[pixel * 3 + 4];
1287    *   const unsigned int c23 = pixels[pixel * 3 + 5];
1288    *
1289    *   // Save the averaged byte pixel components:
1290    *   pixels[outPixel * 3]     = static_cast<unsigned char>(AverageComponent(c11, c21));
1291    *   pixels[outPixel * 3 + 1] = static_cast<unsigned char>(AverageComponent(c12, c22));
1292    *   pixels[outPixel * 3 + 2] = static_cast<unsigned char>(AverageComponent(c13, c23));
1293    * }
1294    *   @endcode
1295    */
1296   //@ToDo : Fix here if we found that collect 12 bytes == 3 uint32_t with 4 colors, and calculate in one-operation
1297   std::uint8_t* inPixelPtr  = pixels;
1298   std::uint8_t* outPixelPtr = pixels;
1299   for(std::uint32_t scanedPixelCount = 0; scanedPixelCount <= lastPair; scanedPixelCount += 2)
1300   {
1301     *(outPixelPtr + 0) = ((*(inPixelPtr + 0) ^ *(inPixelPtr + 3)) >> 1) + (*(inPixelPtr + 0) & *(inPixelPtr + 3));
1302     *(outPixelPtr + 1) = ((*(inPixelPtr + 1) ^ *(inPixelPtr + 4)) >> 1) + (*(inPixelPtr + 1) & *(inPixelPtr + 4));
1303     *(outPixelPtr + 2) = ((*(inPixelPtr + 2) ^ *(inPixelPtr + 5)) >> 1) + (*(inPixelPtr + 2) & *(inPixelPtr + 5));
1304     inPixelPtr += 6;
1305     outPixelPtr += 3;
1306   }
1307 }
1308
1309 void HalveScanlineInPlaceRGBA8888(unsigned char* const pixels, const unsigned int width)
1310 {
1311   DebugAssertScanlineParameters(pixels, width);
1312   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(pixels) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1313
1314   uint32_t* const alignedPixels = reinterpret_cast<uint32_t*>(pixels);
1315
1316   const unsigned int lastPair = EvenDown(width - 2);
1317
1318   for(unsigned int pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1319   {
1320     const uint32_t averaged = AveragePixelRGBA8888(alignedPixels[pixel], alignedPixels[pixel + 1]);
1321     alignedPixels[outPixel] = averaged;
1322   }
1323 }
1324
1325 void HalveScanlineInPlaceRGB565(unsigned char* pixels, unsigned int width)
1326 {
1327   DebugAssertScanlineParameters(pixels, width);
1328   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(pixels) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1329
1330   uint16_t* const alignedPixels = reinterpret_cast<uint16_t*>(pixels);
1331
1332   const unsigned int lastPair = EvenDown(width - 2);
1333
1334   for(unsigned int pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1335   {
1336     const uint32_t averaged = AveragePixelRGB565(alignedPixels[pixel], alignedPixels[pixel + 1]);
1337     alignedPixels[outPixel] = averaged;
1338   }
1339 }
1340
1341 void HalveScanlineInPlace2Bytes(unsigned char* const pixels, const unsigned int width)
1342 {
1343   DebugAssertScanlineParameters(pixels, width);
1344
1345   const unsigned int lastPair = EvenDown(width - 2);
1346
1347   for(unsigned int pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1348   {
1349     /**
1350      * @code
1351      * // Load all the byte pixel components we need:
1352      * const unsigned int c11 = pixels[pixel * 2];
1353      * const unsigned int c12 = pixels[pixel * 2 + 1];
1354      * const unsigned int c21 = pixels[pixel * 2 + 2];
1355      * const unsigned int c22 = pixels[pixel * 2 + 3];
1356      *
1357      * // Save the averaged byte pixel components:
1358      * pixels[outPixel * 2]     = static_cast<unsigned char>(AverageComponent(c11, c21));
1359      * pixels[outPixel * 2 + 1] = static_cast<unsigned char>(AverageComponent(c12, c22));
1360      * @endcode
1361      */
1362     // Note : We can assume that pixel is even number. So we can use | operation instead of + operation.
1363     pixels[(outPixel << 1)]     = ((pixels[(pixel << 1)] ^ pixels[(pixel << 1) | 2]) >> 1) + (pixels[(pixel << 1)] & pixels[(pixel << 1) | 2]);
1364     pixels[(outPixel << 1) | 1] = ((pixels[(pixel << 1) | 1] ^ pixels[(pixel << 1) | 3]) >> 1) + (pixels[(pixel << 1) | 1] & pixels[(pixel << 1) | 3]);
1365   }
1366 }
1367
1368 void HalveScanlineInPlace1Byte(unsigned char* const pixels, const unsigned int width)
1369 {
1370   DebugAssertScanlineParameters(pixels, width);
1371
1372   const unsigned int lastPair = EvenDown(width - 2);
1373
1374   for(unsigned int pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1375   {
1376     /**
1377      * @code
1378      * // Load all the byte pixel components we need:
1379      * const unsigned int c1 = pixels[pixel];
1380      * const unsigned int c2 = pixels[pixel + 1];
1381      *
1382      * // Save the averaged byte pixel component:
1383      * pixels[outPixel] = static_cast<unsigned char>(AverageComponent(c1, c2));
1384      * @endcode
1385      */
1386     // Note : We can assume that pixel is even number. So we can use | operation instead of + operation.
1387     pixels[outPixel] = ((pixels[pixel] ^ pixels[pixel | 1]) >> 1) + (pixels[pixel] & pixels[pixel | 1]);
1388   }
1389 }
1390
1391 // AverageScanline
1392
namespace
{
/**
 * @brief Averages two scanlines byte by byte, processing eight components per step where possible.
 *
 * Every output component is the rounded-down average of the two input components at the same
 * position. Components are handled in groups of eight packed into a 64-bit word; any remainder
 * is averaged one component at a time.
 *
 * The 64-bit words are loaded and stored with memcpy, so the scanlines may start at any byte
 * address: no alignment is required and no pointer is reinterpreted as a wider type.
 *
 * @param[in] scanline1 First input scanline. May be the same buffer as outputScanline.
 * @param[in] scanline2 Second input scanline.
 * @param[out] outputScanline Destination for the averaged components.
 * @param[in] totalComponentCount Number of byte components to average.
 */
inline void AverageScanlinesWithEightComponents(
  const unsigned char* const scanline1,
  const unsigned char* const __restrict__ scanline2,
  unsigned char* const outputScanline,
  const unsigned int   totalComponentCount)
{
  // Number of components that can be handled in whole groups of eight.
  const unsigned int wideComponentCount = totalComponentCount & ~7u;

  unsigned int component = 0u;
  for(; component < wideComponentCount; component += 8u)
  {
    std::uint64_t c1;
    std::uint64_t c2;
    memcpy(&c1, scanline1 + component, sizeof(c1));
    memcpy(&c2, scanline2 + component, sizeof(c2));

    // Per-byte average without carries between bytes: the mask clears the lowest bit of every
    // byte before the shift so that no bit leaks into the neighbouring byte.
    const std::uint64_t averaged = (((c1 ^ c2) & 0xfefefefefefefefeull) >> 1) + (c1 & c2);
    memcpy(outputScanline + component, &averaged, sizeof(averaged));
  }

  // Remaining components (fewer than eight) are averaged individually.
  for(; component < totalComponentCount; ++component)
  {
    const auto& c1            = scanline1[component];
    const auto& c2            = scanline2[component];
    outputScanline[component] = static_cast<std::uint8_t>(((c1 ^ c2) >> 1) + (c1 & c2));
  }
}

} // namespace
1435
1436 void AverageScanlines1(const unsigned char* const scanline1,
1437                        const unsigned char* const __restrict__ scanline2,
1438                        unsigned char* const outputScanline,
1439                        const unsigned int   width)
1440 {
1441   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width);
1442
1443   /**
1444    * @code
1445    * for(unsigned int component = 0; component < width; ++component)
1446    * {
1447    *   outputScanline[component] = static_cast<unsigned char>(AverageComponent(scanline1[component], scanline2[component]));
1448    * }
1449    * @endcode
1450    */
1451   AverageScanlinesWithEightComponents(scanline1, scanline2, outputScanline, width);
1452 }
1453
1454 void AverageScanlines2(const unsigned char* const scanline1,
1455                        const unsigned char* const __restrict__ scanline2,
1456                        unsigned char* const outputScanline,
1457                        const unsigned int   width)
1458 {
1459   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 2);
1460
1461   /**
1462    * @code
1463    * for(unsigned int component = 0; component < width * 2; ++component)
1464    * {
1465    *   outputScanline[component] = static_cast<unsigned char>(AverageComponent(scanline1[component], scanline2[component]));
1466    * }
1467    * @endcode
1468    */
1469   AverageScanlinesWithEightComponents(scanline1, scanline2, outputScanline, width * 2);
1470 }
1471
1472 void AverageScanlines3(const unsigned char* const scanline1,
1473                        const unsigned char* const __restrict__ scanline2,
1474                        unsigned char* const outputScanline,
1475                        const unsigned int   width)
1476 {
1477   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 3);
1478
1479   /**
1480    * @code
1481    * for(unsigned int component = 0; component < width * 3; ++component)
1482    * {
1483    *   outputScanline[component] = static_cast<unsigned char>(AverageComponent(scanline1[component], scanline2[component]));
1484    * }
1485    * @endcode
1486    */
1487   AverageScanlinesWithEightComponents(scanline1, scanline2, outputScanline, width * 3);
1488 }
1489
1490 void AverageScanlinesRGBA8888(const unsigned char* const scanline1,
1491                               const unsigned char* const __restrict__ scanline2,
1492                               unsigned char* const outputScanline,
1493                               const unsigned int   width)
1494 {
1495   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 4);
1496   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline1) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1497   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline2) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1498   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(outputScanline) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1499
1500   const uint32_t* const alignedScanline1 = reinterpret_cast<const uint32_t*>(scanline1);
1501   const uint32_t* const alignedScanline2 = reinterpret_cast<const uint32_t*>(scanline2);
1502   uint32_t* const       alignedOutput    = reinterpret_cast<uint32_t*>(outputScanline);
1503
1504   for(unsigned int pixel = 0; pixel < width; ++pixel)
1505   {
1506     alignedOutput[pixel] = AveragePixelRGBA8888(alignedScanline1[pixel], alignedScanline2[pixel]);
1507   }
1508 }
1509
1510 void AverageScanlinesRGB565(const unsigned char* const scanline1,
1511                             const unsigned char* const __restrict__ scanline2,
1512                             unsigned char* const outputScanline,
1513                             const unsigned int   width)
1514 {
1515   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 2);
1516   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline1) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1517   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline2) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1518   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(outputScanline) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1519
1520   const uint16_t* const alignedScanline1 = reinterpret_cast<const uint16_t*>(scanline1);
1521   const uint16_t* const alignedScanline2 = reinterpret_cast<const uint16_t*>(scanline2);
1522   uint16_t* const       alignedOutput    = reinterpret_cast<uint16_t*>(outputScanline);
1523
1524   for(unsigned int pixel = 0; pixel < width; ++pixel)
1525   {
1526     alignedOutput[pixel] = AveragePixelRGB565(alignedScanline1[pixel], alignedScanline2[pixel]);
1527   }
1528 }
1529
1530 /// Dispatch to pixel format appropriate box filter downscaling functions.
1531 void DownscaleInPlacePow2(unsigned char* const pixels,
1532                           Pixel::Format        pixelFormat,
1533                           unsigned int         inputWidth,
1534                           unsigned int         inputHeight,
1535                           unsigned int         inputStride,
1536                           unsigned int         desiredWidth,
1537                           unsigned int         desiredHeight,
1538                           FittingMode::Type    fittingMode,
1539                           SamplingMode::Type   samplingMode,
1540                           unsigned&            outWidth,
1541                           unsigned&            outHeight,
1542                           unsigned&            outStride)
1543 {
1544   outWidth  = inputWidth;
1545   outHeight = inputHeight;
1546   outStride = inputStride;
1547   // Perform power of 2 iterated 4:1 box filtering if the requested filter mode requires it:
1548   if(samplingMode == SamplingMode::BOX || samplingMode == SamplingMode::BOX_THEN_NEAREST || samplingMode == SamplingMode::BOX_THEN_LINEAR)
1549   {
1550     // Check the pixel format is one that is supported:
1551     if(pixelFormat == Pixel::RGBA8888 || pixelFormat == Pixel::RGB888 || pixelFormat == Pixel::RGB565 || pixelFormat == Pixel::LA88 || pixelFormat == Pixel::L8 || pixelFormat == Pixel::A8)
1552     {
1553       const BoxDimensionTest dimensionTest = DimensionTestForScalingMode(fittingMode);
1554
1555       switch(pixelFormat)
1556       {
1557         case Pixel::RGBA8888:
1558         {
1559           Internal::Platform::DownscaleInPlacePow2RGBA8888(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1560           break;
1561         }
1562         case Pixel::RGB888:
1563         {
1564           Internal::Platform::DownscaleInPlacePow2RGB888(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1565           break;
1566         }
1567         case Pixel::RGB565:
1568         {
1569           Internal::Platform::DownscaleInPlacePow2RGB565(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1570           break;
1571         }
1572         case Pixel::LA88:
1573         {
1574           Internal::Platform::DownscaleInPlacePow2ComponentPair(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1575           break;
1576         }
1577         case Pixel::L8:
1578         case Pixel::A8:
1579         {
1580           Internal::Platform::DownscaleInPlacePow2SingleBytePerPixel(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1581           break;
1582         }
1583         default:
1584         {
1585           DALI_ASSERT_DEBUG(false && "Inner branch conditions don't match outer branch.");
1586         }
1587       }
1588     }
1589   }
1590   else
1591   {
1592     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Bitmap was not shrunk: unsupported pixel format: %u.\n", unsigned(pixelFormat));
1593   }
1594 }
1595
1596 void DownscaleInPlacePow2RGB888(unsigned char*   pixels,
1597                                 unsigned int     inputWidth,
1598                                 unsigned int     inputHeight,
1599                                 unsigned int     inputStride,
1600                                 unsigned int     desiredWidth,
1601                                 unsigned int     desiredHeight,
1602                                 BoxDimensionTest dimensionTest,
1603                                 unsigned&        outWidth,
1604                                 unsigned&        outHeight,
1605                                 unsigned&        outStride)
1606 {
1607   DownscaleInPlacePow2Generic<3, HalveScanlineInPlaceRGB888, AverageScanlines3>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1608 }
1609
1610 void DownscaleInPlacePow2RGBA8888(unsigned char*   pixels,
1611                                   unsigned int     inputWidth,
1612                                   unsigned int     inputHeight,
1613                                   unsigned int     inputStride,
1614                                   unsigned int     desiredWidth,
1615                                   unsigned int     desiredHeight,
1616                                   BoxDimensionTest dimensionTest,
1617                                   unsigned&        outWidth,
1618                                   unsigned&        outHeight,
1619                                   unsigned&        outStride)
1620 {
1621   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(pixels) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1622   DownscaleInPlacePow2Generic<4, HalveScanlineInPlaceRGBA8888, AverageScanlinesRGBA8888>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1623 }
1624
1625 void DownscaleInPlacePow2RGB565(unsigned char*   pixels,
1626                                 unsigned int     inputWidth,
1627                                 unsigned int     inputHeight,
1628                                 unsigned int     inputStride,
1629                                 unsigned int     desiredWidth,
1630                                 unsigned int     desiredHeight,
1631                                 BoxDimensionTest dimensionTest,
1632                                 unsigned int&    outWidth,
1633                                 unsigned int&    outHeight,
1634                                 unsigned int&    outStride)
1635 {
1636   DownscaleInPlacePow2Generic<2, HalveScanlineInPlaceRGB565, AverageScanlinesRGB565>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1637 }
1638
1639 /**
1640  * @copydoc DownscaleInPlacePow2RGB888
1641  *
1642  * For 2-byte formats such as lum8alpha8, but not packed 16 bit formats like RGB565.
1643  */
1644 void DownscaleInPlacePow2ComponentPair(unsigned char*   pixels,
1645                                        unsigned int     inputWidth,
1646                                        unsigned int     inputHeight,
1647                                        unsigned int     inputStride,
1648                                        unsigned int     desiredWidth,
1649                                        unsigned int     desiredHeight,
1650                                        BoxDimensionTest dimensionTest,
1651                                        unsigned&        outWidth,
1652                                        unsigned&        outHeight,
1653                                        unsigned&        outStride)
1654 {
1655   DownscaleInPlacePow2Generic<2, HalveScanlineInPlace2Bytes, AverageScanlines2>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1656 }
1657
1658 void DownscaleInPlacePow2SingleBytePerPixel(unsigned char*   pixels,
1659                                             unsigned int     inputWidth,
1660                                             unsigned int     inputHeight,
1661                                             unsigned int     inputStride,
1662                                             unsigned int     desiredWidth,
1663                                             unsigned int     desiredHeight,
1664                                             BoxDimensionTest dimensionTest,
1665                                             unsigned int&    outWidth,
1666                                             unsigned int&    outHeight,
1667                                             unsigned int&    outStride)
1668 {
1669   DownscaleInPlacePow2Generic<1, HalveScanlineInPlace1Byte, AverageScanlines1>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1670 }
1671
1672 // Point sampling group below
1673
1674 namespace
1675 {
1676 /**
1677  * @brief Point sample an image to a new resolution (like GL_NEAREST).
1678  *
1679  * Template is used purely as a type-safe code generator in this one
1680  * compilation unit. Generated code is inlined into type-specific wrapper
1681  * functions below which are exported to rest of module.
1682  */
1683 template<typename PIXEL>
1684 inline void PointSampleAddressablePixels(const uint8_t* inPixels,
1685                                          unsigned int   inputWidth,
1686                                          unsigned int   inputHeight,
1687                                          unsigned int   inputStride,
1688                                          uint8_t*       outPixels,
1689                                          unsigned int   desiredWidth,
1690                                          unsigned int   desiredHeight)
1691 {
1692   DALI_ASSERT_DEBUG(((desiredWidth <= inputWidth && desiredHeight <= inputHeight) ||
1693                      outPixels >= inPixels + inputStride * inputHeight * sizeof(PIXEL) || outPixels <= inPixels - desiredWidth * desiredHeight * sizeof(PIXEL)) &&
1694                     "The input and output buffers must not overlap for an upscaling.");
1695   DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(inPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");
1696   DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(outPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");
1697
1698   if(inputWidth < 1u || inputHeight < 1u || desiredWidth < 1u || desiredHeight < 1u)
1699   {
1700     return;
1701   }
1702   const PIXEL* const inAligned  = reinterpret_cast<const PIXEL*>(inPixels);
1703   PIXEL* const       outAligned = reinterpret_cast<PIXEL*>(outPixels);
1704   const unsigned int deltaX     = (inputWidth << 16u) / desiredWidth;
1705   const unsigned int deltaY     = (inputHeight << 16u) / desiredHeight;
1706
1707   unsigned int inY = 0;
1708   for(unsigned int outY = 0; outY < desiredHeight; ++outY)
1709   {
1710     // Round fixed point y coordinate to nearest integer:
1711     const unsigned int integerY    = (inY + (1u << 15u)) >> 16u;
1712     const PIXEL* const inScanline  = &inAligned[inputStride * integerY];
1713     PIXEL* const       outScanline = &outAligned[desiredWidth * outY];
1714
1715     DALI_ASSERT_DEBUG(integerY < inputHeight);
1716     DALI_ASSERT_DEBUG(reinterpret_cast<const uint8_t*>(inScanline) < (inPixels + inputStride * inputHeight * sizeof(PIXEL)));
1717     DALI_ASSERT_DEBUG(reinterpret_cast<uint8_t*>(outScanline) < (outPixels + desiredWidth * desiredHeight * sizeof(PIXEL)));
1718
1719     unsigned int inX = 0;
1720     for(unsigned int outX = 0; outX < desiredWidth; ++outX)
1721     {
1722       // Round the fixed-point x coordinate to an integer:
1723       const unsigned int integerX       = (inX + (1u << 15u)) >> 16u;
1724       const PIXEL* const inPixelAddress = &inScanline[integerX];
1725       const PIXEL        pixel          = *inPixelAddress;
1726       outScanline[outX]                 = pixel;
1727       inX += deltaX;
1728     }
1729     inY += deltaY;
1730   }
1731 }
1732
1733 } // namespace
1734
1735 // RGBA8888
1736 void PointSample4BPP(const unsigned char* inPixels,
1737                      unsigned int         inputWidth,
1738                      unsigned int         inputHeight,
1739                      unsigned int         inputStride,
1740                      unsigned char*       outPixels,
1741                      unsigned int         desiredWidth,
1742                      unsigned int         desiredHeight)
1743 {
1744   PointSampleAddressablePixels<uint32_t>(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1745 }
1746
1747 // RGB565, LA88
1748 void PointSample2BPP(const unsigned char* inPixels,
1749                      unsigned int         inputWidth,
1750                      unsigned int         inputHeight,
1751                      unsigned int         inputStride,
1752                      unsigned char*       outPixels,
1753                      unsigned int         desiredWidth,
1754                      unsigned int         desiredHeight)
1755 {
1756   PointSampleAddressablePixels<uint16_t>(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1757 }
1758
1759 // L8, A8
1760 void PointSample1BPP(const unsigned char* inPixels,
1761                      unsigned int         inputWidth,
1762                      unsigned int         inputHeight,
1763                      unsigned int         inputStride,
1764                      unsigned char*       outPixels,
1765                      unsigned int         desiredWidth,
1766                      unsigned int         desiredHeight)
1767 {
1768   PointSampleAddressablePixels<uint8_t>(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1769 }
1770
/**
 * @brief Point sample an RGB888 image to a new resolution (nearest input pixel).
 *
 * RGB888 is a special case as its pixels are not aligned addressable units,
 * so every pixel is copied as three separate bytes.
 *
 * The output image is walked in whole pixel steps while the matching input
 * position is tracked in 16.16 fixed point and rounded to the nearest input
 * pixel. The rounded position is clamped to the last input row / column: when
 * the output is at least twice the size of the input, rounding the final
 * positions up would otherwise index one row / column past the input.
 *
 * Nothing is written if any of the four dimensions is zero.
 *
 * @param[in]  inPixels      Input pixels, 3 bytes each.
 * @param[in]  inputWidth    Input width in pixels. Shifted left by 16 bits, so it must fit in 16 bits.
 * @param[in]  inputHeight   Input height in pixels. Shifted left by 16 bits, so it must fit in 16 bits.
 * @param[in]  inputStride   Distance between the starts of consecutive input rows, in pixels.
 * @param[out] outPixels     Output pixels, written tightly packed (desiredWidth pixels per row).
 * @param[in]  desiredWidth  Output width in pixels.
 * @param[in]  desiredHeight Output height in pixels.
 */
void PointSample3BPP(const uint8_t* inPixels,
                     unsigned int   inputWidth,
                     unsigned int   inputHeight,
                     unsigned int   inputStride,
                     uint8_t*       outPixels,
                     unsigned int   desiredWidth,
                     unsigned int   desiredHeight)
{
  if(inputWidth < 1u || inputHeight < 1u || desiredWidth < 1u || desiredHeight < 1u)
  {
    return;
  }
  const unsigned int BYTES_PER_PIXEL = 3;

  // Generate fixed-point 16.16 deltas in input image coordinates:
  const unsigned int deltaX = (inputWidth << 16u) / desiredWidth;
  const unsigned int deltaY = (inputHeight << 16u) / desiredHeight;

  // Highest valid input coordinates, used to clamp the rounded sample positions:
  const unsigned int lastInputX = inputWidth - 1u;
  const unsigned int lastInputY = inputHeight - 1u;

  // Step through output image in whole integer pixel steps while tracking the
  // corresponding locations in the input image using 16.16 fixed-point
  // coordinates:
  unsigned int inY = 0; //< 16.16 fixed-point input image y-coord.
  for(unsigned int outY = 0; outY < desiredHeight; ++outY)
  {
    // Round to the nearest input row, staying inside the input image:
    const unsigned int roundedY = (inY + (1u << 15u)) >> 16u;
    const unsigned int integerY = roundedY > lastInputY ? lastInputY : roundedY;

    // Byte offsets are computed in size_t so that large images cannot wrap a 32-bit product:
    const uint8_t* const inScanline  = &inPixels[static_cast<size_t>(inputStride) * integerY * BYTES_PER_PIXEL];
    uint8_t* const       outScanline = &outPixels[static_cast<size_t>(desiredWidth) * outY * BYTES_PER_PIXEL];
    unsigned int         inX         = 0; //< 16.16 fixed-point input image x-coord.

    for(unsigned int outX = 0; outX < desiredWidth * BYTES_PER_PIXEL; outX += BYTES_PER_PIXEL)
    {
      // Round the fixed-point input coordinate to the address of the input pixel to sample,
      // staying inside the input row:
      const unsigned int   roundedX       = (inX + (1u << 15u)) >> 16u;
      const unsigned int   integerX       = roundedX > lastInputX ? lastInputX : roundedX;
      const uint8_t* const inPixelAddress = &inScanline[integerX * BYTES_PER_PIXEL];

      // Issue loads for all pixel color components up-front:
      const unsigned int c0 = inPixelAddress[0];
      const unsigned int c1 = inPixelAddress[1];
      const unsigned int c2 = inPixelAddress[2];
      ///@ToDo: Optimise - Benchmark one 32bit load that will be unaligned 2/3 of the time + 3 rotate and masks, versus these three aligned byte loads, versus using an RGB packed, aligned(1) struct and letting compiler pick a strategy.

      // Output the pixel components:
      outScanline[outX]     = static_cast<uint8_t>(c0);
      outScanline[outX + 1] = static_cast<uint8_t>(c1);
      outScanline[outX + 2] = static_cast<uint8_t>(c2);

      // Increment the fixed-point input coordinate:
      inX += deltaX;
    }

    inY += deltaY;
  }
}
1827
1828 // Dispatch to a format-appropriate point sampling function:
1829 void PointSample(const unsigned char* inPixels,
1830                  unsigned int         inputWidth,
1831                  unsigned int         inputHeight,
1832                  unsigned int         inputStride,
1833                  Pixel::Format        pixelFormat,
1834                  unsigned char*       outPixels,
1835                  unsigned int         desiredWidth,
1836                  unsigned int         desiredHeight)
1837 {
1838   // Check the pixel format is one that is supported:
1839   if(pixelFormat == Pixel::RGBA8888 || pixelFormat == Pixel::RGB888 || pixelFormat == Pixel::RGB565 || pixelFormat == Pixel::LA88 || pixelFormat == Pixel::L8 || pixelFormat == Pixel::A8)
1840   {
1841     switch(pixelFormat)
1842     {
1843       case Pixel::RGB888:
1844       {
1845         PointSample3BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1846         break;
1847       }
1848       case Pixel::RGBA8888:
1849       {
1850         PointSample4BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1851         break;
1852       }
1853       case Pixel::RGB565:
1854       case Pixel::LA88:
1855       {
1856         PointSample2BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1857         break;
1858       }
1859       case Pixel::L8:
1860       case Pixel::A8:
1861       {
1862         PointSample1BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1863         break;
1864       }
1865       default:
1866       {
1867         DALI_ASSERT_DEBUG(0 == "Inner branch conditions don't match outer branch.");
1868       }
1869     }
1870   }
1871   else
1872   {
1873     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Bitmap was not point sampled: unsupported pixel format: %u.\n", unsigned(pixelFormat));
1874   }
1875 }
1876
1877 // Linear sampling group below
1878
1879 namespace
1880 {
1881 /** @brief Blend 4 pixels together using horizontal and vertical weights. */
1882 inline uint8_t BilinearFilter1BPPByte(uint8_t tl, uint8_t tr, uint8_t bl, uint8_t br, unsigned int fractBlendHorizontal, unsigned int fractBlendVertical)
1883 {
1884   return static_cast<uint8_t>(BilinearFilter1Component(tl, tr, bl, br, fractBlendHorizontal, fractBlendVertical));
1885 }
1886
1887 /** @copydoc BilinearFilter1BPPByte */
1888 inline Pixel2Bytes BilinearFilter2Bytes(Pixel2Bytes tl, Pixel2Bytes tr, Pixel2Bytes bl, Pixel2Bytes br, unsigned int fractBlendHorizontal, unsigned int fractBlendVertical)
1889 {
1890   Pixel2Bytes pixel;
1891   pixel.l = static_cast<uint8_t>(BilinearFilter1Component(tl.l, tr.l, bl.l, br.l, fractBlendHorizontal, fractBlendVertical));
1892   pixel.a = static_cast<uint8_t>(BilinearFilter1Component(tl.a, tr.a, bl.a, br.a, fractBlendHorizontal, fractBlendVertical));
1893   return pixel;
1894 }
1895
1896 /** @copydoc BilinearFilter1BPPByte */
1897 inline Pixel3Bytes BilinearFilterRGB888(Pixel3Bytes tl, Pixel3Bytes tr, Pixel3Bytes bl, Pixel3Bytes br, unsigned int fractBlendHorizontal, unsigned int fractBlendVertical)
1898 {
1899   Pixel3Bytes pixel;
1900   pixel.r = static_cast<uint8_t>(BilinearFilter1Component(tl.r, tr.r, bl.r, br.r, fractBlendHorizontal, fractBlendVertical));
1901   pixel.g = static_cast<uint8_t>(BilinearFilter1Component(tl.g, tr.g, bl.g, br.g, fractBlendHorizontal, fractBlendVertical));
1902   pixel.b = static_cast<uint8_t>(BilinearFilter1Component(tl.b, tr.b, bl.b, br.b, fractBlendHorizontal, fractBlendVertical));
1903   return pixel;
1904 }
1905
1906 /** @copydoc BilinearFilter1BPPByte */
1907 inline PixelRGB565 BilinearFilterRGB565(PixelRGB565 tl, PixelRGB565 tr, PixelRGB565 bl, PixelRGB565 br, unsigned int fractBlendHorizontal, unsigned int fractBlendVertical)
1908 {
1909   const PixelRGB565 pixel = static_cast<PixelRGB565>((BilinearFilter1Component(tl >> 11u, tr >> 11u, bl >> 11u, br >> 11u, fractBlendHorizontal, fractBlendVertical) << 11u) +
1910                                                      (BilinearFilter1Component((tl >> 5u) & 63u, (tr >> 5u) & 63u, (bl >> 5u) & 63u, (br >> 5u) & 63u, fractBlendHorizontal, fractBlendVertical) << 5u) +
1911                                                      BilinearFilter1Component(tl & 31u, tr & 31u, bl & 31u, br & 31u, fractBlendHorizontal, fractBlendVertical));
1912   return pixel;
1913 }
1914
1915 /** @copydoc BilinearFilter1BPPByte */
1916 inline Pixel4Bytes BilinearFilter4Bytes(Pixel4Bytes tl, Pixel4Bytes tr, Pixel4Bytes bl, Pixel4Bytes br, unsigned int fractBlendHorizontal, unsigned int fractBlendVertical)
1917 {
1918   Pixel4Bytes pixel;
1919   pixel.r = static_cast<uint8_t>(BilinearFilter1Component(tl.r, tr.r, bl.r, br.r, fractBlendHorizontal, fractBlendVertical));
1920   pixel.g = static_cast<uint8_t>(BilinearFilter1Component(tl.g, tr.g, bl.g, br.g, fractBlendHorizontal, fractBlendVertical));
1921   pixel.b = static_cast<uint8_t>(BilinearFilter1Component(tl.b, tr.b, bl.b, br.b, fractBlendHorizontal, fractBlendVertical));
1922   pixel.a = static_cast<uint8_t>(BilinearFilter1Component(tl.a, tr.a, bl.a, br.a, fractBlendHorizontal, fractBlendVertical));
1923   return pixel;
1924 }
1925
1926 /**
1927  * @brief Generic version of bilinear sampling image resize function.
1928  * @note Limited to one compilation unit and exposed through type-specific
1929  * wrapper functions below.
1930  */
1931 template<
1932   typename PIXEL,
1933   PIXEL (*BilinearFilter)(PIXEL tl, PIXEL tr, PIXEL bl, PIXEL br, unsigned int fractBlendHorizontal, unsigned int fractBlendVertical),
1934   bool DEBUG_ASSERT_ALIGNMENT>
1935 inline void LinearSampleGeneric(const unsigned char* __restrict__ inPixels,
1936                                 ImageDimensions inputDimensions,
1937                                 unsigned int    inputStride,
1938                                 unsigned char* __restrict__ outPixels,
1939                                 ImageDimensions desiredDimensions)
1940 {
1941   const unsigned int inputWidth    = inputDimensions.GetWidth();
1942   const unsigned int inputHeight   = inputDimensions.GetHeight();
1943   const unsigned int desiredWidth  = desiredDimensions.GetWidth();
1944   const unsigned int desiredHeight = desiredDimensions.GetHeight();
1945
1946   DALI_ASSERT_DEBUG(((outPixels >= inPixels + inputStride * inputHeight * sizeof(PIXEL)) ||
1947                      (inPixels >= outPixels + desiredWidth * desiredHeight * sizeof(PIXEL))) &&
1948                     "Input and output buffers cannot overlap.");
1949   if(DEBUG_ASSERT_ALIGNMENT)
1950   {
1951     DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(inPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");
1952     DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(outPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");
1953   }
1954
1955   if(inputWidth < 1u || inputHeight < 1u || desiredWidth < 1u || desiredHeight < 1u)
1956   {
1957     return;
1958   }
1959   const PIXEL* const inAligned  = reinterpret_cast<const PIXEL*>(inPixels);
1960   PIXEL* const       outAligned = reinterpret_cast<PIXEL*>(outPixels);
1961   const unsigned int deltaX     = (inputWidth << 16u) / desiredWidth;
1962   const unsigned int deltaY     = (inputHeight << 16u) / desiredHeight;
1963
1964   unsigned int inY = 0;
1965   for(unsigned int outY = 0; outY < desiredHeight; ++outY)
1966   {
1967     PIXEL* const outScanline = &outAligned[desiredWidth * outY];
1968
1969     // Find the two scanlines to blend and the weight to blend with:
1970     const unsigned int integerY1    = inY >> 16u;
1971     const unsigned int integerY2    = integerY1 >= inputHeight ? integerY1 : integerY1 + 1;
1972     const unsigned int inputYWeight = inY & 65535u;
1973
1974     DALI_ASSERT_DEBUG(integerY1 < inputHeight);
1975     DALI_ASSERT_DEBUG(integerY2 < inputHeight);
1976
1977     const PIXEL* const inScanline1 = &inAligned[inputStride * integerY1];
1978     const PIXEL* const inScanline2 = &inAligned[inputStride * integerY2];
1979
1980     unsigned int inX = 0;
1981     for(unsigned int outX = 0; outX < desiredWidth; ++outX)
1982     {
1983       // Work out the two pixel scanline offsets for this cluster of four samples:
1984       const unsigned int integerX1 = inX >> 16u;
1985       const unsigned int integerX2 = integerX1 >= inputWidth ? integerX1 : integerX1 + 1;
1986
1987       // Execute the loads:
1988       const PIXEL pixel1 = inScanline1[integerX1];
1989       const PIXEL pixel2 = inScanline2[integerX1];
1990       const PIXEL pixel3 = inScanline1[integerX2];
1991       const PIXEL pixel4 = inScanline2[integerX2];
1992       ///@ToDo Optimise - for 1 and 2  and 4 byte types to execute a single 2, 4, or 8 byte load per pair (caveat clamping) and let half of them be unaligned.
1993
1994       // Weighted bilinear filter:
1995       const unsigned int inputXWeight = inX & 65535u;
1996       outScanline[outX]               = BilinearFilter(pixel1, pixel3, pixel2, pixel4, inputXWeight, inputYWeight);
1997
1998       inX += deltaX;
1999     }
2000     inY += deltaY;
2001   }
2002 }
2003
2004 } // namespace
2005
2006 // Format-specific linear scaling instantiations:
2007
2008 void LinearSample1BPP(const unsigned char* __restrict__ inPixels,
2009                       ImageDimensions inputDimensions,
2010                       unsigned int    inputStride,
2011                       unsigned char* __restrict__ outPixels,
2012                       ImageDimensions desiredDimensions)
2013 {
2014   LinearSampleGeneric<uint8_t, BilinearFilter1BPPByte, false>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2015 }
2016
2017 void LinearSample2BPP(const unsigned char* __restrict__ inPixels,
2018                       ImageDimensions inputDimensions,
2019                       unsigned int    inputStride,
2020                       unsigned char* __restrict__ outPixels,
2021                       ImageDimensions desiredDimensions)
2022 {
2023   LinearSampleGeneric<Pixel2Bytes, BilinearFilter2Bytes, true>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2024 }
2025
2026 void LinearSampleRGB565(const unsigned char* __restrict__ inPixels,
2027                         ImageDimensions inputDimensions,
2028                         unsigned int    inputStride,
2029                         unsigned char* __restrict__ outPixels,
2030                         ImageDimensions desiredDimensions)
2031 {
2032   LinearSampleGeneric<PixelRGB565, BilinearFilterRGB565, true>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2033 }
2034
2035 void LinearSample3BPP(const unsigned char* __restrict__ inPixels,
2036                       ImageDimensions inputDimensions,
2037                       unsigned int    inputStride,
2038                       unsigned char* __restrict__ outPixels,
2039                       ImageDimensions desiredDimensions)
2040 {
2041   LinearSampleGeneric<Pixel3Bytes, BilinearFilterRGB888, false>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2042 }
2043
2044 void LinearSample4BPP(const unsigned char* __restrict__ inPixels,
2045                       ImageDimensions inputDimensions,
2046                       unsigned int    inputStride,
2047                       unsigned char* __restrict__ outPixels,
2048                       ImageDimensions desiredDimensions)
2049 {
2050   LinearSampleGeneric<Pixel4Bytes, BilinearFilter4Bytes, true>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2051 }
2052
2053 void Resample(const unsigned char* __restrict__ inPixels,
2054               ImageDimensions inputDimensions,
2055               unsigned int    inputStride,
2056               unsigned char* __restrict__ outPixels,
2057               ImageDimensions   desiredDimensions,
2058               Resampler::Filter filterType,
2059               int               numChannels,
2060               bool              hasAlpha)
2061 {
2062   // Got from the test.cpp of the ImageResampler lib.
2063   const float ONE_DIV_255               = 1.0f / 255.0f;
2064   const int   MAX_UNSIGNED_CHAR         = std::numeric_limits<uint8_t>::max();
2065   const int   LINEAR_TO_SRGB_TABLE_SIZE = 4096;
2066   const int   ALPHA_CHANNEL             = hasAlpha ? (numChannels - 1) : 0;
2067
2068   static bool          loadColorSpaces = true;
2069   static float         srgbToLinear[MAX_UNSIGNED_CHAR + 1];
2070   static unsigned char linearToSrgb[LINEAR_TO_SRGB_TABLE_SIZE];
2071
2072   if(loadColorSpaces) // Only create the color space conversions on the first execution
2073   {
2074     loadColorSpaces = false;
2075
2076     for(int i = 0; i <= MAX_UNSIGNED_CHAR; ++i)
2077     {
2078       srgbToLinear[i] = pow(static_cast<float>(i) * ONE_DIV_255, DEFAULT_SOURCE_GAMMA);
2079     }
2080
2081     const float invLinearToSrgbTableSize = 1.0f / static_cast<float>(LINEAR_TO_SRGB_TABLE_SIZE);
2082     const float invSourceGamma           = 1.0f / DEFAULT_SOURCE_GAMMA;
2083
2084     for(int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i)
2085     {
2086       int k = static_cast<int>(255.0f * pow(static_cast<float>(i) * invLinearToSrgbTableSize, invSourceGamma) + 0.5f);
2087       if(k < 0)
2088       {
2089         k = 0;
2090       }
2091       else if(k > MAX_UNSIGNED_CHAR)
2092       {
2093         k = MAX_UNSIGNED_CHAR;
2094       }
2095       linearToSrgb[i] = static_cast<unsigned char>(k);
2096     }
2097   }
2098
2099   std::vector<Resampler*>    resamplers(numChannels);
2100   std::vector<Vector<float>> samples(numChannels);
2101
2102   const int srcWidth  = inputDimensions.GetWidth();
2103   const int srcHeight = inputDimensions.GetHeight();
2104   const int dstWidth  = desiredDimensions.GetWidth();
2105   const int dstHeight = desiredDimensions.GetHeight();
2106
2107   // Now create a Resampler instance for each component to process. The first instance will create new contributor tables, which are shared by the resamplers
2108   // used for the other components (a memory and slight cache efficiency optimization).
2109   resamplers[0] = new Resampler(srcWidth,
2110                                 srcHeight,
2111                                 dstWidth,
2112                                 dstHeight,
2113                                 Resampler::BOUNDARY_CLAMP,
2114                                 0.0f,          // sample_low,
2115                                 1.0f,          // sample_high. Clamp output samples to specified range, or disable clamping if sample_low >= sample_high.
2116                                 filterType,    // The type of filter.
2117                                 NULL,          // Pclist_x,
2118                                 NULL,          // Pclist_y. Optional pointers to contributor lists from another instance of a Resampler.
2119                                 FILTER_SCALE,  // src_x_ofs,
2120                                 FILTER_SCALE); // src_y_ofs. Offset input image by specified amount (fractional values okay).
2121   samples[0].ResizeUninitialized(srcWidth);
2122   for(int i = 1; i < numChannels; ++i)
2123   {
2124     resamplers[i] = new Resampler(srcWidth,
2125                                   srcHeight,
2126                                   dstWidth,
2127                                   dstHeight,
2128                                   Resampler::BOUNDARY_CLAMP,
2129                                   0.0f,
2130                                   1.0f,
2131                                   filterType,
2132                                   resamplers[0]->get_clist_x(),
2133                                   resamplers[0]->get_clist_y(),
2134                                   FILTER_SCALE,
2135                                   FILTER_SCALE);
2136     samples[i].ResizeUninitialized(srcWidth);
2137   }
2138
2139   const int srcPitch = inputStride * numChannels;
2140   const int dstPitch = dstWidth * numChannels;
2141   int       dstY     = 0;
2142
2143   for(int srcY = 0; srcY < srcHeight; ++srcY)
2144   {
2145     const unsigned char* pSrc = &inPixels[srcY * srcPitch];
2146
2147     for(int x = 0; x < srcWidth; ++x)
2148     {
2149       for(int c = 0; c < numChannels; ++c)
2150       {
2151         if(c == ALPHA_CHANNEL && hasAlpha)
2152         {
2153           samples[c][x] = *pSrc++ * ONE_DIV_255;
2154         }
2155         else
2156         {
2157           samples[c][x] = srgbToLinear[*pSrc++];
2158         }
2159       }
2160     }
2161
2162     for(int c = 0; c < numChannels; ++c)
2163     {
2164       if(!resamplers[c]->put_line(&samples[c][0]))
2165       {
2166         DALI_ASSERT_DEBUG(!"Out of memory");
2167       }
2168     }
2169
2170     for(;;)
2171     {
2172       int compIndex;
2173       for(compIndex = 0; compIndex < numChannels; ++compIndex)
2174       {
2175         const float* pOutputSamples = resamplers[compIndex]->get_line();
2176         if(!pOutputSamples)
2177         {
2178           break;
2179         }
2180
2181         const bool isAlphaChannel = (compIndex == ALPHA_CHANNEL && hasAlpha);
2182         DALI_ASSERT_DEBUG(dstY < dstHeight);
2183         unsigned char* pDst = &outPixels[dstY * dstPitch + compIndex];
2184
2185         for(int x = 0; x < dstWidth; ++x)
2186         {
2187           if(isAlphaChannel)
2188           {
2189             int c = static_cast<int>(255.0f * pOutputSamples[x] + 0.5f);
2190             if(c < 0)
2191             {
2192               c = 0;
2193             }
2194             else if(c > MAX_UNSIGNED_CHAR)
2195             {
2196               c = MAX_UNSIGNED_CHAR;
2197             }
2198             *pDst = static_cast<unsigned char>(c);
2199           }
2200           else
2201           {
2202             int j = static_cast<int>(LINEAR_TO_SRGB_TABLE_SIZE * pOutputSamples[x] + 0.5f);
2203             if(j < 0)
2204             {
2205               j = 0;
2206             }
2207             else if(j >= LINEAR_TO_SRGB_TABLE_SIZE)
2208             {
2209               j = LINEAR_TO_SRGB_TABLE_SIZE - 1;
2210             }
2211             *pDst = linearToSrgb[j];
2212           }
2213
2214           pDst += numChannels;
2215         }
2216       }
2217       if(compIndex < numChannels)
2218       {
2219         break;
2220       }
2221
2222       ++dstY;
2223     }
2224   }
2225
2226   // Delete the resamplers.
2227   for(int i = 0; i < numChannels; ++i)
2228   {
2229     delete resamplers[i];
2230   }
2231 }
2232
2233 void LanczosSample4BPP(const unsigned char* __restrict__ inPixels,
2234                        ImageDimensions inputDimensions,
2235                        unsigned int    inputStride,
2236                        unsigned char* __restrict__ outPixels,
2237                        ImageDimensions desiredDimensions)
2238 {
2239   Resample(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions, Resampler::LANCZOS4, 4, true);
2240 }
2241
2242 void LanczosSample1BPP(const unsigned char* __restrict__ inPixels,
2243                        ImageDimensions inputDimensions,
2244                        unsigned int    inputStride,
2245                        unsigned char* __restrict__ outPixels,
2246                        ImageDimensions desiredDimensions)
2247 {
2248   // For L8 images
2249   Resample(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions, Resampler::LANCZOS4, 1, false);
2250 }
2251
2252 // Dispatch to a format-appropriate linear sampling function:
2253 void LinearSample(const unsigned char* __restrict__ inPixels,
2254                   ImageDimensions inDimensions,
2255                   unsigned int    inStride,
2256                   Pixel::Format   pixelFormat,
2257                   unsigned char* __restrict__ outPixels,
2258                   ImageDimensions outDimensions)
2259 {
2260   // Check the pixel format is one that is supported:
2261   if(pixelFormat == Pixel::RGB888 || pixelFormat == Pixel::RGBA8888 || pixelFormat == Pixel::L8 || pixelFormat == Pixel::A8 || pixelFormat == Pixel::LA88 || pixelFormat == Pixel::RGB565)
2262   {
2263     switch(pixelFormat)
2264     {
2265       case Pixel::RGB888:
2266       {
2267         LinearSample3BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2268         break;
2269       }
2270       case Pixel::RGBA8888:
2271       {
2272         LinearSample4BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2273         break;
2274       }
2275       case Pixel::L8:
2276       case Pixel::A8:
2277       {
2278         LinearSample1BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2279         break;
2280       }
2281       case Pixel::LA88:
2282       {
2283         LinearSample2BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2284         break;
2285       }
2286       case Pixel::RGB565:
2287       {
2288         LinearSampleRGB565(inPixels, inDimensions, inStride, outPixels, outDimensions);
2289         break;
2290       }
2291       default:
2292       {
2293         DALI_ASSERT_DEBUG(0 == "Inner branch conditions don't match outer branch.");
2294       }
2295     }
2296   }
2297   else
2298   {
2299     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Bitmap was not linear sampled: unsupported pixel format: %u.\n", unsigned(pixelFormat));
2300   }
2301 }
2302
2303 void RotateByShear(const uint8_t* const pixelsIn,
2304                    unsigned int         widthIn,
2305                    unsigned int         heightIn,
2306                    unsigned int         strideIn,
2307                    unsigned int         pixelSize,
2308                    float                radians,
2309                    uint8_t*&            pixelsOut,
2310                    unsigned int&        widthOut,
2311                    unsigned int&        heightOut)
2312 {
2313   // @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
2314
2315   // Do first the fast rotations to transform the angle into a (-45..45] range.
2316
2317   float fastRotationPerformed = false;
2318   if((radians > Math::PI_4) && (radians <= RAD_135))
2319   {
2320     // Angle in (45.0 .. 135.0]
2321     // Rotate image by 90 degrees into temporary image,
2322     // so it requires only an extra rotation angle
2323     // of -45.0 .. +45.0 to complete rotation.
2324     fastRotationPerformed = Rotate90(pixelsIn,
2325                                      widthIn,
2326                                      heightIn,
2327                                      strideIn,
2328                                      pixelSize,
2329                                      pixelsOut,
2330                                      widthOut,
2331                                      heightOut);
2332
2333     if(!fastRotationPerformed)
2334     {
2335       DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "fast rotation failed\n");
2336       // The fast rotation failed.
2337       return;
2338     }
2339
2340     radians -= Math::PI_2;
2341   }
2342   else if((radians > RAD_135) && (radians <= RAD_225))
2343   {
2344     // Angle in (135.0 .. 225.0]
2345     // Rotate image by 180 degrees into temporary image,
2346     // so it requires only an extra rotation angle
2347     // of -45.0 .. +45.0 to complete rotation.
2348
2349     fastRotationPerformed = Rotate180(pixelsIn,
2350                                       widthIn,
2351                                       heightIn,
2352                                       strideIn,
2353                                       pixelSize,
2354                                       pixelsOut);
2355
2356     if(!fastRotationPerformed)
2357     {
2358       DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "fast rotation failed\n");
2359       // The fast rotation failed.
2360       return;
2361     }
2362
2363     radians -= Math::PI;
2364     widthOut  = widthIn;
2365     heightOut = heightIn;
2366   }
2367   else if((radians > RAD_225) && (radians <= RAD_315))
2368   {
2369     // Angle in (225.0 .. 315.0]
2370     // Rotate image by 270 degrees into temporary image,
2371     // so it requires only an extra rotation angle
2372     // of -45.0 .. +45.0 to complete rotation.
2373
2374     fastRotationPerformed = Rotate270(pixelsIn,
2375                                       widthIn,
2376                                       heightIn,
2377                                       strideIn,
2378                                       pixelSize,
2379                                       pixelsOut,
2380                                       widthOut,
2381                                       heightOut);
2382
2383     if(!fastRotationPerformed)
2384     {
2385       DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "fast rotation failed\n");
2386       // The fast rotation failed.
2387       return;
2388     }
2389
2390     radians -= RAD_270;
2391   }
2392
2393   if(fabs(radians) < Dali::Math::MACHINE_EPSILON_10)
2394   {
2395     // Nothing else to do if the angle is zero.
2396     // The rotation angle was 90, 180 or 270.
2397
2398     // @note Allocated memory by 'Fast Rotations', if any, has to be freed by the called to this function.
2399     return;
2400   }
2401
2402   const uint8_t* const                      firstHorizontalSkewPixelsIn = fastRotationPerformed ? pixelsOut : pixelsIn;
2403   std::unique_ptr<uint8_t, void (*)(void*)> tmpPixelsInPtr((fastRotationPerformed ? pixelsOut : nullptr), free);
2404
2405   unsigned int stride = fastRotationPerformed ? widthOut : strideIn;
2406
2407   // Reset the input/output
2408   widthIn   = widthOut;
2409   heightIn  = heightOut;
2410   pixelsOut = nullptr;
2411
2412   const float angleSinus   = sin(radians);
2413   const float angleCosinus = cos(radians);
2414   const float angleTangent = tan(0.5f * radians);
2415
2416   ///////////////////////////////////////
2417   // Perform 1st shear (horizontal)
2418   ///////////////////////////////////////
2419
2420   // Calculate first shear (horizontal) destination image dimensions
2421
2422   widthOut  = widthIn + static_cast<unsigned int>(fabs(angleTangent) * static_cast<float>(heightIn));
2423   heightOut = heightIn;
2424
2425   // Allocate the buffer for the 1st shear
2426   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2427
2428   if(nullptr == pixelsOut)
2429   {
2430     widthOut  = 0u;
2431     heightOut = 0u;
2432
2433     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2434
2435     // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'Fast rotations'.
2436     // Nothing else to do if the memory allocation fails.
2437     return;
2438   }
2439
2440   for(unsigned int y = 0u; y < heightOut; ++y)
2441   {
2442     const float shear = angleTangent * ((angleTangent >= 0.f) ? (0.5f + static_cast<float>(y)) : (0.5f + static_cast<float>(y) - static_cast<float>(heightOut)));
2443
2444     const int intShear = static_cast<int>(floor(shear));
2445     HorizontalSkew(firstHorizontalSkewPixelsIn, widthIn, stride, pixelSize, pixelsOut, widthOut, y, intShear, shear - static_cast<float>(intShear));
2446   }
2447
2448   // Reset the 'pixel in' pointer with the output of the 'First Horizontal Skew' and free the memory allocated by the 'Fast Rotations'.
2449   tmpPixelsInPtr.reset(pixelsOut);
2450   unsigned int tmpWidthIn  = widthOut;
2451   unsigned int tmpHeightIn = heightOut;
2452
2453   // Reset the input/output
2454   pixelsOut = nullptr;
2455
2456   ///////////////////////////////////////
2457   // Perform 2nd shear (vertical)
2458   ///////////////////////////////////////
2459
2460   // Calc 2nd shear (vertical) destination image dimensions
2461   heightOut = static_cast<unsigned int>(static_cast<float>(widthIn) * fabs(angleSinus) + static_cast<float>(heightIn) * angleCosinus);
2462
2463   // Allocate the buffer for the 2nd shear
2464   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2465
2466   if(nullptr == pixelsOut)
2467   {
2468     widthOut  = 0u;
2469     heightOut = 0u;
2470
2471     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2472     // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'First Horizontal Skew'.
2473     // Nothing else to do if the memory allocation fails.
2474     return;
2475   }
2476
2477   // Variable skew offset
2478   float offset = angleSinus * ((angleSinus > 0.f) ? static_cast<float>(widthIn - 1u) : -(static_cast<float>(widthIn) - static_cast<float>(widthOut)));
2479
2480   unsigned int column = 0u;
2481   for(column = 0u; column < widthOut; ++column, offset -= angleSinus)
2482   {
2483     const int shear = static_cast<int>(floor(offset));
2484     VerticalSkew(tmpPixelsInPtr.get(), tmpWidthIn, tmpHeightIn, tmpWidthIn, pixelSize, pixelsOut, widthOut, heightOut, column, shear, offset - static_cast<float>(shear));
2485   }
2486   // Reset the 'pixel in' pointer with the output of the 'Vertical Skew' and free the memory allocated by the 'First Horizontal Skew'.
2487   // Reset the input/output
2488   tmpPixelsInPtr.reset(pixelsOut);
2489   tmpWidthIn  = widthOut;
2490   tmpHeightIn = heightOut;
2491   pixelsOut   = nullptr;
2492
2493   ///////////////////////////////////////
2494   // Perform 3rd shear (horizontal)
2495   ///////////////////////////////////////
2496
2497   // Calc 3rd shear (horizontal) destination image dimensions
2498   widthOut = static_cast<unsigned int>(static_cast<float>(heightIn) * fabs(angleSinus) + static_cast<float>(widthIn) * angleCosinus) + 1u;
2499
2500   // Allocate the buffer for the 3rd shear
2501   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2502
2503   if(nullptr == pixelsOut)
2504   {
2505     widthOut  = 0u;
2506     heightOut = 0u;
2507
2508     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2509     // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'Vertical Skew'.
2510     // Nothing else to do if the memory allocation fails.
2511     return;
2512   }
2513
2514   offset = (angleSinus >= 0.f) ? -angleSinus * angleTangent * static_cast<float>(widthIn - 1u) : angleTangent * (static_cast<float>(widthIn - 1u) * -angleSinus + (1.f - static_cast<float>(heightOut)));
2515
2516   for(unsigned int y = 0u; y < heightOut; ++y, offset += angleTangent)
2517   {
2518     const int shear = static_cast<int>(floor(offset));
2519     HorizontalSkew(tmpPixelsInPtr.get(), tmpWidthIn, tmpWidthIn, pixelSize, pixelsOut, widthOut, y, shear, offset - static_cast<float>(shear));
2520   }
2521
2522   // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'Vertical Skew'.
2523   // @note Allocated memory by the last 'Horizontal Skew' has to be freed by the caller to this function.
2524 }
2525
2526 void HorizontalShear(const uint8_t* const pixelsIn,
2527                      unsigned int         widthIn,
2528                      unsigned int         heightIn,
2529                      unsigned int         strideIn,
2530                      unsigned int         pixelSize,
2531                      float                radians,
2532                      uint8_t*&            pixelsOut,
2533                      unsigned int&        widthOut,
2534                      unsigned int&        heightOut)
2535 {
2536   // Calculate the destination image dimensions.
2537
2538   const float absRadians = fabs(radians);
2539
2540   if(absRadians > Math::PI_4)
2541   {
2542     // Can't shear more than 45 degrees.
2543     widthOut  = 0u;
2544     heightOut = 0u;
2545
2546     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Can't shear more than 45 degrees (PI/4 radians). radians : %f\n", radians);
2547     return;
2548   }
2549
2550   widthOut  = widthIn + static_cast<unsigned int>(ceil(absRadians * static_cast<float>(heightIn)));
2551   heightOut = heightIn;
2552
2553   // Allocate the buffer for the shear.
2554   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2555
2556   if(nullptr == pixelsOut)
2557   {
2558     widthOut  = 0u;
2559     heightOut = 0u;
2560
2561     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2562     return;
2563   }
2564
2565   for(unsigned int y = 0u; y < heightOut; ++y)
2566   {
2567     const float shear = radians * ((radians >= 0.f) ? (0.5f + static_cast<float>(y)) : (0.5f + static_cast<float>(y) - static_cast<float>(heightOut)));
2568
2569     const int intShear = static_cast<int>(floor(shear));
2570     HorizontalSkew(pixelsIn, widthIn, strideIn, pixelSize, pixelsOut, widthOut, y, intShear, shear - static_cast<float>(intShear));
2571   }
2572 }
2573
2574 } /* namespace Platform */
2575 } /* namespace Internal */
2576 } /* namespace Dali */