dali/internal/imaging/common/image-operations.cpp

   1 /*
   2  * Copyright (c) 2024 Samsung Electronics Co., Ltd.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  *
  16  */
  17
  18 #include <dali/internal/imaging/common/image-operations.h>
  19
  20 // EXTERNAL INCLUDES
  21 #include <dali/devel-api/adaptor-framework/image-loading.h>
  22 #include <dali/integration-api/debug.h>
  23 #include <dali/integration-api/trace.h>
  24 #include <dali/public-api/common/dali-vector.h>
  25 #include <dali/public-api/math/vector2.h>
  26 #include <stddef.h>
  27 #include <third-party/resampler/resampler.h>
  28 #include <cmath>
  29 #include <cstring>
  30 #include <limits>
  31 #include <memory>
  32
  33 // INTERNAL INCLUDES
  34
  35 namespace Dali
  36 {
  37 namespace Internal
  38 {
  39 namespace Platform
  40 {
  41 namespace
  42 {
  43 // The BORDER_FILL_VALUE is a single byte value that is used for horizontal and vertical borders.
  44 // A value of 0x00 gives us transparency for pixel buffers with an alpha channel, or black otherwise.
  45 // We can optionally use a Vector4 color here, but at reduced fill speed.
  46 const uint8_t BORDER_FILL_VALUE(0x00);
  47 // A maximum size limit for newly created bitmaps. ( 1u << 16 ) - 1 is chosen as we are using 16bit words for dimensions.
  48 const uint32_t MAXIMUM_TARGET_BITMAP_SIZE((1u << 16) - 1);
  49
  50 // Constants used by the ImageResampler.
  51 const float DEFAULT_SOURCE_GAMMA = 1.75f; ///< Default source gamma value used in the Resampler() function. Partial gamma correction looks better on mips. Set to 1.0 to disable gamma correction.
  52 const float FILTER_SCALE         = 1.f;   ///< Default filter scale value used in the Resampler() function. Filter scale - values < 1.0 cause aliasing, but create sharper looking mips.
  53
  54 const float RAD_135 = Math::PI_2 + Math::PI_4; ///< 135 degrees in radians;
  55 const float RAD_225 = RAD_135 + Math::PI_2;    ///< 225 degrees in radians;
  56 const float RAD_270 = 3.f * Math::PI_2;        ///< 270 degrees in radians;
  57 const float RAD_315 = RAD_225 + Math::PI_2;    ///< 315 degrees in radians;
  58
  59 using Integration::Bitmap;
  60 using Integration::BitmapPtr;
  61 typedef uint8_t PixelBuffer;
  62
  63 DALI_INIT_TRACE_FILTER(gTraceFilter, DALI_TRACE_IMAGE_PERFORMANCE_MARKER, false);
  64
  65 /**
  66  * @brief 4 byte pixel structure.
  67  */
  68 struct Pixel4Bytes
  69 {
  70   uint8_t r;
  71   uint8_t g;
  72   uint8_t b;
  73   uint8_t a;
  74 } __attribute__((packed, aligned(4))); //< Tell the compiler it is okay to use a single 32 bit load.
  75
  76 /**
  77  * @brief RGB888 pixel structure.
  78  */
  79 struct Pixel3Bytes
  80 {
  81   uint8_t r;
  82   uint8_t g;
  83   uint8_t b;
  84 } __attribute__((packed, aligned(1)));
  85
  86 /**
  87  * @brief RGB565 pixel typedefed from a short.
  88  *
  89  * Access fields by manual shifting and masking.
  90  */
  91 typedef uint16_t PixelRGB565;
  92
  93 /**
  94  * @brief a Pixel composed of two independent byte components.
  95  */
  96 struct Pixel2Bytes
  97 {
  98   uint8_t l;
  99   uint8_t a;
 100 } __attribute__((packed, aligned(2))); //< Tell the compiler it is okay to use a single 16 bit load.
 101
 102 #if defined(DEBUG_ENABLED)
 103 /**
 104  * Disable logging of image operations or make it verbose from the commandline
 105  * as follows (e.g., for dali demo app):
 106  * <code>
 107  * LOG_IMAGE_OPERATIONS=0 dali-demo #< off
 108  * LOG_IMAGE_OPERATIONS=3 dali-demo #< on, verbose
 109  * </code>
 110  */
 111 Debug::Filter* gImageOpsLogFilter = Debug::Filter::New(Debug::NoLogging, false, "LOG_IMAGE_OPERATIONS");
 112 #endif
 113
 114 /** @return The greatest even number less than or equal to the argument. */
 115 inline uint32_t EvenDown(const uint32_t a)
 116 {
 117   const uint32_t evened = a & ~1u;
 118   return evened;
 119 }
 120
 121 /**
 122  * @brief Log bad parameters.
 123  */
 124 void ValidateScalingParameters(const uint32_t inputWidth,
 125                                const uint32_t inputHeight,
 126                                const uint32_t desiredWidth,
 127                                const uint32_t desiredHeight)
 128 {
 129   if(desiredWidth > inputWidth || desiredHeight > inputHeight)
 130   {
 131     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Upscaling not supported (%u, %u -> %u, %u).\n", inputWidth, inputHeight, desiredWidth, desiredHeight);
 132   }
 133
 134   if(desiredWidth == 0u || desiredHeight == 0u)
 135   {
 136     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Downscaling to a zero-area target is pointless.\n");
 137   }
 138
 139   if(inputWidth == 0u || inputHeight == 0u)
 140   {
 141     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Zero area images cannot be scaled\n");
 142   }
 143 }
 144
  145 /**
  146  * @brief Do the debug assertions that are common to all scanline halving functions.
       *
       * Checks that the pixel pointer is non-null, that there are at least two pixels
       * to average and that the width is below 131072 (a sanity limit against garbage values).
       *
  147  * @note Inline and in anon namespace so should boil away in release builds.
       * @param[in] pixels The scanline about to be processed.
       * @param[in] width The width value to check, compared against the pixel-count limits above.
  148  */
  149 inline void DebugAssertScanlineParameters(const uint8_t* const pixels, const uint32_t width)
  150 {
  151   DALI_ASSERT_DEBUG(pixels && "Null pointer.");
  152   DALI_ASSERT_DEBUG(width > 1u && "Can't average fewer than two pixels.");
        // Not a hard limit of the algorithms: it flags values that are most likely a caller mistake.
  153   DALI_ASSERT_DEBUG(width < 131072u && "Unusually wide image: are you sure you meant to pass that value in?");
  154 }
 155
 156 /**
 157  * @brief Assertions on params to functions averaging pairs of scanlines.
 158  * @note Inline as intended to boil away in release.
 159  */
 160 inline void DebugAssertDualScanlineParameters(const uint8_t* const scanline1,
 161                                               const uint8_t* const scanline2,
 162                                               uint8_t* const       outputScanline,
 163                                               const size_t         widthInComponents)
 164 {
 165   DALI_ASSERT_DEBUG(scanline1 && "Null pointer.");
 166   DALI_ASSERT_DEBUG(scanline2 && "Null pointer.");
 167   DALI_ASSERT_DEBUG(outputScanline && "Null pointer.");
 168   DALI_ASSERT_DEBUG(((scanline1 >= scanline2 + widthInComponents) || (scanline2 >= scanline1 + widthInComponents)) && "Scanlines alias.");
 169   DALI_ASSERT_DEBUG(((outputScanline >= (scanline2 + widthInComponents)) || (scanline2 >= (scanline1 + widthInComponents))) && "Scanline 2 aliases output.");
 170 }
 171
 172 /**
 173  * @brief Converts a scaling mode to the definition of which dimensions matter when box filtering as a part of that mode.
 174  */
 175 BoxDimensionTest DimensionTestForScalingMode(FittingMode::Type fittingMode)
 176 {
 177   BoxDimensionTest dimensionTest;
 178   dimensionTest = BoxDimensionTestEither;
 179
 180   switch(fittingMode)
 181   {
 182     // Shrink to fit attempts to make one or zero dimensions smaller than the
 183     // desired dimensions and one or two dimensions exactly the same as the desired
 184     // ones, so as long as one dimension is larger than the desired size, box
 185     // filtering can continue even if the second dimension is smaller than the
 186     // desired dimensions:
 187     case FittingMode::SHRINK_TO_FIT:
 188     {
 189       dimensionTest = BoxDimensionTestEither;
 190       break;
 191     }
 192     // Scale to fill mode keeps both dimensions at least as large as desired:
 193     case FittingMode::SCALE_TO_FILL:
 194     case FittingMode::VISUAL_FITTING:
 195     {
 196       dimensionTest = BoxDimensionTestBoth;
 197       break;
 198     }
 199     // Y dimension is irrelevant when downscaling in FIT_WIDTH mode:
 200     case FittingMode::FIT_WIDTH:
 201     {
 202       dimensionTest = BoxDimensionTestX;
 203       break;
 204     }
 205     // X Dimension is ignored by definition in FIT_HEIGHT mode:
 206     case FittingMode::FIT_HEIGHT:
 207     {
 208       dimensionTest = BoxDimensionTestY;
 209       break;
 210     }
 211   }
 212
 213   return dimensionTest;
 214 }
 215
 216 /**
 217  * @brief Work out the dimensions for a uniform scaling of the input to map it
 218  * into the target while effecting ShinkToFit scaling mode.
 219  */
 220 ImageDimensions FitForShrinkToFit(ImageDimensions target, ImageDimensions source)
 221 {
 222   // Scale the input by the least extreme of the two dimensions:
 223   const float widthScale  = target.GetX() / float(source.GetX());
 224   const float heightScale = target.GetY() / float(source.GetY());
 225   const float scale       = widthScale < heightScale ? widthScale : heightScale;
 226
 227   // Do no scaling at all if the result would increase area:
 228   if(scale >= 1.0f)
 229   {
 230     return source;
 231   }
 232
 233   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 234 }
 235
 236 /**
 237  * @brief Work out the dimensions for a uniform scaling of the input to map it
 238  * into the target while effecting SCALE_TO_FILL scaling mode.
 239  * @note An image scaled into the output dimensions will need either top and
 240  * bottom or left and right to be cropped away unless the source was pre-cropped
 241  * to match the destination aspect ratio.
 242  */
 243 ImageDimensions FitForScaleToFill(ImageDimensions target, ImageDimensions source)
 244 {
 245   DALI_ASSERT_DEBUG(source.GetX() > 0 && source.GetY() > 0 && "Zero-area rectangles should not be passed-in");
 246   // Scale the input by the least extreme of the two dimensions:
 247   const float widthScale  = target.GetX() / float(source.GetX());
 248   const float heightScale = target.GetY() / float(source.GetY());
 249   const float scale       = widthScale > heightScale ? widthScale : heightScale;
 250
 251   // Do no scaling at all if the result would increase area:
 252   if(scale >= 1.0f)
 253   {
 254     return source;
 255   }
 256
 257   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 258 }
 259
 260 /**
 261  * @brief Work out the dimensions for a uniform scaling of the input to map it
 262  * into the target while effecting FIT_WIDTH scaling mode.
 263  */
 264 ImageDimensions FitForFitWidth(ImageDimensions target, ImageDimensions source)
 265 {
 266   DALI_ASSERT_DEBUG(source.GetX() > 0 && "Cant fit a zero-dimension rectangle.");
 267   const float scale = target.GetX() / float(source.GetX());
 268
 269   // Do no scaling at all if the result would increase area:
 270   if(scale >= 1.0f)
 271   {
 272     return source;
 273   }
 274   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 275 }
 276
 277 /**
 278  * @brief Work out the dimensions for a uniform scaling of the input to map it
 279  * into the target while effecting FIT_HEIGHT scaling mode.
 280  */
 281 ImageDimensions FitForFitHeight(ImageDimensions target, ImageDimensions source)
 282 {
 283   DALI_ASSERT_DEBUG(source.GetY() > 0 && "Cant fit a zero-dimension rectangle.");
 284   const float scale = target.GetY() / float(source.GetY());
 285
 286   // Do no scaling at all if the result would increase area:
 287   if(scale >= 1.0f)
 288   {
 289     return source;
 290   }
 291
 292   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 293 }
 294
 295 /**
 296  * @brief Generate the rectangle to use as the target of a pixel sampling pass
 297  * (e.g., nearest or linear).
 298  */
 299 ImageDimensions FitToScalingMode(ImageDimensions requestedSize, ImageDimensions sourceSize, FittingMode::Type fittingMode)
 300 {
 301   ImageDimensions fitDimensions;
 302   switch(fittingMode)
 303   {
 304     case FittingMode::SHRINK_TO_FIT:
 305     {
 306       fitDimensions = FitForShrinkToFit(requestedSize, sourceSize);
 307       break;
 308     }
 309     case FittingMode::SCALE_TO_FILL:
 310     case FittingMode::VISUAL_FITTING:
 311     {
 312       fitDimensions = FitForScaleToFill(requestedSize, sourceSize);
 313       break;
 314     }
 315     case FittingMode::FIT_WIDTH:
 316     {
 317       fitDimensions = FitForFitWidth(requestedSize, sourceSize);
 318       break;
 319     }
 320     case FittingMode::FIT_HEIGHT:
 321     {
 322       fitDimensions = FitForFitHeight(requestedSize, sourceSize);
 323       break;
 324     }
 325   }
 326
 327   return fitDimensions;
 328 }
 329
 330 /**
 331  * @brief Calculate the number of lines on the X and Y axis that need to be
 332  * either added or removed with repect to the specified fitting mode.
 333  * (e.g., nearest or linear).
 334  * @param[in]     sourceSize      The size of the source image
 335  * @param[in]     fittingMode     The fitting mode to use
 336  * @param[in/out] requestedSize   The target size that the image will be fitted to.
 337  *                                If the source image is smaller than the requested size, the source is not scaled up.
 338  *                                So we reduce the target size while keeping aspect by lowering resolution.
 339  * @param[out]    scanlinesToCrop The number of scanlines to remove from the image (can be negative to represent Y borders required)
 340  * @param[out]    columnsToCrop   The number of columns to remove from the image (can be negative to represent X borders required)
 341  */
 342 void CalculateBordersFromFittingMode(ImageDimensions sourceSize, FittingMode::Type fittingMode, ImageDimensions& requestedSize, int& scanlinesToCrop, int& columnsToCrop)
 343 {
 344   const int   sourceWidth(static_cast<int>(sourceSize.GetWidth()));
 345   const int   sourceHeight(static_cast<int>(sourceSize.GetHeight()));
 346   const float targetAspect(static_cast<float>(requestedSize.GetWidth()) / static_cast<float>(requestedSize.GetHeight()));
 347   int         finalWidth  = 0;
 348   int         finalHeight = 0;
 349
 350   switch(fittingMode)
 351   {
 352     case FittingMode::FIT_WIDTH:
 353     {
 354       finalWidth  = sourceWidth;
 355       finalHeight = static_cast<int>(static_cast<float>(sourceWidth) / targetAspect);
 356       break;
 357     }
 358
 359     case FittingMode::FIT_HEIGHT:
 360     {
 361       finalWidth  = static_cast<int>(static_cast<float>(sourceHeight) * targetAspect);
 362       finalHeight = sourceHeight;
 363       break;
 364     }
 365
 366     case FittingMode::SHRINK_TO_FIT:
 367     {
 368       const float sourceAspect(static_cast<float>(sourceWidth) / static_cast<float>(sourceHeight));
 369       if(sourceAspect > targetAspect)
 370       {
 371         finalWidth  = sourceWidth;
 372         finalHeight = static_cast<int>(static_cast<float>(sourceWidth) / targetAspect);
 373       }
 374       else
 375       {
 376         finalWidth  = static_cast<int>(static_cast<float>(sourceHeight) * targetAspect);
 377         finalHeight = sourceHeight;
 378       }
 379       break;
 380     }
 381
 382     case FittingMode::SCALE_TO_FILL:
 383     case FittingMode::VISUAL_FITTING:
 384     {
 385       const float sourceAspect(static_cast<float>(sourceWidth) / static_cast<float>(sourceHeight));
 386       if(sourceAspect > targetAspect)
 387       {
 388         finalWidth  = static_cast<int>(static_cast<float>(sourceHeight) * targetAspect);
 389         finalHeight = sourceHeight;
 390       }
 391       else
 392       {
 393         finalWidth  = sourceWidth;
 394         finalHeight = static_cast<int>(static_cast<float>(sourceWidth) / targetAspect);
 395       }
 396       break;
 397     }
 398   }
 399
 400   // Clamp if overflowed
 401   if(DALI_UNLIKELY(finalWidth > std::numeric_limits<uint16_t>::max()))
 402   {
 403     finalWidth = std::numeric_limits<uint16_t>::max();
 404   }
 405   if(DALI_UNLIKELY(finalHeight > std::numeric_limits<uint16_t>::max()))
 406   {
 407     finalHeight = std::numeric_limits<uint16_t>::max();
 408   }
 409
 410   columnsToCrop   = -(finalWidth - sourceWidth);
 411   scanlinesToCrop = -(finalHeight - sourceHeight);
 412
 413   requestedSize.SetWidth(static_cast<uint16_t>(finalWidth));
 414   requestedSize.SetHeight(static_cast<uint16_t>(finalHeight));
 415 }
 416
 417 /**
 418  * @brief Construct a pixel buffer object from a copy of the pixel array passed in.
 419  */
 420 Dali::Devel::PixelBuffer MakePixelBuffer(const uint8_t* const pixels, Pixel::Format pixelFormat, uint32_t width, uint32_t height)
 421 {
 422   DALI_ASSERT_DEBUG(pixels && "Null bitmap buffer to copy.");
 423
 424   // Allocate a pixel buffer to hold the image passed in:
 425   auto newBitmap = Dali::Devel::PixelBuffer::New(width, height, pixelFormat);
 426
 427   // Copy over the pixels from the downscaled image that was generated in-place in the pixel buffer of the input bitmap:
 428   memcpy(newBitmap.GetBuffer(), pixels, width * height * Pixel::GetBytesPerPixel(pixelFormat));
 429   return newBitmap;
 430 }
 431
 432 /**
 433  * @brief Work out the desired width and height, accounting for zeros.
 434  *
 435  * @param[in] bitmapWidth Width of image before processing.
 436  * @param[in] bitmapHeight Height of image before processing.
 437  * @param[in] requestedWidth Width of area to scale image into. Can be zero.
 438  * @param[in] requestedHeight Height of area to scale image into. Can be zero.
 439  * @return Dimensions of area to scale image into after special rules are applied.
 440  */
 441 ImageDimensions CalculateDesiredDimensions(uint32_t bitmapWidth, uint32_t bitmapHeight, uint32_t requestedWidth, uint32_t requestedHeight, FittingMode::Type fittingMode)
 442 {
 443   uint32_t maxSize = Dali::GetMaxTextureSize();
 444
 445   // If no dimensions have been requested, default to the source ones:
 446   if(requestedWidth == 0 && requestedHeight == 0)
 447   {
 448     if(bitmapWidth <= maxSize && bitmapHeight <= maxSize)
 449     {
 450       return ImageDimensions(bitmapWidth, bitmapHeight);
 451     }
 452     else
 453     {
 454       // Calculate the size from the max texture size and the source image aspect ratio
 455       if(bitmapWidth > bitmapHeight)
 456       {
 457         return ImageDimensions(maxSize, bitmapHeight * maxSize / static_cast<float>(bitmapWidth) + 0.5f);
 458       }
 459       else
 460       {
 461         return ImageDimensions(bitmapWidth * maxSize / static_cast<float>(bitmapHeight) + 0.5f, maxSize);
 462       }
 463     }
 464   }
 465
 466   // If both dimensions have values requested, use them both:
 467   if(requestedWidth != 0 && requestedHeight != 0)
 468   {
 469     DALI_ASSERT_DEBUG( (bitmapWidth > 0 && bitmapHeight > 0) && "Bitmap dimensions are zero");
 470
 471     if(fittingMode == FittingMode::VISUAL_FITTING)
 472     {
 473       uint32_t adjustedDesiredWidth, adjustedDesiredHeight;
 474       float aspectOfDesiredSize = (float)requestedHeight / (float)requestedWidth;
 475       float aspectOfImageSize = (float)bitmapHeight / (float)bitmapWidth;
 476       if (aspectOfImageSize > aspectOfDesiredSize)
 477       {
 478         adjustedDesiredWidth = requestedWidth;
 479         adjustedDesiredHeight = static_cast<uint64_t>(bitmapHeight) * requestedWidth / bitmapWidth;
 480       }
 481       else
 482       {
 483         adjustedDesiredWidth = static_cast<uint64_t>(bitmapWidth) * requestedHeight / bitmapHeight;
 484         adjustedDesiredHeight = requestedHeight;
 485       }
 486
 487       requestedWidth = adjustedDesiredWidth;
 488       requestedHeight = adjustedDesiredHeight;
 489     }
 490
 491     if(requestedWidth <= maxSize && requestedHeight <= maxSize)
 492     {
 493       return ImageDimensions(requestedWidth, requestedHeight);
 494     }
 495     else
 496     {
 497       // Calculate the size from the max texture size and the source image aspect ratio
 498       if(requestedWidth > requestedHeight)
 499       {
 500         return ImageDimensions(maxSize, requestedHeight * maxSize / static_cast<float>(requestedWidth) + 0.5f);
 501       }
 502       else
 503       {
 504         return ImageDimensions(requestedWidth * maxSize / static_cast<float>(requestedHeight) + 0.5f, maxSize);
 505       }
 506     }
 507   }
 508
 509   // Only one of the dimensions has been requested. Calculate the other from
 510   // the requested one and the source image aspect ratio:
 511   if(requestedWidth != 0)
 512   {
 513     requestedWidth = std::min(requestedWidth, maxSize);
 514     return ImageDimensions(requestedWidth, bitmapHeight / float(bitmapWidth) * requestedWidth + 0.5f);
 515   }
 516
 517   requestedHeight = std::min(requestedHeight, maxSize);
 518   return ImageDimensions(bitmapWidth / float(bitmapHeight) * requestedHeight + 0.5f, requestedHeight);
 519 }
 520
 521 /**
 522  * @brief Rotates the given buffer @p pixelsIn 90 degrees counter clockwise.
 523  *
 524  * @note It allocates memory for the returned @p pixelsOut buffer.
 525  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 526  * @note It may fail if malloc() fails to allocate memory.
 527  *
 528  * @param[in] pixelsIn The input buffer.
 529  * @param[in] widthIn The width of the input buffer.
 530  * @param[in] heightIn The height of the input buffer.
 531  * @param[in] strideIn The stride of the input buffer.
 532  * @param[in] pixelSize The size of the pixel.
 533  * @param[out] pixelsOut The rotated output buffer.
 534  * @param[out] widthOut The width of the output buffer.
 535  * @param[out] heightOut The height of the output buffer.
 536  *
 537  * @return Whether the rotation succeeded.
 538  */
 539 bool Rotate90(const uint8_t* const pixelsIn,
 540               uint32_t             widthIn,
 541               uint32_t             heightIn,
 542               uint32_t             strideIn,
 543               uint32_t             pixelSize,
 544               uint8_t*&            pixelsOut,
 545               uint32_t&            widthOut,
 546               uint32_t&            heightOut)
 547 {
 548   // The new size of the image.
 549   widthOut  = heightIn;
 550   heightOut = widthIn;
 551
 552   // Allocate memory for the rotated buffer.
 553   // Output buffer is tightly packed
 554   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
 555   if(nullptr == pixelsOut)
 556   {
 557     DALI_LOG_ERROR("malloc is failed. request malloc size : %u x %u x %u\n", widthOut, heightOut, pixelSize);
 558     widthOut  = 0u;
 559     heightOut = 0u;
 560
 561     // Return if the memory allocations fails.
 562     return false;
 563   }
 564
 565   DALI_TRACE_SCOPE(gTraceFilter, "DALI_BITMAP_ROTATE_90");
 566
 567   // Rotate the buffer.
 568   for(uint32_t y = 0u; y < heightIn; ++y)
 569   {
 570     const uint32_t srcLineIndex = y * strideIn;
 571     const uint32_t dstX         = y;
 572     for(uint32_t x = 0u; x < widthIn; ++x)
 573     {
 574       const uint32_t dstY     = heightOut - x - 1u;
 575       const uint32_t dstIndex = pixelSize * (dstY * widthOut + dstX);
 576       const uint32_t srcIndex = pixelSize * (srcLineIndex + x);
 577
 578       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 579       {
 580         *(pixelsOut + dstIndex + channel) = *(pixelsIn + srcIndex + channel);
 581       }
 582     }
 583   }
 584
 585   return true;
 586 }
 587
 588 /**
 589  * @brief Rotates the given buffer @p pixelsIn 180 degrees counter clockwise.
 590  *
 591  * @note It allocates memory for the returned @p pixelsOut buffer.
 592  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 593  * @note It may fail if malloc() fails to allocate memory.
 594  *
 595  * @param[in] pixelsIn The input buffer.
 596  * @param[in] widthIn The width of the input buffer.
 597  * @param[in] heightIn The height of the input buffer.
 598  * @param[in] strideIn The stride of the input buffer.
 599  * @param[in] pixelSize The size of the pixel.
 600  * @param[out] pixelsOut The rotated output buffer.
 601  *
 602  * @return Whether the rotation succeeded.
 603  */
 604 bool Rotate180(const uint8_t* const pixelsIn,
 605                uint32_t             widthIn,
 606                uint32_t             heightIn,
 607                uint32_t             strideIn,
 608                uint32_t             pixelSize,
 609                uint8_t*&            pixelsOut)
 610 {
 611   // Allocate memory for the rotated buffer.
 612   // Output buffer is tightly packed
 613   pixelsOut = static_cast<uint8_t*>(malloc(widthIn * heightIn * pixelSize));
 614   if(nullptr == pixelsOut)
 615   {
 616     DALI_LOG_ERROR("malloc is failed. request malloc size : %u x %u x %u\n", widthIn, heightIn, pixelSize);
 617     // Return if the memory allocations fails.
 618     return false;
 619   }
 620
 621   DALI_TRACE_SCOPE(gTraceFilter, "DALI_BITMAP_ROTATE_180");
 622
 623   // Rotate the buffer.
 624   for(uint32_t y = 0u; y < heightIn; ++y)
 625   {
 626     const uint32_t srcLineIndex = y * strideIn;
 627     const uint32_t dstY         = heightIn - y - 1u;
 628     for(uint32_t x = 0u; x < widthIn; ++x)
 629     {
 630       const uint32_t dstX     = widthIn - x - 1u;
 631       const uint32_t dstIndex = pixelSize * (dstY * widthIn + dstX);
 632       const uint32_t srcIndex = pixelSize * (srcLineIndex + x);
 633
 634       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 635       {
 636         *(pixelsOut + dstIndex + channel) = *(pixelsIn + srcIndex + channel);
 637       }
 638     }
 639   }
 640
 641   return true;
 642 }
 643
 644 /**
 645  * @brief Rotates the given buffer @p pixelsIn 270 degrees counter clockwise.
 646  *
 647  * @note It allocates memory for the returned @p pixelsOut buffer.
 648  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 649  * @note It may fail if malloc() fails to allocate memory.
 650  *
 651  * @param[in] pixelsIn The input buffer.
 652  * @param[in] widthIn The width of the input buffer.
 653  * @param[in] heightIn The height of the input buffer.
 654  * @param[in] strideIn The stride of the input buffer.
 655  * @param[in] pixelSize The size of the pixel.
 656  * @param[out] pixelsOut The rotated output buffer.
 657  * @param[out] widthOut The width of the output buffer.
 658  * @param[out] heightOut The height of the output buffer.
 659  *
 660  * @return Whether the rotation succeeded.
 661  */
 662 bool Rotate270(const uint8_t* const pixelsIn,
 663                uint32_t             widthIn,
 664                uint32_t             heightIn,
 665                uint32_t             strideIn,
 666                uint32_t             pixelSize,
 667                uint8_t*&            pixelsOut,
 668                uint32_t&            widthOut,
 669                uint32_t&            heightOut)
 670 {
 671   // The new size of the image.
 672   widthOut  = heightIn;
 673   heightOut = widthIn;
 674
 675   // Allocate memory for the rotated buffer.
 676   // Output buffer is tightly packed
 677   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
 678   if(nullptr == pixelsOut)
 679   {
 680     DALI_LOG_ERROR("malloc is failed. request malloc size : %u x %u x %u\n", widthOut, heightOut, pixelSize);
 681     widthOut  = 0u;
 682     heightOut = 0u;
 683
 684     // Return if the memory allocations fails.
 685     return false;
 686   }
 687
 688   DALI_TRACE_SCOPE(gTraceFilter, "DALI_BITMAP_ROTATE_270");
 689
 690   // Rotate the buffer.
 691   for(uint32_t y = 0u; y < heightIn; ++y)
 692   {
 693     const uint32_t srcLineIndex = y * strideIn;
 694     const uint32_t dstX         = widthOut - y - 1u;
 695     for(uint32_t x = 0u; x < widthIn; ++x)
 696     {
 697       const uint32_t dstY     = x;
 698       const uint32_t dstIndex = pixelSize * (dstY * widthOut + dstX);
 699       const uint32_t srcIndex = pixelSize * (srcLineIndex + x);
 700
 701       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 702       {
 703         *(pixelsOut + dstIndex + channel) = *(pixelsIn + srcIndex + channel);
 704       }
 705     }
 706   }
 707
 708   return true;
 709 }
 710
  711 /**
  712  * @brief Skews a single row horizontally (with filtered weights), writing it into row @p row of the output buffer.
  713  *
       * For every pixel of the source row the fraction @p weight of each channel (truncated to a byte)
       * is held back and handed on to the next pixel, while the part held back from the previous pixel
       * is added in. The resulting pixel is stored @p offset pixels further along the output row, if
       * that position is inside the row. The gap before the skewed pixels (for a positive @p offset)
       * is filled with zeros; the part held back from the last pixel is stored after them and the
       * rest of the row is filled with zeros.
       *
  714  * @note Limited to 45 degree skewing only.
  715  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
       * @note NOTE(review): the per-pixel scratch arrays hold four channels, so this presumably expects
       *       @p pixelSize to be at most 4 - confirm against the callers.
       * @note NOTE(review): the leading fill writes pixelSize * offset bytes without comparing @p offset
       *       to @p dstWidth, so it presumably relies on the caller to keep the offset inside the row - confirm.
  716  *
  717  * @param[in] srcBufferPtr Pointer to the input pixel buffer.
  718  * @param[in] srcWidth The width of the input pixel buffer, in pixels.
  719  * @param[in] srcStride The stride of the input pixel buffer, in pixels (row starts are computed as row * srcStride).
  720  * @param[in] pixelSize The size of the pixel, in bytes.
  721  * @param[in,out] dstBufferPtr Pointer to the output pixel buffer, addressed as tightly packed rows of @p dstWidth pixels.
  722  * @param[in] dstWidth The width of the output pixel buffer, in pixels.
  723  * @param[in] row The row index, used for both the input and the output buffer.
  724  * @param[in] offset The skew offset, in pixels. Can be negative.
  725  * @param[in] weight The relative weight of right pixel.
  726  */
  727 void HorizontalSkew(const uint8_t* const srcBufferPtr,
  728                     uint32_t             srcWidth,
  729                     uint32_t             srcStride,
  730                     uint32_t             pixelSize,
  731                     uint8_t*&            dstBufferPtr,
  732                     uint32_t             dstWidth,
  733                     uint32_t             row,
  734                     int32_t              offset,
  735                     float                weight)
  736 {
  737   DALI_TRACE_SCOPE(gTraceFilter, "DALI_BITMAP_HORIZONTAL_SKEW");
  738   if(offset > 0)
  739   {
  740     // Fill gap left of skew with background: zero the first 'offset' pixels of the output row.
  741     memset(dstBufferPtr + row * pixelSize * dstWidth, 0u, pixelSize * offset);
  742   }
  743
        // Part of the previous pixel that was held back, to be added into the current one.
  744   uint8_t oldLeft[4u] = {0u, 0u, 0u, 0u};
  745
  746   for(uint32_t i = 0u; i < srcWidth; ++i)
  747   {
  748     // Loop through row pixels
  749     const uint32_t srcIndex = pixelSize * (row * srcStride + i);
  750
          // Local copy of the source pixel; channels beyond pixelSize stay zero.
  751     uint8_t src[4u] = {0u, 0u, 0u, 0u};
  752     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
  753     {
  754       src[channel] = *(srcBufferPtr + srcIndex + channel);
  755     }
  756
  757     // Calculate weights
  758     uint8_t left[4u] = {0u, 0u, 0u, 0u};
  759     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
  760     {
            // Part of this pixel that is handed on to the next one (truncated to a byte).
  761       left[channel] = static_cast<uint8_t>(static_cast<float>(src[channel]) * weight);
  762
  763       // Update left over on source: take away this pixel's held back part, add the previous pixel's one.
  764       src[channel] -= (left[channel] - oldLeft[channel]);
  765     }
  766
  767     // Check boundaries: only store the pixel if its skewed column lies inside the output row.
  768     if((static_cast<int32_t>(i) + offset >= 0) && (i + offset < dstWidth))
  769     {
  770       const uint32_t dstIndex = pixelSize * (row * dstWidth + i + offset);
  771
  772       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
  773       {
  774         *(dstBufferPtr + dstIndex + channel) = src[channel];
  775       }
  776     }
  777
  778     // Save leftover for next pixel in scan
  779     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
  780     {
  781       oldLeft[channel] = left[channel];
  782     }
  783   }
  784
  785   // Go to rightmost point of skew. The lower bound keeps row * dstWidth + i from going below zero.
  786   int32_t i = std::max(static_cast<int32_t>(srcWidth) + offset, -static_cast<int32_t>(dstWidth * row));
  787   if(i < static_cast<int32_t>(dstWidth))
  788   {
  789     // If still in image bounds, put leftovers there
  790     const uint32_t dstIndex = pixelSize * (row * dstWidth + i);
  791
  792     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
  793     {
  794       *(dstBufferPtr + dstIndex + channel) = oldLeft[channel];
  795     }
  796
  797     // Clear to the right of the skewed line with background (zeros up to the end of the row)
  798     ++i;
  799     memset(dstBufferPtr + pixelSize * (row * dstWidth + i), 0u, pixelSize * (dstWidth - i));
  800   }
  801 }
 802
 803 /**
 804  * @brief Skews a column vertically (with filtered weights)
 805  *
 806  * @note Limited to 45 degree skewing only.
 807  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 808  *
 809  * @param[in] srcBufferPtr Pointer to the input pixel buffer.
 810  * @param[in] srcWidth The width of the input pixel buffer.
 811  * @param[in] srcHeight The height of the input pixel buffer.
 812  * @param[in] srcStride The stride of the input pixel buffer.
 813  * @param[in] pixelSize The size of the pixel.
 814  * @param[in,out] dstPixelBuffer Pointer to the output pixel buffer.
 815  * @param[in] dstWidth The width of the output pixel buffer.
 816  * @param[in] dstHeight The height of the output pixel buffer.
 817  * @param[in] column The column index.
 818  * @param[in] offset The skew offset.
 819  * @param[in] weight The relative weight of uppeer pixel.
 820  */
 821 void VerticalSkew(const uint8_t* const srcBufferPtr,
 822                   uint32_t             srcWidth,
 823                   uint32_t             srcHeight,
 824                   uint32_t             srcStride,
 825                   uint32_t             pixelSize,
 826                   uint8_t*&            dstBufferPtr,
 827                   uint32_t             dstWidth,
 828                   uint32_t             dstHeight,
 829                   uint32_t             column,
 830                   int32_t              offset,
 831                   float                weight)
 832 {
 833   DALI_TRACE_SCOPE(gTraceFilter, "DALI_BITMAP_VERTICAL_SKEW");
 834   for(int32_t i = 0; i < offset; ++i)
 835   {
 836     // Fill gap above skew with background
 837     const uint32_t dstIndex = pixelSize * (i * dstWidth + column);
 838
 839     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 840     {
 841       *(dstBufferPtr + dstIndex + channel) = 0u;
 842     }
 843   }
 844
 845   uint8_t oldLeft[4u] = {0u, 0u, 0u, 0u};
 846
 847   int32_t yPos = 0;
 848
 849   for(uint32_t i = 0u; i < srcHeight; ++i)
 850   {
 851     // Loop through column pixels
 852     const uint32_t srcIndex = pixelSize * (i * srcStride + column);
 853
 854     uint8_t src[4u] = {0u, 0u, 0u, 0u};
 855     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 856     {
 857       src[channel] = *(srcBufferPtr + srcIndex + channel);
 858     }
 859
 860     yPos = static_cast<int32_t>(i) + offset;
 861
 862     // Calculate weights
 863     uint8_t left[4u] = {0u, 0u, 0u, 0u};
 864     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 865     {
 866       left[channel] = static_cast<uint8_t>(static_cast<float>(src[channel]) * weight);
 867       // Update left over on source
 868       src[channel] -= (left[channel] - oldLeft[channel]);
 869     }
 870
 871     // Check boundaries
 872     if((yPos >= 0) && (yPos < static_cast<int32_t>(dstHeight)))
 873     {
 874       const uint32_t dstIndex = pixelSize * (yPos * dstWidth + column);
 875
 876       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 877       {
 878         *(dstBufferPtr + dstIndex + channel) = src[channel];
 879       }
 880     }
 881
 882     // Save leftover for next pixel in scan
 883     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 884     {
 885       oldLeft[channel] = left[channel];
 886     }
 887   }
 888
 889   // Go to bottom point of skew
 890   uint32_t i = 0;
 891
 892   if(yPos >= 0)
 893   {
 894     i = static_cast<uint32_t>(yPos);
 895     if(i < dstHeight)
 896     {
 897       // If still in image bounds, put leftovers there
 898       const uint32_t dstIndex = pixelSize * (i * dstWidth + column);
 899
 900       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 901       {
 902         *(dstBufferPtr + dstIndex + channel) = oldLeft[channel];
 903       }
 904       ++i;
 905     }
 906   }
 907
 908   while(i < dstHeight)
 909   {
 910     // Clear below skewed line with background
 911     const uint32_t dstIndex = pixelSize * (i * dstWidth + column);
 912
 913     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 914     {
 915       *(dstBufferPtr + dstIndex + channel) = 0u;
 916     }
 917     ++i;
 918   }
 919 }
 920 } // namespace
 921
 922 ImageDimensions CalculateDesiredDimensions(ImageDimensions rawDimensions, ImageDimensions requestedDimensions, FittingMode::Type fittingMode)
 923 {
 924   return CalculateDesiredDimensions(rawDimensions.GetWidth(), rawDimensions.GetHeight(), requestedDimensions.GetWidth(), requestedDimensions.GetHeight(), fittingMode);
 925 }
 926
 927 /**
 928  * @brief Apply cropping and padding for specified fitting mode.
 929  *
 930  * Once the bitmap has been (optionally) downscaled to an appropriate size, this method performs alterations
 931  * based on the fitting mode.
 932  *
 933  * This will add vertical or horizontal borders if necessary.
 934  * Crop the source image data vertically or horizontally if necessary.
 935  * The aspect of the source image is preserved.
 936  * If the source image is smaller than the desired size, the algorithm will modify the newly created
 937  *   bitmaps dimensions to only be as large as necessary, as a memory saving optimization. This will cause
 938  *   GPU scaling to be performed at render time giving the same result with less texture traversal.
 939  *
 940  * @param[in] bitmap            The source pixel buffer to perform modifications on.
 941  * @param[in] desiredDimensions The target dimensions to aim to fill based on the fitting mode.
 942  * @param[in] fittingMode       The fitting mode to use.
 943  *
 944  * @return                      A new bitmap with the padding and cropping required for fitting mode applied.
 945  *                              If no modification is needed or possible, the passed in bitmap is returned.
 946  */
 947 Dali::Devel::PixelBuffer CropAndPadForFittingMode(Dali::Devel::PixelBuffer& bitmap, ImageDimensions desiredDimensions, FittingMode::Type fittingMode);
 948
 949 /**
 950  * @brief Adds horizontal or vertical borders to the source image.
 951  *
 952  * @param[in] targetPixels     The destination image pointer to draw the borders on.
 953  * @param[in] bytesPerPixel    The number of bytes per pixel of the target pixel buffer.
 954  * @param[in] targetDimensions The dimensions of the destination image.
 955  * @param[in] padDimensions    The columns and scanlines to pad with borders.
 956  */
 957 void AddBorders(PixelBuffer* targetPixels, const uint32_t bytesPerPixel, const ImageDimensions targetDimensions, const ImageDimensions padDimensions);
 958
 959 Dali::Devel::PixelBuffer ApplyAttributesToBitmap(Dali::Devel::PixelBuffer bitmap, ImageDimensions dimensions, FittingMode::Type fittingMode, SamplingMode::Type samplingMode)
 960 {
 961   if(bitmap)
 962   {
 963     // Calculate the desired box, accounting for a possible zero component:
 964     const ImageDimensions desiredDimensions = CalculateDesiredDimensions(bitmap.GetWidth(), bitmap.GetHeight(), dimensions.GetWidth(), dimensions.GetHeight(), fittingMode);
 965
 966     // If a different size than the raw one has been requested, resize the image
 967     // maximally using a repeated box filter without making it smaller than the
 968     // requested size in either dimension:
 969     bitmap = DownscaleBitmap(bitmap, desiredDimensions, fittingMode, samplingMode);
 970
 971     // Cut the bitmap according to the desired width and height so that the
 972     // resulting bitmap has the same aspect ratio as the desired dimensions.
 973     // Add crop and add borders if necessary depending on fitting mode.
 974     if(bitmap)
 975     {
 976       bitmap = CropAndPadForFittingMode(bitmap, desiredDimensions, fittingMode);
 977     }
 978   }
 979
 980   return bitmap;
 981 }
 982
 983 Dali::Devel::PixelBuffer CropAndPadForFittingMode(Dali::Devel::PixelBuffer& bitmap, ImageDimensions desiredDimensions, FittingMode::Type fittingMode)
 984 {
 985   const uint32_t inputWidth  = bitmap.GetWidth();
 986   const uint32_t inputHeight = bitmap.GetHeight();
 987   const uint32_t inputStride = bitmap.GetStride();
 988
 989   if(desiredDimensions.GetWidth() < 1u || desiredDimensions.GetHeight() < 1u)
 990   {
 991     DALI_LOG_WARNING("Image scaling aborted as desired dimensions too small (%u, %u).\n", desiredDimensions.GetWidth(), desiredDimensions.GetHeight());
 992   }
 993   else if(inputWidth != desiredDimensions.GetWidth() || inputHeight != desiredDimensions.GetHeight())
 994   {
 995     // Calculate any padding or cropping that needs to be done based on the fitting mode.
 996     // Note: If the desired size is larger than the original image, the desired size will be
 997     // reduced while maintaining the aspect, in order to save unnecessary memory usage.
 998     int scanlinesToCrop = 0;
 999     int columnsToCrop   = 0;
1000
1001     CalculateBordersFromFittingMode(ImageDimensions(inputWidth, inputHeight), fittingMode, desiredDimensions, scanlinesToCrop, columnsToCrop);
1002
1003     uint32_t desiredWidth(desiredDimensions.GetWidth());
1004     uint32_t desiredHeight(desiredDimensions.GetHeight());
1005
1006     // Action the changes by making a new bitmap with the central part of the loaded one if required.
1007     if(scanlinesToCrop != 0 || columnsToCrop != 0)
1008     {
1009       // Split the adding and removing of scanlines and columns into separate variables,
1010       // so we can use one piece of generic code to action the changes.
1011       uint32_t scanlinesToPad = 0;
1012       uint32_t columnsToPad   = 0;
1013       if(scanlinesToCrop < 0)
1014       {
1015         scanlinesToPad  = -scanlinesToCrop;
1016         scanlinesToCrop = 0;
1017       }
1018       if(columnsToCrop < 0)
1019       {
1020         columnsToPad  = -columnsToCrop;
1021         columnsToCrop = 0;
1022       }
1023
1024       // If there is no filtering, then the final image size can become very large, exit if larger than maximum.
1025       if((desiredWidth > MAXIMUM_TARGET_BITMAP_SIZE) || (desiredHeight > MAXIMUM_TARGET_BITMAP_SIZE) ||
1026          (columnsToPad > MAXIMUM_TARGET_BITMAP_SIZE) || (scanlinesToPad > MAXIMUM_TARGET_BITMAP_SIZE))
1027       {
1028         DALI_LOG_WARNING("Image scaling aborted as final dimensions too large (%u, %u).\n", desiredWidth, desiredHeight);
1029         return bitmap;
1030       }
1031
1032       DALI_TRACE_BEGIN_WITH_MESSAGE_GENERATOR(gTraceFilter, "DALI_CROP_AND_PAD_BITMAP", [&](std::ostringstream& oss) {
1033         oss << "[origin:" << inputWidth << "x" << inputHeight << " ";
1034         oss << "desired:" << desiredWidth << "x" << desiredHeight << " ";
1035         oss << "fittingMode:" << fittingMode << "]";
1036       });
1037
1038       // Create new PixelBuffer with the desired size.
1039       const auto pixelFormat = bitmap.GetPixelFormat();
1040
1041       auto croppedBitmap = Devel::PixelBuffer::New(desiredWidth, desiredHeight, pixelFormat);
1042
1043       // Add some pre-calculated offsets to the bitmap pointers so this is not done within a loop.
1044       // The cropping is added to the source pointer, and the padding is added to the destination.
1045       const auto               bytesPerPixel      = Pixel::GetBytesPerPixel(pixelFormat);
1046       const PixelBuffer* const sourcePixels       = bitmap.GetBuffer() + ((((scanlinesToCrop / 2) * inputStride) + (columnsToCrop / 2)) * bytesPerPixel);
1047       PixelBuffer* const       targetPixels       = croppedBitmap.GetBuffer();
1048       PixelBuffer* const       targetPixelsActive = targetPixels + ((((scanlinesToPad / 2) * desiredWidth) + (columnsToPad / 2)) * bytesPerPixel);
1049       DALI_ASSERT_DEBUG(sourcePixels && targetPixels);
1050
1051       // Copy the image data to the new bitmap.
1052       // Optimize to a single memcpy if the left and right edges don't need a crop or a pad.
1053       uint32_t outputSpan(desiredWidth * bytesPerPixel);
1054       if(columnsToCrop == 0 && columnsToPad == 0 && inputStride == inputWidth)
1055       {
1056         memcpy(targetPixelsActive, sourcePixels, (desiredHeight - scanlinesToPad) * outputSpan);
1057       }
1058       else
1059       {
1060         // The width needs to change (due to either a crop or a pad), so we copy a scanline at a time.
1061         // Precalculate any constants to optimize the inner loop.
1062         const uint32_t inputSpan(inputStride * bytesPerPixel);
1063         const uint32_t copySpan((desiredWidth - columnsToPad) * bytesPerPixel);
1064         const uint32_t scanlinesToCopy(desiredHeight - scanlinesToPad);
1065
1066         for(uint32_t y = 0; y < scanlinesToCopy; ++y)
1067         {
1068           memcpy(&targetPixelsActive[y * outputSpan], &sourcePixels[y * inputSpan], copySpan);
1069         }
1070       }
1071
1072       // Add vertical or horizontal borders to the final image (if required).
1073       desiredDimensions.SetWidth(desiredWidth);
1074       desiredDimensions.SetHeight(desiredHeight);
1075       AddBorders(croppedBitmap.GetBuffer(), bytesPerPixel, desiredDimensions, ImageDimensions(columnsToPad, scanlinesToPad));
1076       // Overwrite the loaded bitmap with the cropped version
1077       bitmap = croppedBitmap;
1078
1079       DALI_TRACE_END(gTraceFilter, "DALI_CROP_AND_PAD_BITMAP");
1080     }
1081   }
1082
1083   return bitmap;
1084 }
1085
1086 void AddBorders(PixelBuffer* targetPixels, const uint32_t bytesPerPixel, const ImageDimensions targetDimensions, const ImageDimensions padDimensions)
1087 {
1088   // Assign ints for faster access.
1089   uint32_t desiredWidth(targetDimensions.GetWidth());
1090   uint32_t desiredHeight(targetDimensions.GetHeight());
1091   uint32_t columnsToPad(padDimensions.GetWidth());
1092   uint32_t scanlinesToPad(padDimensions.GetHeight());
1093   uint32_t outputSpan(desiredWidth * bytesPerPixel);
1094
1095   // Add letterboxing (symmetrical borders) if needed.
1096   if(scanlinesToPad > 0)
1097   {
1098     // Add a top border. Note: This is (deliberately) rounded down if padding is an odd number.
1099     memset(targetPixels, BORDER_FILL_VALUE, (scanlinesToPad / 2) * outputSpan);
1100
1101     // We subtract scanlinesToPad/2 from scanlinesToPad so that we have the correct
1102     // offset for odd numbers (as the top border is 1 pixel smaller in these cases.
1103     uint32_t bottomBorderHeight = scanlinesToPad - (scanlinesToPad / 2);
1104
1105     // Bottom border.
1106     memset(&targetPixels[(desiredHeight - bottomBorderHeight) * outputSpan], BORDER_FILL_VALUE, bottomBorderHeight * outputSpan);
1107   }
1108   else if(columnsToPad > 0)
1109   {
1110     // Add a left and right border.
1111     // Left:
1112     // Pre-calculate span size outside of loop.
1113     uint32_t leftBorderSpanWidth((columnsToPad / 2) * bytesPerPixel);
1114     for(uint32_t y = 0; y < desiredHeight; ++y)
1115     {
1116       memset(&targetPixels[y * outputSpan], BORDER_FILL_VALUE, leftBorderSpanWidth);
1117     }
1118
1119     // Right:
1120     // Pre-calculate the initial x offset as it is always the same for a small optimization.
1121     // We subtract columnsToPad/2 from columnsToPad so that we have the correct
1122     // offset for odd numbers (as the left border is 1 pixel smaller in these cases.
1123     uint32_t           rightBorderWidth = columnsToPad - (columnsToPad / 2);
1124     PixelBuffer* const destPixelsRightBorder(targetPixels + ((desiredWidth - rightBorderWidth) * bytesPerPixel));
1125     uint32_t           rightBorderSpanWidth = rightBorderWidth * bytesPerPixel;
1126
1127     for(uint32_t y = 0; y < desiredHeight; ++y)
1128     {
1129       memset(&destPixelsRightBorder[y * outputSpan], BORDER_FILL_VALUE, rightBorderSpanWidth);
1130     }
1131   }
1132 }
1133
1134 Dali::Devel::PixelBuffer DownscaleBitmap(Dali::Devel::PixelBuffer bitmap,
1135                                          ImageDimensions          desired,
1136                                          FittingMode::Type        fittingMode,
1137                                          SamplingMode::Type       samplingMode)
1138 {
1139   // Source dimensions as loaded from resources (e.g. filesystem):
1140   auto bitmapWidth  = bitmap.GetWidth();
1141   auto bitmapHeight = bitmap.GetHeight();
1142   auto bitmapStride = bitmap.GetStride();
1143   // Desired dimensions (the rectangle to fit the source image to):
1144   auto desiredWidth  = desired.GetWidth();
1145   auto desiredHeight = desired.GetHeight();
1146
1147   Dali::Devel::PixelBuffer outputBitmap{bitmap};
1148
1149   // If a different size than the raw one has been requested, resize the image:
1150   if(
1151     (desiredWidth > 0.0f) && (desiredHeight > 0.0f) &&
1152     ((desiredWidth < bitmapWidth) || (desiredHeight < bitmapHeight)))
1153   {
1154     DALI_TRACE_BEGIN_WITH_MESSAGE_GENERATOR(gTraceFilter, "DALI_DOWNSCALE_BITMAP", [&](std::ostringstream& oss) {
1155       oss << "[origin:" << bitmapWidth << "x" << bitmapHeight << " ";
1156       oss << "desired:" << desiredWidth << "x" << desiredHeight << " ";
1157       oss << "fittingMode:" << fittingMode << " ";
1158       oss << "samplingMode:" << samplingMode << "]";
1159     });
1160     auto pixelFormat = bitmap.GetPixelFormat();
1161
1162     // Do the fast power of 2 iterated box filter to get to roughly the right side if the filter mode requests that:
1163     uint32_t shrunkWidth = -1, shrunkHeight = -1, outStride = -1;
1164     DownscaleInPlacePow2(bitmap.GetBuffer(), pixelFormat, bitmapWidth, bitmapHeight, bitmapStride, desiredWidth, desiredHeight, fittingMode, samplingMode, shrunkWidth, shrunkHeight, outStride);
1165
1166     // Work out the dimensions of the downscaled bitmap, given the scaling mode and desired dimensions:
1167     const ImageDimensions filteredDimensions = FitToScalingMode(ImageDimensions(desiredWidth, desiredHeight), ImageDimensions(shrunkWidth, shrunkHeight), fittingMode);
1168     const uint32_t        filteredWidth      = filteredDimensions.GetWidth();
1169     const uint32_t        filteredHeight     = filteredDimensions.GetHeight();
1170
1171     // Run a filter to scale down the bitmap if it needs it:
1172     bool filtered = false;
1173     if(filteredWidth < shrunkWidth || filteredHeight < shrunkHeight)
1174     {
1175       if(samplingMode == SamplingMode::LINEAR || samplingMode == SamplingMode::BOX_THEN_LINEAR ||
1176          samplingMode == SamplingMode::NEAREST || samplingMode == SamplingMode::BOX_THEN_NEAREST)
1177       {
1178         outputBitmap = Dali::Devel::PixelBuffer::New(filteredWidth, filteredHeight, pixelFormat);
1179
1180         if(outputBitmap)
1181         {
1182           if(samplingMode == SamplingMode::LINEAR || samplingMode == SamplingMode::BOX_THEN_LINEAR)
1183           {
1184             LinearSample(bitmap.GetBuffer(), ImageDimensions(shrunkWidth, shrunkHeight), outStride, pixelFormat, outputBitmap.GetBuffer(), filteredDimensions);
1185           }
1186           else
1187           {
1188             PointSample(bitmap.GetBuffer(), shrunkWidth, shrunkHeight, outStride, pixelFormat, outputBitmap.GetBuffer(), filteredWidth, filteredHeight);
1189           }
1190           filtered = true;
1191         }
1192       }
1193     }
1194     // Copy out the 2^x downscaled, box-filtered pixels if no secondary filter (point or linear) was applied:
1195     if(filtered == false && (shrunkWidth < bitmapWidth || shrunkHeight < bitmapHeight))
1196     {
1197       // The buffer is downscaled and it is tightly packed. We don't need to set a stride.
1198       outputBitmap = MakePixelBuffer(bitmap.GetBuffer(), pixelFormat, shrunkWidth, shrunkHeight);
1199     }
1200     DALI_TRACE_END_WITH_MESSAGE_GENERATOR(gTraceFilter, "DALI_DOWNSCALE_BITMAP", [&](std::ostringstream& oss) {
1201       oss << "[origin:" << bitmapWidth << "x" << bitmapHeight << " ";
1202       oss << "desired:" << desiredWidth << "x" << desiredHeight << " ";
1203       oss << "final:" << outputBitmap.GetWidth() << "x" << outputBitmap.GetHeight() << "]";
1204     });
1205   }
1206
1207   return outputBitmap;
1208 }
1209
1210 namespace
1211 {
1212 /**
1213  * @brief Returns whether to keep box filtering based on whether downscaled dimensions will overshoot the desired ones aty the next step.
1214  * @param test Which combination of the two dimensions matter for terminating the filtering.
1215  * @param scaledWidth The width of the current downscaled image.
1216  * @param scaledHeight The height of the current downscaled image.
1217  * @param desiredWidth The target width for the downscaling.
1218  * @param desiredHeight The target height for the downscaling.
1219  */
1220 bool ContinueScaling(BoxDimensionTest test, uint32_t scaledWidth, uint32_t scaledHeight, uint32_t desiredWidth, uint32_t desiredHeight)
1221 {
1222   bool           keepScaling = false;
1223   const uint32_t nextWidth   = scaledWidth >> 1u;
1224   const uint32_t nextHeight  = scaledHeight >> 1u;
1225
1226   if(nextWidth >= 1u && nextHeight >= 1u)
1227   {
1228     switch(test)
1229     {
1230       case BoxDimensionTestEither:
1231       {
1232         keepScaling = nextWidth >= desiredWidth || nextHeight >= desiredHeight;
1233         break;
1234       }
1235       case BoxDimensionTestBoth:
1236       {
1237         keepScaling = nextWidth >= desiredWidth && nextHeight >= desiredHeight;
1238         break;
1239       }
1240       case BoxDimensionTestX:
1241       {
1242         keepScaling = nextWidth >= desiredWidth;
1243         break;
1244       }
1245       case BoxDimensionTestY:
1246       {
1247         keepScaling = nextHeight >= desiredHeight;
1248         break;
1249       }
1250     }
1251   }
1252
1253   return keepScaling;
1254 }
1255
1256 /**
1257  * @brief A shared implementation of the overall iterative box filter
1258  * downscaling algorithm.
1259  *
1260  * Specialise this for particular pixel formats by supplying the number of bytes
1261  * per pixel and two functions: one for averaging pairs of neighbouring pixels
1262  * on a single scanline, and a second for averaging pixels at corresponding
1263  * positions on different scanlines.
1264  **/
1265 template<
1266   int BYTES_PER_PIXEL,
1267   void (*HalveScanlineInPlace)(uint8_t* const pixels, const uint32_t width),
1268   void (*AverageScanlines)(const uint8_t* const scanline1, const uint8_t* const __restrict__ scanline2, uint8_t* const outputScanline, const uint32_t width)>
1269 void DownscaleInPlacePow2Generic(uint8_t* const   pixels,
1270                                  const uint32_t   inputWidth,
1271                                  const uint32_t   inputHeight,
1272                                  const uint32_t   inputStride,
1273                                  const uint32_t   desiredWidth,
1274                                  const uint32_t   desiredHeight,
1275                                  BoxDimensionTest dimensionTest,
1276                                  uint32_t&        outWidth,
1277                                  uint32_t&        outHeight,
1278                                  uint32_t&        outStride)
1279 {
1280   if(pixels == 0)
1281   {
1282     return;
1283   }
1284   ValidateScalingParameters(inputWidth, inputHeight, desiredWidth, desiredHeight);
1285
1286   // Scale the image until it would be smaller than desired, stopping if the
1287   // resulting height or width would be less than 1:
1288   uint32_t scaledWidth = inputWidth, scaledHeight = inputHeight, stride = inputStride;
1289   while(ContinueScaling(dimensionTest, scaledWidth, scaledHeight, desiredWidth, desiredHeight))
1290   {
1291     const uint32_t lastWidth  = scaledWidth;
1292     const uint32_t lastStride = stride;
1293     scaledWidth >>= 1u;
1294     scaledHeight >>= 1u;
1295     stride = scaledWidth;
1296
1297     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Scaling to %u\t%u.\n", scaledWidth, scaledHeight);
1298
1299     const uint32_t lastScanlinePair = scaledHeight - 1;
1300
1301     // Scale pairs of scanlines until any spare one at the end is dropped:
1302     for(uint32_t y = 0; y <= lastScanlinePair; ++y)
1303     {
1304       // Scale two scanlines horizontally:
1305       HalveScanlineInPlace(&pixels[y * 2 * lastStride * BYTES_PER_PIXEL], lastWidth);
1306       HalveScanlineInPlace(&pixels[(y * 2 + 1) * lastStride * BYTES_PER_PIXEL], lastWidth);
1307
1308       // Scale vertical pairs of pixels while the last two scanlines are still warm in
1309       // the CPU cache(s):
1310       // Note, better access patterns for cache-coherence are possible for very large
1311       // images but even a 4k wide RGB888 image will use just 24kB of cache (4k pixels
1312       // * 3 Bpp * 2 scanlines) for two scanlines on the first iteration.
1313       AverageScanlines(
1314         &pixels[y * 2 * lastStride * BYTES_PER_PIXEL],
1315         &pixels[(y * 2 + 1) * lastStride * BYTES_PER_PIXEL],
1316         &pixels[y * scaledWidth * BYTES_PER_PIXEL],
1317         scaledWidth);
1318     }
1319   }
1320
1321   ///@note: we could finish off with one of two mutually exclusive passes, one squashing horizontally as far as possible, and the other vertically, if we knew a following cpu point or bilinear filter would restore the desired aspect ratio.
1322   outWidth  = scaledWidth;
1323   outHeight = scaledHeight;
1324   outStride = stride;
1325 }
1326
1327 } // namespace
1328
1329 void HalveScanlineInPlaceRGB888(uint8_t* const pixels, const uint32_t width)
1330 {
1331   DebugAssertScanlineParameters(pixels, width);
1332
1333   const uint32_t lastPair = EvenDown(width - 2);
1334
1335   /**
1336    * @code
1337    *  for(uint32_t pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1338    * {
1339    *   // Load all the byte pixel components we need:
1340    *   const uint32_t c11 = pixels[pixel * 3];
1341    *   const uint32_t c12 = pixels[pixel * 3 + 1];
1342    *   const uint32_t c13 = pixels[pixel * 3 + 2];
1343    *   const uint32_t c21 = pixels[pixel * 3 + 3];
1344    *   const uint32_t c22 = pixels[pixel * 3 + 4];
1345    *   const uint32_t c23 = pixels[pixel * 3 + 5];
1346    *
1347    *   // Save the averaged byte pixel components:
1348    *   pixels[outPixel * 3]     = static_cast<uint8_t>(AverageComponent(c11, c21));
1349    *   pixels[outPixel * 3 + 1] = static_cast<uint8_t>(AverageComponent(c12, c22));
1350    *   pixels[outPixel * 3 + 2] = static_cast<uint8_t>(AverageComponent(c13, c23));
1351    * }
1352    *   @endcode
1353    */
1354   //@ToDo : Fix here if we found that collect 12 bytes == 3 uint32_t with 4 colors, and calculate in one-operation
1355   std::uint8_t* inPixelPtr  = pixels;
1356   std::uint8_t* outPixelPtr = pixels;
1357   for(std::uint32_t scanedPixelCount = 0; scanedPixelCount <= lastPair; scanedPixelCount += 2)
1358   {
1359     *(outPixelPtr + 0) = ((*(inPixelPtr + 0) ^ *(inPixelPtr + 3)) >> 1) + (*(inPixelPtr + 0) & *(inPixelPtr + 3));
1360     *(outPixelPtr + 1) = ((*(inPixelPtr + 1) ^ *(inPixelPtr + 4)) >> 1) + (*(inPixelPtr + 1) & *(inPixelPtr + 4));
1361     *(outPixelPtr + 2) = ((*(inPixelPtr + 2) ^ *(inPixelPtr + 5)) >> 1) + (*(inPixelPtr + 2) & *(inPixelPtr + 5));
1362     inPixelPtr += 6;
1363     outPixelPtr += 3;
1364   }
1365 }
1366
1367 void HalveScanlineInPlaceRGBA8888(uint8_t* const pixels, const uint32_t width)
1368 {
1369   DebugAssertScanlineParameters(pixels, width);
1370   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(pixels) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1371
1372   uint32_t* const alignedPixels = reinterpret_cast<uint32_t*>(pixels);
1373
1374   const uint32_t lastPair = EvenDown(width - 2);
1375
1376   for(uint32_t pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1377   {
1378     const uint32_t averaged = AveragePixelRGBA8888(alignedPixels[pixel], alignedPixels[pixel + 1]);
1379     alignedPixels[outPixel] = averaged;
1380   }
1381 }
1382
1383 void HalveScanlineInPlaceRGB565(uint8_t* pixels, uint32_t width)
1384 {
1385   DebugAssertScanlineParameters(pixels, width);
1386   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(pixels) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1387
1388   uint16_t* const alignedPixels = reinterpret_cast<uint16_t*>(pixels);
1389
1390   const uint32_t lastPair = EvenDown(width - 2);
1391
1392   for(uint32_t pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1393   {
1394     const uint16_t averaged = AveragePixelRGB565(alignedPixels[pixel], alignedPixels[pixel + 1]);
1395     alignedPixels[outPixel] = averaged;
1396   }
1397 }
1398
1399 void HalveScanlineInPlace2Bytes(uint8_t* const pixels, const uint32_t width)
1400 {
1401   DebugAssertScanlineParameters(pixels, width);
1402
1403   const uint32_t lastPair = EvenDown(width - 2);
1404
1405   for(uint32_t pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1406   {
1407     /**
1408      * @code
1409      * // Load all the byte pixel components we need:
1410      * const uint32_t c11 = pixels[pixel * 2];
1411      * const uint32_t c12 = pixels[pixel * 2 + 1];
1412      * const uint32_t c21 = pixels[pixel * 2 + 2];
1413      * const uint32_t c22 = pixels[pixel * 2 + 3];
1414      *
1415      * // Save the averaged byte pixel components:
1416      * pixels[outPixel * 2]     = static_cast<uint8_t>(AverageComponent(c11, c21));
1417      * pixels[outPixel * 2 + 1] = static_cast<uint8_t>(AverageComponent(c12, c22));
1418      * @endcode
1419      */
1420     // Note : We can assume that pixel is even number. So we can use | operation instead of + operation.
1421     pixels[(outPixel << 1)]     = ((pixels[(pixel << 1)] ^ pixels[(pixel << 1) | 2]) >> 1) + (pixels[(pixel << 1)] & pixels[(pixel << 1) | 2]);
1422     pixels[(outPixel << 1) | 1] = ((pixels[(pixel << 1) | 1] ^ pixels[(pixel << 1) | 3]) >> 1) + (pixels[(pixel << 1) | 1] & pixels[(pixel << 1) | 3]);
1423   }
1424 }
1425
1426 void HalveScanlineInPlace1Byte(uint8_t* const pixels, const uint32_t width)
1427 {
1428   DebugAssertScanlineParameters(pixels, width);
1429
1430   const uint32_t lastPair = EvenDown(width - 2);
1431
1432   for(uint32_t pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1433   {
1434     /**
1435      * @code
1436      * // Load all the byte pixel components we need:
1437      * const uint32_t c1 = pixels[pixel];
1438      * const uint32_t c2 = pixels[pixel + 1];
1439      *
1440      * // Save the averaged byte pixel component:
1441      * pixels[outPixel] = static_cast<uint8_t>(AverageComponent(c1, c2));
1442      * @endcode
1443      */
1444     // Note : We can assume that pixel is even number. So we can use | operation instead of + operation.
1445     pixels[outPixel] = ((pixels[pixel] ^ pixels[pixel | 1]) >> 1) + (pixels[pixel] & pixels[pixel | 1]);
1446   }
1447 }
1448
1449 // AverageScanline
1450
1451 namespace
1452 {
1453 /**
1454  * @copydoc AverageScanlines1
1455  * @note This API averages eight components in one operation.
1456  * @note Only possible if each scanline pointer's address is aligned.
1457  * It will give a performance benefit.
1458  */
1459 inline void AverageScanlinesWithMultipleComponents(
1460   const uint8_t* const scanline1,
1461   const uint8_t* const __restrict__ scanline2,
1462   uint8_t* const outputScanline,
1463   const uint32_t totalComponentCount)
1464 {
1465   uint32_t component = 0;
1466   if(DALI_LIKELY(totalComponentCount >= 16))
1467   {
1468     // Note reinsterpret_cast from uint8_t to uint64_t (or uint32_t) and read/write only allowed
1469     // If pointer of data is aligned well.
1470     // (to avoid SIGBUS)
1471
1472     // To increase the percentage of optimized works, let we check pre-padding value of each pointer.
1473     auto scanline1Padding   = (reinterpret_cast<std::ptrdiff_t>(scanline1) & (sizeof(std::uint64_t) - 1));
1474     auto scanline2Padding   = (reinterpret_cast<std::ptrdiff_t>(scanline2) & (sizeof(std::uint64_t) - 1));
1475     auto outScanlinePadding = (reinterpret_cast<std::ptrdiff_t>(outputScanline) & (sizeof(std::uint64_t) - 1));
1476     if((scanline1Padding == scanline2Padding) && (scanline1Padding == outScanlinePadding))
1477     {
1478       const auto padding = (sizeof(std::uint64_t) - scanline1Padding) & (sizeof(std::uint64_t) - 1);
1479
1480       // Prepadding range calculate
1481       for(std::uint32_t i = 0; i < padding; ++i)
1482       {
1483         const auto& c1    = scanline1[i];
1484         const auto& c2    = scanline2[i];
1485         outputScanline[i] = static_cast<std::uint8_t>(((c1 ^ c2) >> 1) + (c1 & c2));
1486       }
1487
1488       // Jump 8 components in one step
1489       const std::uint64_t* const scanline18Step = reinterpret_cast<const std::uint64_t* const>(scanline1 + padding);
1490       const std::uint64_t* const scanline28Step = reinterpret_cast<const std::uint64_t* const>(scanline2 + padding);
1491       std::uint64_t* const       output8step    = reinterpret_cast<std::uint64_t* const>(outputScanline + padding);
1492
1493       const std::uint32_t totalStepCount = (totalComponentCount) >> 3;
1494       component                          = (totalStepCount << 3) + padding;
1495
1496       // and for each step, calculate average of 8 bytes.
1497       for(std::uint32_t i = 0; i < totalStepCount; ++i)
1498       {
1499         const auto& c1     = *(scanline18Step + i);
1500         const auto& c2     = *(scanline28Step + i);
1501         *(output8step + i) = static_cast<std::uint64_t>((((c1 ^ c2) & 0xfefefefefefefefeull) >> 1) + (c1 & c2));
1502       }
1503     }
1504     else if(((scanline1Padding & (sizeof(std::uint32_t) - 1)) == (scanline2Padding & (sizeof(std::uint32_t) - 1))) &&
1505             ((scanline1Padding & (sizeof(std::uint32_t) - 1)) == (outScanlinePadding & (sizeof(std::uint32_t) - 1))))
1506     {
1507       const auto padding = (sizeof(std::uint64_t) - scanline1Padding) & (sizeof(std::uint32_t) - 1);
1508
1509       // Prepadding range calculate
1510       for(std::uint32_t i = 0; i < padding; ++i)
1511       {
1512         const auto& c1    = scanline1[i];
1513         const auto& c2    = scanline2[i];
1514         outputScanline[i] = static_cast<std::uint8_t>(((c1 ^ c2) >> 1) + (c1 & c2));
1515       }
1516
1517       // Jump 4 components in one step
1518       const std::uint32_t* const scanline14Step = reinterpret_cast<const std::uint32_t* const>(scanline1 + padding);
1519       const std::uint32_t* const scanline24Step = reinterpret_cast<const std::uint32_t* const>(scanline2 + padding);
1520       std::uint32_t* const       output4step    = reinterpret_cast<std::uint32_t* const>(outputScanline + padding);
1521
1522       const std::uint32_t totalStepCount = (totalComponentCount) >> 2;
1523       component                          = (totalStepCount << 2) + padding;
1524
1525       // and for each step, calculate average of 4 bytes.
1526       for(std::uint32_t i = 0; i < totalStepCount; ++i)
1527       {
1528         const auto& c1     = *(scanline14Step + i);
1529         const auto& c2     = *(scanline24Step + i);
1530         *(output4step + i) = static_cast<std::uint32_t>((((c1 ^ c2) & 0xfefefefeu) >> 1) + (c1 & c2));
1531       }
1532     }
1533   }
1534   // remaining components calculate
1535   for(; component < totalComponentCount; ++component)
1536   {
1537     const auto& c1            = scanline1[component];
1538     const auto& c2            = scanline2[component];
1539     outputScanline[component] = static_cast<std::uint8_t>(((c1 ^ c2) >> 1) + (c1 & c2));
1540   }
1541 }
1542
1543 } // namespace
1544
1545 void AverageScanlines1(const uint8_t* const scanline1,
1546                        const uint8_t* const __restrict__ scanline2,
1547                        uint8_t* const outputScanline,
1548                        const uint32_t width)
1549 {
1550   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width);
1551
1552   /**
1553    * @code
1554    * for(uint32_t component = 0; component < width; ++component)
1555    * {
1556    *   outputScanline[component] = static_cast<uint8_t>(AverageComponent(scanline1[component], scanline2[component]));
1557    * }
1558    * @endcode
1559    */
1560   AverageScanlinesWithMultipleComponents(scanline1, scanline2, outputScanline, width);
1561 }
1562
1563 void AverageScanlines2(const uint8_t* const scanline1,
1564                        const uint8_t* const __restrict__ scanline2,
1565                        uint8_t* const outputScanline,
1566                        const uint32_t width)
1567 {
1568   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 2);
1569
1570   /**
1571    * @code
1572    * for(uint32_t component = 0; component < width * 2; ++component)
1573    * {
1574    *   outputScanline[component] = static_cast<uint8_t>(AverageComponent(scanline1[component], scanline2[component]));
1575    * }
1576    * @endcode
1577    */
1578   AverageScanlinesWithMultipleComponents(scanline1, scanline2, outputScanline, width * 2);
1579 }
1580
1581 void AverageScanlines3(const uint8_t* const scanline1,
1582                        const uint8_t* const __restrict__ scanline2,
1583                        uint8_t* const outputScanline,
1584                        const uint32_t width)
1585 {
1586   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 3);
1587
1588   /**
1589    * @code
1590    * for(uint32_t component = 0; component < width * 3; ++component)
1591    * {
1592    *   outputScanline[component] = static_cast<uint8_t>(AverageComponent(scanline1[component], scanline2[component]));
1593    * }
1594    * @endcode
1595    */
1596   AverageScanlinesWithMultipleComponents(scanline1, scanline2, outputScanline, width * 3);
1597 }
1598
1599 void AverageScanlinesRGBA8888(const uint8_t* const scanline1,
1600                               const uint8_t* const __restrict__ scanline2,
1601                               uint8_t* const outputScanline,
1602                               const uint32_t width)
1603 {
1604   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 4);
1605   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline1) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1606   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline2) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1607   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(outputScanline) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1608
1609   /**
1610    * @code
1611    * const uint32_t* const alignedScanline1 = reinterpret_cast<const uint32_t*>(scanline1);
1612    * const uint32_t* const alignedScanline2 = reinterpret_cast<const uint32_t*>(scanline2);
1613    * uint32_t* const       alignedOutput    = reinterpret_cast<uint32_t*>(outputScanline);
1614    *
1615    * for(uint32_t pixel = 0; pixel < width; ++pixel)
1616    * {
1617    *   alignedOutput[pixel] = AveragePixelRGBA8888(alignedScanline1[pixel], alignedScanline2[pixel]);
1618    * }
1619    * @endcode
1620    */
1621
1622   AverageScanlinesWithMultipleComponents(scanline1, scanline2, outputScanline, width * 4u);
1623 }
1624
1625 void AverageScanlinesRGB565(const uint8_t* const scanline1,
1626                             const uint8_t* const __restrict__ scanline2,
1627                             uint8_t* const outputScanline,
1628                             const uint32_t width)
1629 {
1630   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 2);
1631   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline1) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1632   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline2) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1633   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(outputScanline) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1634
1635   const uint16_t* const alignedScanline1 = reinterpret_cast<const uint16_t*>(scanline1);
1636   const uint16_t* const alignedScanline2 = reinterpret_cast<const uint16_t*>(scanline2);
1637   uint16_t* const       alignedOutput    = reinterpret_cast<uint16_t*>(outputScanline);
1638
1639   for(uint32_t pixel = 0; pixel < width; ++pixel)
1640   {
1641     alignedOutput[pixel] = AveragePixelRGB565(alignedScanline1[pixel], alignedScanline2[pixel]);
1642   }
1643 }
1644
1645 /// Dispatch to pixel format appropriate box filter downscaling functions.
1646 void DownscaleInPlacePow2(uint8_t* const     pixels,
1647                           Pixel::Format      pixelFormat,
1648                           uint32_t           inputWidth,
1649                           uint32_t           inputHeight,
1650                           uint32_t           inputStride,
1651                           uint32_t           desiredWidth,
1652                           uint32_t           desiredHeight,
1653                           FittingMode::Type  fittingMode,
1654                           SamplingMode::Type samplingMode,
1655                           uint32_t&          outWidth,
1656                           uint32_t&          outHeight,
1657                           uint32_t&          outStride)
1658 {
1659   outWidth  = inputWidth;
1660   outHeight = inputHeight;
1661   outStride = inputStride;
1662   // Perform power of 2 iterated 4:1 box filtering if the requested filter mode requires it:
1663   if(samplingMode == SamplingMode::BOX || samplingMode == SamplingMode::BOX_THEN_NEAREST || samplingMode == SamplingMode::BOX_THEN_LINEAR)
1664   {
1665     // Check the pixel format is one that is supported:
1666     if(pixelFormat == Pixel::RGBA8888 || pixelFormat == Pixel::RGB888 || pixelFormat == Pixel::RGB565 || pixelFormat == Pixel::LA88 || pixelFormat == Pixel::L8 || pixelFormat == Pixel::A8 || pixelFormat == Pixel::CHROMINANCE_U || pixelFormat == Pixel::CHROMINANCE_V)
1667     {
1668       const BoxDimensionTest dimensionTest = DimensionTestForScalingMode(fittingMode);
1669
1670       switch(pixelFormat)
1671       {
1672         case Pixel::RGBA8888:
1673         {
1674           Internal::Platform::DownscaleInPlacePow2RGBA8888(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1675           break;
1676         }
1677         case Pixel::RGB888:
1678         {
1679           Internal::Platform::DownscaleInPlacePow2RGB888(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1680           break;
1681         }
1682         case Pixel::RGB565:
1683         {
1684           Internal::Platform::DownscaleInPlacePow2RGB565(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1685           break;
1686         }
1687         case Pixel::LA88:
1688         {
1689           Internal::Platform::DownscaleInPlacePow2ComponentPair(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1690           break;
1691         }
1692         case Pixel::L8:
1693         case Pixel::A8:
1694         case Pixel::CHROMINANCE_U:
1695         case Pixel::CHROMINANCE_V:
1696         {
1697           Internal::Platform::DownscaleInPlacePow2SingleBytePerPixel(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1698           break;
1699         }
1700         default:
1701         {
1702           DALI_ASSERT_DEBUG(false && "Inner branch conditions don't match outer branch.");
1703         }
1704       }
1705     }
1706   }
1707   else
1708   {
1709     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Bitmap was not shrunk: unsupported pixel format: %u.\n", uint32_t(pixelFormat));
1710   }
1711 }
1712
1713 void DownscaleInPlacePow2RGB888(uint8_t*         pixels,
1714                                 uint32_t         inputWidth,
1715                                 uint32_t         inputHeight,
1716                                 uint32_t         inputStride,
1717                                 uint32_t         desiredWidth,
1718                                 uint32_t         desiredHeight,
1719                                 BoxDimensionTest dimensionTest,
1720                                 uint32_t&        outWidth,
1721                                 uint32_t&        outHeight,
1722                                 uint32_t&        outStride)
1723 {
1724   DownscaleInPlacePow2Generic<3, HalveScanlineInPlaceRGB888, AverageScanlines3>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1725 }
1726
1727 void DownscaleInPlacePow2RGBA8888(uint8_t*         pixels,
1728                                   uint32_t         inputWidth,
1729                                   uint32_t         inputHeight,
1730                                   uint32_t         inputStride,
1731                                   uint32_t         desiredWidth,
1732                                   uint32_t         desiredHeight,
1733                                   BoxDimensionTest dimensionTest,
1734                                   uint32_t&        outWidth,
1735                                   uint32_t&        outHeight,
1736                                   uint32_t&        outStride)
1737 {
1738   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(pixels) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1739   DownscaleInPlacePow2Generic<4, HalveScanlineInPlaceRGBA8888, AverageScanlinesRGBA8888>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1740 }
1741
1742 void DownscaleInPlacePow2RGB565(uint8_t*         pixels,
1743                                 uint32_t         inputWidth,
1744                                 uint32_t         inputHeight,
1745                                 uint32_t         inputStride,
1746                                 uint32_t         desiredWidth,
1747                                 uint32_t         desiredHeight,
1748                                 BoxDimensionTest dimensionTest,
1749                                 uint32_t&        outWidth,
1750                                 uint32_t&        outHeight,
1751                                 uint32_t&        outStride)
1752 {
1753   DownscaleInPlacePow2Generic<2, HalveScanlineInPlaceRGB565, AverageScanlinesRGB565>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1754 }
1755
1756 /**
1757  * @copydoc DownscaleInPlacePow2RGB888
1758  *
1759  * For 2-byte formats such as lum8alpha8, but not packed 16 bit formats like RGB565.
1760  */
1761 void DownscaleInPlacePow2ComponentPair(uint8_t*         pixels,
1762                                        uint32_t         inputWidth,
1763                                        uint32_t         inputHeight,
1764                                        uint32_t         inputStride,
1765                                        uint32_t         desiredWidth,
1766                                        uint32_t         desiredHeight,
1767                                        BoxDimensionTest dimensionTest,
1768                                        uint32_t&        outWidth,
1769                                        uint32_t&        outHeight,
1770                                        uint32_t&        outStride)
1771 {
1772   DownscaleInPlacePow2Generic<2, HalveScanlineInPlace2Bytes, AverageScanlines2>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1773 }
1774
1775 void DownscaleInPlacePow2SingleBytePerPixel(uint8_t*         pixels,
1776                                             uint32_t         inputWidth,
1777                                             uint32_t         inputHeight,
1778                                             uint32_t         inputStride,
1779                                             uint32_t         desiredWidth,
1780                                             uint32_t         desiredHeight,
1781                                             BoxDimensionTest dimensionTest,
1782                                             uint32_t&        outWidth,
1783                                             uint32_t&        outHeight,
1784                                             uint32_t&        outStride)
1785 {
1786   DownscaleInPlacePow2Generic<1, HalveScanlineInPlace1Byte, AverageScanlines1>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1787 }
1788
1789 // Point sampling group below
1790
1791 namespace
1792 {
1793 /**
1794  * @brief Point sample an image to a new resolution (like GL_NEAREST).
1795  *
1796  * Template is used purely as a type-safe code generator in this one
1797  * compilation unit. Generated code is inlined into type-specific wrapper
1798  * functions below which are exported to rest of module.
1799  */
1800 template<typename PIXEL>
1801 inline void PointSampleAddressablePixels(const uint8_t* inPixels,
1802                                          uint32_t       inputWidth,
1803                                          uint32_t       inputHeight,
1804                                          uint32_t       inputStride,
1805                                          uint8_t*       outPixels,
1806                                          uint32_t       desiredWidth,
1807                                          uint32_t       desiredHeight)
1808 {
1809   DALI_ASSERT_DEBUG(((desiredWidth <= inputWidth && desiredHeight <= inputHeight) ||
1810                      outPixels >= inPixels + inputStride * inputHeight * sizeof(PIXEL) || outPixels <= inPixels - desiredWidth * desiredHeight * sizeof(PIXEL)) &&
1811                     "The input and output buffers must not overlap for an upscaling.");
1812   DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(inPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");
1813   DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(outPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");
1814
1815   if(inputWidth < 1u || inputHeight < 1u || desiredWidth < 1u || desiredHeight < 1u)
1816   {
1817     return;
1818   }
1819   const PIXEL* const inAligned  = reinterpret_cast<const PIXEL*>(inPixels);
1820   PIXEL* const       outAligned = reinterpret_cast<PIXEL*>(outPixels);
1821   const uint32_t     deltaX     = (inputWidth << 16u) / desiredWidth;
1822   const uint32_t     deltaY     = (inputHeight << 16u) / desiredHeight;
1823
1824   uint32_t inY = 0;
1825   for(uint32_t outY = 0; outY < desiredHeight; ++outY)
1826   {
1827     // Round fixed point y coordinate to nearest integer:
1828     const uint32_t     integerY    = (inY + (1u << 15u)) >> 16u;
1829     const PIXEL* const inScanline  = &inAligned[inputStride * integerY];
1830     PIXEL* const       outScanline = &outAligned[desiredWidth * outY];
1831
1832     DALI_ASSERT_DEBUG(integerY < inputHeight);
1833     DALI_ASSERT_DEBUG(reinterpret_cast<const uint8_t*>(inScanline) < (inPixels + inputStride * inputHeight * sizeof(PIXEL)));
1834     DALI_ASSERT_DEBUG(reinterpret_cast<uint8_t*>(outScanline) < (outPixels + desiredWidth * desiredHeight * sizeof(PIXEL)));
1835
1836     uint32_t inX = 0;
1837     for(uint32_t outX = 0; outX < desiredWidth; ++outX)
1838     {
1839       // Round the fixed-point x coordinate to an integer:
1840       const uint32_t     integerX       = (inX + (1u << 15u)) >> 16u;
1841       const PIXEL* const inPixelAddress = &inScanline[integerX];
1842       const PIXEL        pixel          = *inPixelAddress;
1843       outScanline[outX]                 = pixel;
1844       inX += deltaX;
1845     }
1846     inY += deltaY;
1847   }
1848 }
1849
1850 } // namespace
1851
1852 // RGBA8888
1853 void PointSample4BPP(const uint8_t* inPixels,
1854                      uint32_t       inputWidth,
1855                      uint32_t       inputHeight,
1856                      uint32_t       inputStride,
1857                      uint8_t*       outPixels,
1858                      uint32_t       desiredWidth,
1859                      uint32_t       desiredHeight)
1860 {
1861   PointSampleAddressablePixels<uint32_t>(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1862 }
1863
1864 // RGB565, LA88
1865 void PointSample2BPP(const uint8_t* inPixels,
1866                      uint32_t       inputWidth,
1867                      uint32_t       inputHeight,
1868                      uint32_t       inputStride,
1869                      uint8_t*       outPixels,
1870                      uint32_t       desiredWidth,
1871                      uint32_t       desiredHeight)
1872 {
1873   PointSampleAddressablePixels<uint16_t>(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1874 }
1875
1876 // L8, A8
1877 void PointSample1BPP(const uint8_t* inPixels,
1878                      uint32_t       inputWidth,
1879                      uint32_t       inputHeight,
1880                      uint32_t       inputStride,
1881                      uint8_t*       outPixels,
1882                      uint32_t       desiredWidth,
1883                      uint32_t       desiredHeight)
1884 {
1885   PointSampleAddressablePixels<uint8_t>(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1886 }
1887
/* RGB888
 * RGB888 is a special case as its pixels are not aligned addressable units.
 *
 * Point samples (nearest neighbour) a 3-bytes-per-pixel image to a new resolution.
 * inputStride is the distance between the starts of input rows in pixels; the output
 * is written tightly packed with desiredWidth pixels per row. Nothing is written if
 * any of the four dimensions is zero.
 *
 * Input coordinates are tracked in 16.16 fixed point and rounded to the nearest
 * integer. The rounded coordinate is clamped to the last valid row / column, because
 * rounding can otherwise land one past the end of the input when upscaling by a
 * factor of two or more (e.g. 1 -> 2 pixels).
 *
 * Note: The 16.16 deltas are computed as (dimension << 16) in 32 bits, so the input
 * dimensions must fit in 16 bits.
 */
void PointSample3BPP(const uint8_t* inPixels,
                     uint32_t       inputWidth,
                     uint32_t       inputHeight,
                     uint32_t       inputStride,
                     uint8_t*       outPixels,
                     uint32_t       desiredWidth,
                     uint32_t       desiredHeight)
{
  // Nothing to sample from or nothing to produce (this also avoids dividing by zero below).
  if(inputWidth < 1u || inputHeight < 1u || desiredWidth < 1u || desiredHeight < 1u)
  {
    return;
  }
  const uint32_t BYTES_PER_PIXEL = 3;

  // Generate fixed-point 16.16 deltas in input image coordinates:
  const uint32_t deltaX = (inputWidth << 16u) / desiredWidth;
  const uint32_t deltaY = (inputHeight << 16u) / desiredHeight;

  // Highest coordinates that may legally be read:
  const uint32_t lastInputX = inputWidth - 1u;
  const uint32_t lastInputY = inputHeight - 1u;

  // Step through output image in whole integer pixel steps while tracking the
  // corresponding locations in the input image using 16.16 fixed-point
  // coordinates:
  uint32_t inY = 0; //< 16.16 fixed-point input image y-coord.
  for(uint32_t outY = 0; outY < desiredHeight; ++outY)
  {
    // Round to the nearest input row, staying inside the image:
    const uint32_t roundedY = (inY + (1u << 15u)) >> 16u;
    const uint32_t integerY = roundedY > lastInputY ? lastInputY : roundedY;

    // Byte offsets are computed in size_t so that large images cannot wrap 32 bits.
    const uint8_t* const inScanline  = &inPixels[static_cast<size_t>(inputStride) * integerY * BYTES_PER_PIXEL];
    uint8_t* const       outScanline = &outPixels[static_cast<size_t>(desiredWidth) * outY * BYTES_PER_PIXEL];
    uint32_t             inX         = 0; //< 16.16 fixed-point input image x-coord.

    for(uint32_t outX = 0; outX < desiredWidth * BYTES_PER_PIXEL; outX += BYTES_PER_PIXEL)
    {
      // Round the fixed-point input coordinate to the address of the input pixel to sample,
      // staying inside the row:
      const uint32_t       roundedX       = (inX + (1u << 15u)) >> 16u;
      const uint32_t       integerX       = roundedX > lastInputX ? lastInputX : roundedX;
      const uint8_t* const inPixelAddress = &inScanline[integerX * BYTES_PER_PIXEL];

      // Issue loads for all pixel color components up-front:
      const uint32_t c0 = inPixelAddress[0];
      const uint32_t c1 = inPixelAddress[1];
      const uint32_t c2 = inPixelAddress[2];
      ///@ToDo: Optimise - Benchmark one 32bit load that will be unaligned 2/3 of the time + 3 rotate and masks, versus these three aligned byte loads, versus using an RGB packed, aligned(1) struct and letting compiler pick a strategy.

      // Output the pixel components:
      outScanline[outX]     = static_cast<uint8_t>(c0);
      outScanline[outX + 1] = static_cast<uint8_t>(c1);
      outScanline[outX + 2] = static_cast<uint8_t>(c2);

      // Increment the fixed-point input coordinate:
      inX += deltaX;
    }

    inY += deltaY;
  }
}
1944
1945 // Dispatch to a format-appropriate point sampling function:
1946 void PointSample(const uint8_t* inPixels,
1947                  uint32_t       inputWidth,
1948                  uint32_t       inputHeight,
1949                  uint32_t       inputStride,
1950                  Pixel::Format  pixelFormat,
1951                  uint8_t*       outPixels,
1952                  uint32_t       desiredWidth,
1953                  uint32_t       desiredHeight)
1954 {
1955   // Check the pixel format is one that is supported:
1956   if(pixelFormat == Pixel::RGBA8888 || pixelFormat == Pixel::RGB888 || pixelFormat == Pixel::RGB565 || pixelFormat == Pixel::LA88 || pixelFormat == Pixel::L8 || pixelFormat == Pixel::A8 || pixelFormat == Pixel::CHROMINANCE_U || pixelFormat == Pixel::CHROMINANCE_V)
1957   {
1958     switch(pixelFormat)
1959     {
1960       case Pixel::RGB888:
1961       {
1962         PointSample3BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1963         break;
1964       }
1965       case Pixel::RGBA8888:
1966       {
1967         PointSample4BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1968         break;
1969       }
1970       case Pixel::RGB565:
1971       case Pixel::LA88:
1972       {
1973         PointSample2BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1974         break;
1975       }
1976       case Pixel::L8:
1977       case Pixel::A8:
1978       case Pixel::CHROMINANCE_U:
1979       case Pixel::CHROMINANCE_V:
1980       {
1981         PointSample1BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1982         break;
1983       }
1984       default:
1985       {
1986         DALI_ASSERT_DEBUG(0 == "Inner branch conditions don't match outer branch.");
1987       }
1988     }
1989   }
1990   else
1991   {
1992     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Bitmap was not point sampled: unsupported pixel format: %u.\n", uint32_t(pixelFormat));
1993   }
1994 }
1995
1996 // Linear sampling group below
1997
1998 namespace
1999 {
2000 /** @brief Blend 4 pixels together using horizontal and vertical weights. */
2001 inline uint8_t BilinearFilter1BPPByte(uint8_t tl, uint8_t tr, uint8_t bl, uint8_t br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical)
2002 {
2003   return static_cast<uint8_t>(BilinearFilter1Component(tl, tr, bl, br, fractBlendHorizontal, fractBlendVertical));
2004 }
2005
2006 /** @copydoc BilinearFilter1BPPByte */
2007 inline Pixel2Bytes BilinearFilter2Bytes(Pixel2Bytes tl, Pixel2Bytes tr, Pixel2Bytes bl, Pixel2Bytes br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical)
2008 {
2009   Pixel2Bytes pixel;
2010   pixel.l = static_cast<uint8_t>(BilinearFilter1Component(tl.l, tr.l, bl.l, br.l, fractBlendHorizontal, fractBlendVertical));
2011   pixel.a = static_cast<uint8_t>(BilinearFilter1Component(tl.a, tr.a, bl.a, br.a, fractBlendHorizontal, fractBlendVertical));
2012   return pixel;
2013 }
2014
2015 /** @copydoc BilinearFilter1BPPByte */
2016 inline Pixel3Bytes BilinearFilterRGB888(Pixel3Bytes tl, Pixel3Bytes tr, Pixel3Bytes bl, Pixel3Bytes br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical)
2017 {
2018   Pixel3Bytes pixel;
2019   pixel.r = static_cast<uint8_t>(BilinearFilter1Component(tl.r, tr.r, bl.r, br.r, fractBlendHorizontal, fractBlendVertical));
2020   pixel.g = static_cast<uint8_t>(BilinearFilter1Component(tl.g, tr.g, bl.g, br.g, fractBlendHorizontal, fractBlendVertical));
2021   pixel.b = static_cast<uint8_t>(BilinearFilter1Component(tl.b, tr.b, bl.b, br.b, fractBlendHorizontal, fractBlendVertical));
2022   return pixel;
2023 }
2024
2025 /** @copydoc BilinearFilter1BPPByte */
2026 inline PixelRGB565 BilinearFilterRGB565(PixelRGB565 tl, PixelRGB565 tr, PixelRGB565 bl, PixelRGB565 br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical)
2027 {
2028   const PixelRGB565 pixel = static_cast<PixelRGB565>((BilinearFilter1Component(tl >> 11u, tr >> 11u, bl >> 11u, br >> 11u, fractBlendHorizontal, fractBlendVertical) << 11u) +
2029                                                      (BilinearFilter1Component((tl >> 5u) & 63u, (tr >> 5u) & 63u, (bl >> 5u) & 63u, (br >> 5u) & 63u, fractBlendHorizontal, fractBlendVertical) << 5u) +
2030                                                      BilinearFilter1Component(tl & 31u, tr & 31u, bl & 31u, br & 31u, fractBlendHorizontal, fractBlendVertical));
2031   return pixel;
2032 }
2033
2034 /** @copydoc BilinearFilter1BPPByte */
2035 inline Pixel4Bytes BilinearFilter4Bytes(Pixel4Bytes tl, Pixel4Bytes tr, Pixel4Bytes bl, Pixel4Bytes br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical)
2036 {
2037   Pixel4Bytes pixel;
2038   pixel.r = static_cast<uint8_t>(BilinearFilter1Component(tl.r, tr.r, bl.r, br.r, fractBlendHorizontal, fractBlendVertical));
2039   pixel.g = static_cast<uint8_t>(BilinearFilter1Component(tl.g, tr.g, bl.g, br.g, fractBlendHorizontal, fractBlendVertical));
2040   pixel.b = static_cast<uint8_t>(BilinearFilter1Component(tl.b, tr.b, bl.b, br.b, fractBlendHorizontal, fractBlendVertical));
2041   pixel.a = static_cast<uint8_t>(BilinearFilter1Component(tl.a, tr.a, bl.a, br.a, fractBlendHorizontal, fractBlendVertical));
2042   return pixel;
2043 }
2044
2045 /**
2046  * @brief Generic version of bilinear sampling image resize function.
2047  * @note Limited to one compilation unit and exposed through type-specific
2048  * wrapper functions below.
2049  */
2050 template<
2051   typename PIXEL,
2052   PIXEL (*BilinearFilter)(PIXEL tl, PIXEL tr, PIXEL bl, PIXEL br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical),
2053   bool DEBUG_ASSERT_ALIGNMENT>
2054 inline void LinearSampleGeneric(const uint8_t* __restrict__ inPixels,
2055                                 ImageDimensions inputDimensions,
2056                                 uint32_t        inputStride,
2057                                 uint8_t* __restrict__ outPixels,
2058                                 ImageDimensions desiredDimensions)
2059 {
2060   const uint32_t inputWidth    = inputDimensions.GetWidth();
2061   const uint32_t inputHeight   = inputDimensions.GetHeight();
2062   const uint32_t desiredWidth  = desiredDimensions.GetWidth();
2063   const uint32_t desiredHeight = desiredDimensions.GetHeight();
2064
2065   DALI_ASSERT_DEBUG(((outPixels >= inPixels + inputStride * inputHeight * sizeof(PIXEL)) ||
2066                      (inPixels >= outPixels + desiredWidth * desiredHeight * sizeof(PIXEL))) &&
2067                     "Input and output buffers cannot overlap.");
2068   if(DEBUG_ASSERT_ALIGNMENT)
2069   {
2070     DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(inPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");
2071     DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(outPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");
2072   }
2073
2074   if(inputWidth < 1u || inputHeight < 1u || desiredWidth < 1u || desiredHeight < 1u)
2075   {
2076     return;
2077   }
2078   const PIXEL* const inAligned  = reinterpret_cast<const PIXEL*>(inPixels);
2079   PIXEL* const       outAligned = reinterpret_cast<PIXEL*>(outPixels);
2080   const uint32_t     deltaX     = (inputWidth << 16u) / desiredWidth;
2081   const uint32_t     deltaY     = (inputHeight << 16u) / desiredHeight;
2082
2083   uint32_t inY = 0;
2084   for(uint32_t outY = 0; outY < desiredHeight; ++outY)
2085   {
2086     PIXEL* const outScanline = &outAligned[desiredWidth * outY];
2087
2088     // Find the two scanlines to blend and the weight to blend with:
2089     const uint32_t integerY1    = inY >> 16u;
2090     const uint32_t integerY2    = integerY1 + 1 >= inputHeight ? integerY1 : integerY1 + 1;
2091     const uint32_t inputYWeight = inY & 65535u;
2092
2093     DALI_ASSERT_DEBUG(integerY1 < inputHeight);
2094     DALI_ASSERT_DEBUG(integerY2 < inputHeight);
2095
2096     const PIXEL* const inScanline1 = &inAligned[inputStride * integerY1];
2097     const PIXEL* const inScanline2 = &inAligned[inputStride * integerY2];
2098
2099     uint32_t inX = 0;
2100     for(uint32_t outX = 0; outX < desiredWidth; ++outX)
2101     {
2102       // Work out the two pixel scanline offsets for this cluster of four samples:
2103       const uint32_t integerX1 = inX >> 16u;
2104       const uint32_t integerX2 = integerX1 + 1 >= inputWidth ? integerX1 : integerX1 + 1;
2105
2106       // Execute the loads:
2107       const PIXEL pixel1 = inScanline1[integerX1];
2108       const PIXEL pixel2 = inScanline2[integerX1];
2109       const PIXEL pixel3 = inScanline1[integerX2];
2110       const PIXEL pixel4 = inScanline2[integerX2];
2111       ///@ToDo Optimise - for 1 and 2  and 4 byte types to execute a single 2, 4, or 8 byte load per pair (caveat clamping) and let half of them be unaligned.
2112
2113       // Weighted bilinear filter:
2114       const uint32_t inputXWeight = inX & 65535u;
2115       outScanline[outX]           = BilinearFilter(pixel1, pixel3, pixel2, pixel4, inputXWeight, inputYWeight);
2116
2117       inX += deltaX;
2118     }
2119     inY += deltaY;
2120   }
2121 }
2122
2123 } // namespace
2124
2125 // Format-specific linear scaling instantiations:
2126
2127 void LinearSample1BPP(const uint8_t* __restrict__ inPixels,
2128                       ImageDimensions inputDimensions,
2129                       uint32_t        inputStride,
2130                       uint8_t* __restrict__ outPixels,
2131                       ImageDimensions desiredDimensions)
2132 {
2133   LinearSampleGeneric<uint8_t, BilinearFilter1BPPByte, false>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2134 }
2135
2136 void LinearSample2BPP(const uint8_t* __restrict__ inPixels,
2137                       ImageDimensions inputDimensions,
2138                       uint32_t        inputStride,
2139                       uint8_t* __restrict__ outPixels,
2140                       ImageDimensions desiredDimensions)
2141 {
2142   LinearSampleGeneric<Pixel2Bytes, BilinearFilter2Bytes, true>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2143 }
2144
2145 void LinearSampleRGB565(const uint8_t* __restrict__ inPixels,
2146                         ImageDimensions inputDimensions,
2147                         uint32_t        inputStride,
2148                         uint8_t* __restrict__ outPixels,
2149                         ImageDimensions desiredDimensions)
2150 {
2151   LinearSampleGeneric<PixelRGB565, BilinearFilterRGB565, true>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2152 }
2153
2154 void LinearSample3BPP(const uint8_t* __restrict__ inPixels,
2155                       ImageDimensions inputDimensions,
2156                       uint32_t        inputStride,
2157                       uint8_t* __restrict__ outPixels,
2158                       ImageDimensions desiredDimensions)
2159 {
2160   LinearSampleGeneric<Pixel3Bytes, BilinearFilterRGB888, false>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2161 }
2162
2163 void LinearSample4BPP(const uint8_t* __restrict__ inPixels,
2164                       ImageDimensions inputDimensions,
2165                       uint32_t        inputStride,
2166                       uint8_t* __restrict__ outPixels,
2167                       ImageDimensions desiredDimensions)
2168 {
2169   LinearSampleGeneric<Pixel4Bytes, BilinearFilter4Bytes, true>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2170 }
2171
2172 // Dispatch to a format-appropriate linear sampling function:
2173 void LinearSample(const uint8_t* __restrict__ inPixels,
2174                   ImageDimensions inDimensions,
2175                   uint32_t        inStride,
2176                   Pixel::Format   pixelFormat,
2177                   uint8_t* __restrict__ outPixels,
2178                   ImageDimensions outDimensions)
2179 {
2180   // Check the pixel format is one that is supported:
2181   if(pixelFormat == Pixel::RGB888 || pixelFormat == Pixel::RGBA8888 || pixelFormat == Pixel::L8 || pixelFormat == Pixel::A8 || pixelFormat == Pixel::LA88 || pixelFormat == Pixel::RGB565 || pixelFormat == Pixel::CHROMINANCE_U || pixelFormat == Pixel::CHROMINANCE_V)
2182   {
2183     switch(pixelFormat)
2184     {
2185       case Pixel::RGB888:
2186       {
2187         LinearSample3BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2188         break;
2189       }
2190       case Pixel::RGBA8888:
2191       {
2192         LinearSample4BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2193         break;
2194       }
2195       case Pixel::L8:
2196       case Pixel::A8:
2197       case Pixel::CHROMINANCE_U:
2198       case Pixel::CHROMINANCE_V:
2199       {
2200         LinearSample1BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2201         break;
2202       }
2203       case Pixel::LA88:
2204       {
2205         LinearSample2BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2206         break;
2207       }
2208       case Pixel::RGB565:
2209       {
2210         LinearSampleRGB565(inPixels, inDimensions, inStride, outPixels, outDimensions);
2211         break;
2212       }
2213       default:
2214       {
2215         DALI_ASSERT_DEBUG(0 == "Inner branch conditions don't match outer branch.");
2216       }
2217     }
2218   }
2219   else
2220   {
2221     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Bitmap was not linear sampled: unsupported pixel format: %u.\n", uint32_t(pixelFormat));
2222   }
2223 }
2224
2225 void Resample(const uint8_t* __restrict__ inPixels,
2226               ImageDimensions inputDimensions,
2227               uint32_t        inputStride,
2228               uint8_t* __restrict__ outPixels,
2229               ImageDimensions   desiredDimensions,
2230               Resampler::Filter filterType,
2231               int               numChannels,
2232               bool              hasAlpha)
2233 {
2234   // Got from the test.cpp of the ImageResampler lib.
2235   const float ONE_DIV_255               = 1.0f / 255.0f;
2236   const int   MAX_UNSIGNED_CHAR         = std::numeric_limits<uint8_t>::max();
2237   const int   LINEAR_TO_SRGB_TABLE_SIZE = 4096;
2238   const int   ALPHA_CHANNEL             = hasAlpha ? (numChannels - 1) : 0;
2239
2240   static bool    loadColorSpaces = true;
2241   static float   srgbToLinear[MAX_UNSIGNED_CHAR + 1];
2242   static uint8_t linearToSrgb[LINEAR_TO_SRGB_TABLE_SIZE];
2243
2244   if(loadColorSpaces) // Only create the color space conversions on the first execution
2245   {
2246     loadColorSpaces = false;
2247
2248     for(int i = 0; i <= MAX_UNSIGNED_CHAR; ++i)
2249     {
2250       srgbToLinear[i] = pow(static_cast<float>(i) * ONE_DIV_255, DEFAULT_SOURCE_GAMMA);
2251     }
2252
2253     const float invLinearToSrgbTableSize = 1.0f / static_cast<float>(LINEAR_TO_SRGB_TABLE_SIZE);
2254     const float invSourceGamma           = 1.0f / DEFAULT_SOURCE_GAMMA;
2255
2256     for(int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i)
2257     {
2258       int k = static_cast<int>(255.0f * pow(static_cast<float>(i) * invLinearToSrgbTableSize, invSourceGamma) + 0.5f);
2259       if(k < 0)
2260       {
2261         k = 0;
2262       }
2263       else if(k > MAX_UNSIGNED_CHAR)
2264       {
2265         k = MAX_UNSIGNED_CHAR;
2266       }
2267       linearToSrgb[i] = static_cast<uint8_t>(k);
2268     }
2269   }
2270
2271   std::vector<Resampler*>    resamplers(numChannels);
2272   std::vector<Vector<float>> samples(numChannels);
2273
2274   const int srcWidth  = inputDimensions.GetWidth();
2275   const int srcHeight = inputDimensions.GetHeight();
2276   const int dstWidth  = desiredDimensions.GetWidth();
2277   const int dstHeight = desiredDimensions.GetHeight();
2278
2279   // Now create a Resampler instance for each component to process. The first instance will create new contributor tables, which are shared by the resamplers
2280   // used for the other components (a memory and slight cache efficiency optimization).
2281   resamplers[0] = new Resampler(srcWidth,
2282                                 srcHeight,
2283                                 dstWidth,
2284                                 dstHeight,
2285                                 Resampler::BOUNDARY_CLAMP,
2286                                 0.0f,          // sample_low,
2287                                 1.0f,          // sample_high. Clamp output samples to specified range, or disable clamping if sample_low >= sample_high.
2288                                 filterType,    // The type of filter.
2289                                 NULL,          // Pclist_x,
2290                                 NULL,          // Pclist_y. Optional pointers to contributor lists from another instance of a Resampler.
2291                                 FILTER_SCALE,  // src_x_ofs,
2292                                 FILTER_SCALE); // src_y_ofs. Offset input image by specified amount (fractional values okay).
2293   samples[0].ResizeUninitialized(srcWidth);
2294   for(int i = 1; i < numChannels; ++i)
2295   {
2296     resamplers[i] = new Resampler(srcWidth,
2297                                   srcHeight,
2298                                   dstWidth,
2299                                   dstHeight,
2300                                   Resampler::BOUNDARY_CLAMP,
2301                                   0.0f,
2302                                   1.0f,
2303                                   filterType,
2304                                   resamplers[0]->get_clist_x(),
2305                                   resamplers[0]->get_clist_y(),
2306                                   FILTER_SCALE,
2307                                   FILTER_SCALE);
2308     samples[i].ResizeUninitialized(srcWidth);
2309   }
2310
2311   const int srcPitch = inputStride * numChannels;
2312   const int dstPitch = dstWidth * numChannels;
2313   int       dstY     = 0;
2314
2315   for(int srcY = 0; srcY < srcHeight; ++srcY)
2316   {
2317     const uint8_t* pSrc = &inPixels[srcY * srcPitch];
2318
2319     for(int x = 0; x < srcWidth; ++x)
2320     {
2321       for(int c = 0; c < numChannels; ++c)
2322       {
2323         if(c == ALPHA_CHANNEL && hasAlpha)
2324         {
2325           samples[c][x] = *pSrc++ * ONE_DIV_255;
2326         }
2327         else
2328         {
2329           samples[c][x] = srgbToLinear[*pSrc++];
2330         }
2331       }
2332     }
2333
2334     for(int c = 0; c < numChannels; ++c)
2335     {
2336       if(!resamplers[c]->put_line(&samples[c][0]))
2337       {
2338         DALI_ASSERT_DEBUG(!"Out of memory");
2339       }
2340     }
2341
2342     for(;;)
2343     {
2344       int compIndex;
2345       for(compIndex = 0; compIndex < numChannels; ++compIndex)
2346       {
2347         const float* pOutputSamples = resamplers[compIndex]->get_line();
2348         if(!pOutputSamples)
2349         {
2350           break;
2351         }
2352
2353         const bool isAlphaChannel = (compIndex == ALPHA_CHANNEL && hasAlpha);
2354         DALI_ASSERT_DEBUG(dstY < dstHeight);
2355         uint8_t* pDst = &outPixels[dstY * dstPitch + compIndex];
2356
2357         for(int x = 0; x < dstWidth; ++x)
2358         {
2359           if(isAlphaChannel)
2360           {
2361             int c = static_cast<int>(255.0f * pOutputSamples[x] + 0.5f);
2362             if(c < 0)
2363             {
2364               c = 0;
2365             }
2366             else if(c > MAX_UNSIGNED_CHAR)
2367             {
2368               c = MAX_UNSIGNED_CHAR;
2369             }
2370             *pDst = static_cast<uint8_t>(c);
2371           }
2372           else
2373           {
2374             int j = static_cast<int>(LINEAR_TO_SRGB_TABLE_SIZE * pOutputSamples[x] + 0.5f);
2375             if(j < 0)
2376             {
2377               j = 0;
2378             }
2379             else if(j >= LINEAR_TO_SRGB_TABLE_SIZE)
2380             {
2381               j = LINEAR_TO_SRGB_TABLE_SIZE - 1;
2382             }
2383             *pDst = linearToSrgb[j];
2384           }
2385
2386           pDst += numChannels;
2387         }
2388       }
2389       if(compIndex < numChannels)
2390       {
2391         break;
2392       }
2393
2394       ++dstY;
2395     }
2396   }
2397
2398   // Delete the resamplers.
2399   for(int i = 0; i < numChannels; ++i)
2400   {
2401     delete resamplers[i];
2402   }
2403 }
2404
2405 void LanczosSample4BPP(const uint8_t* __restrict__ inPixels,
2406                        ImageDimensions inputDimensions,
2407                        uint32_t        inputStride,
2408                        uint8_t* __restrict__ outPixels,
2409                        ImageDimensions desiredDimensions)
2410 {
2411   Resample(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions, Resampler::LANCZOS4, 4, true);
2412 }
2413
2414 void LanczosSample1BPP(const uint8_t* __restrict__ inPixels,
2415                        ImageDimensions inputDimensions,
2416                        uint32_t        inputStride,
2417                        uint8_t* __restrict__ outPixels,
2418                        ImageDimensions desiredDimensions)
2419 {
2420   // For L8 images
2421   Resample(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions, Resampler::LANCZOS4, 1, false);
2422 }
2423
2424 // Dispatch to a format-appropriate third-party resampling function:
2425 void LanczosSample(const uint8_t* __restrict__ inPixels,
2426                    ImageDimensions inDimensions,
2427                    uint32_t        inStride,
2428                    Pixel::Format   pixelFormat,
2429                    uint8_t* __restrict__ outPixels,
2430                    ImageDimensions outDimensions)
2431 {
2432   // Check the pixel format is one that is supported:
2433   if(pixelFormat == Pixel::RGBA8888 || pixelFormat == Pixel::BGRA8888 || pixelFormat == Pixel::L8 || pixelFormat == Pixel::A8)
2434   {
2435     switch(pixelFormat)
2436     {
2437       case Pixel::RGBA8888:
2438       case Pixel::BGRA8888:
2439       {
2440         LanczosSample4BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2441         break;
2442       }
2443       case Pixel::L8:
2444       case Pixel::A8:
2445       {
2446         LanczosSample1BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2447         break;
2448       }
2449       default:
2450       {
2451         DALI_ASSERT_DEBUG(0 == "Inner branch conditions don't match outer branch.");
2452       }
2453     }
2454   }
2455   else
2456   {
2457     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Bitmap was not lanczos sampled: unsupported pixel format: %u.\n", static_cast<uint32_t>(pixelFormat));
2458   }
2459 }
2460
2461 void RotateByShear(const uint8_t* const pixelsIn,
2462                    uint32_t             widthIn,
2463                    uint32_t             heightIn,
2464                    uint32_t             strideIn,
2465                    uint32_t             pixelSize,
2466                    float                radians,
2467                    uint8_t*&            pixelsOut,
2468                    uint32_t&            widthOut,
2469                    uint32_t&            heightOut)
2470 {
2471   // @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
2472
2473   // Do first the fast rotations to transform the angle into a (-45..45] range.
2474
2475   bool fastRotationPerformed = false;
2476   if((radians > Math::PI_4) && (radians <= RAD_135))
2477   {
2478     // Angle in (45.0 .. 135.0]
2479     // Rotate image by 90 degrees into temporary image,
2480     // so it requires only an extra rotation angle
2481     // of -45.0 .. +45.0 to complete rotation.
2482     fastRotationPerformed = Rotate90(pixelsIn,
2483                                      widthIn,
2484                                      heightIn,
2485                                      strideIn,
2486                                      pixelSize,
2487                                      pixelsOut,
2488                                      widthOut,
2489                                      heightOut);
2490
2491     if(!fastRotationPerformed)
2492     {
2493       DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "fast rotation failed\n");
2494       // The fast rotation failed.
2495       return;
2496     }
2497
2498     radians -= Math::PI_2;
2499   }
2500   else if((radians > RAD_135) && (radians <= RAD_225))
2501   {
2502     // Angle in (135.0 .. 225.0]
2503     // Rotate image by 180 degrees into temporary image,
2504     // so it requires only an extra rotation angle
2505     // of -45.0 .. +45.0 to complete rotation.
2506
2507     fastRotationPerformed = Rotate180(pixelsIn,
2508                                       widthIn,
2509                                       heightIn,
2510                                       strideIn,
2511                                       pixelSize,
2512                                       pixelsOut);
2513
2514     if(!fastRotationPerformed)
2515     {
2516       DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "fast rotation failed\n");
2517       // The fast rotation failed.
2518       return;
2519     }
2520
2521     radians -= Math::PI;
2522     widthOut  = widthIn;
2523     heightOut = heightIn;
2524   }
2525   else if((radians > RAD_225) && (radians <= RAD_315))
2526   {
2527     // Angle in (225.0 .. 315.0]
2528     // Rotate image by 270 degrees into temporary image,
2529     // so it requires only an extra rotation angle
2530     // of -45.0 .. +45.0 to complete rotation.
2531
2532     fastRotationPerformed = Rotate270(pixelsIn,
2533                                       widthIn,
2534                                       heightIn,
2535                                       strideIn,
2536                                       pixelSize,
2537                                       pixelsOut,
2538                                       widthOut,
2539                                       heightOut);
2540
2541     if(!fastRotationPerformed)
2542     {
2543       DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "fast rotation failed\n");
2544       // The fast rotation failed.
2545       return;
2546     }
2547
2548     radians -= RAD_270;
2549   }
2550
2551   if(fabs(radians) < Dali::Math::MACHINE_EPSILON_10)
2552   {
2553     // Nothing else to do if the angle is zero.
2554     // The rotation angle was 90, 180 or 270.
2555
2556     // @note Allocated memory by 'Fast Rotations', if any, has to be freed by the called to this function.
2557     return;
2558   }
2559
2560   const uint8_t* const                      firstHorizontalSkewPixelsIn = fastRotationPerformed ? pixelsOut : pixelsIn;
2561   std::unique_ptr<uint8_t, void (*)(void*)> tmpPixelsInPtr((fastRotationPerformed ? pixelsOut : nullptr), free);
2562
2563   uint32_t stride = fastRotationPerformed ? widthOut : strideIn;
2564
2565   // Reset the input/output
2566   widthIn   = widthOut;
2567   heightIn  = heightOut;
2568   pixelsOut = nullptr;
2569
2570   const float angleSinus   = sin(radians);
2571   const float angleCosinus = cos(radians);
2572   const float angleTangent = tan(0.5f * radians);
2573
2574   ///////////////////////////////////////
2575   // Perform 1st shear (horizontal)
2576   ///////////////////////////////////////
2577
2578   // Calculate first shear (horizontal) destination image dimensions
2579
2580   widthOut  = widthIn + static_cast<uint32_t>(fabs(angleTangent) * static_cast<float>(heightIn));
2581   heightOut = heightIn;
2582
2583   // Allocate the buffer for the 1st shear
2584   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2585
2586   if(nullptr == pixelsOut)
2587   {
2588     DALI_LOG_ERROR("malloc is failed. request malloc size : %u x %u x %u\n", widthOut, heightOut, pixelSize);
2589     widthOut  = 0u;
2590     heightOut = 0u;
2591
2592     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2593
2594     // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'Fast rotations'.
2595     // Nothing else to do if the memory allocation fails.
2596     return;
2597   }
2598
2599   for(uint32_t y = 0u; y < heightOut; ++y)
2600   {
2601     const float shear = angleTangent * ((angleTangent >= 0.f) ? (0.5f + static_cast<float>(y)) : (0.5f + static_cast<float>(y) - static_cast<float>(heightOut)));
2602
2603     const int intShear = static_cast<int>(floor(shear));
2604     HorizontalSkew(firstHorizontalSkewPixelsIn, widthIn, stride, pixelSize, pixelsOut, widthOut, y, intShear, shear - static_cast<float>(intShear));
2605   }
2606
2607   // Reset the 'pixel in' pointer with the output of the 'First Horizontal Skew' and free the memory allocated by the 'Fast Rotations'.
2608   tmpPixelsInPtr.reset(pixelsOut);
2609   uint32_t tmpWidthIn  = widthOut;
2610   uint32_t tmpHeightIn = heightOut;
2611
2612   // Reset the input/output
2613   pixelsOut = nullptr;
2614
2615   ///////////////////////////////////////
2616   // Perform 2nd shear (vertical)
2617   ///////////////////////////////////////
2618
2619   // Calc 2nd shear (vertical) destination image dimensions
2620   heightOut = static_cast<uint32_t>(static_cast<float>(widthIn) * fabs(angleSinus) + static_cast<float>(heightIn) * angleCosinus);
2621
2622   // Allocate the buffer for the 2nd shear
2623   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2624
2625   if(nullptr == pixelsOut)
2626   {
2627     DALI_LOG_ERROR("malloc is failed. request malloc size : %u x %u x %u\n", widthOut, heightOut, pixelSize);
2628     widthOut  = 0u;
2629     heightOut = 0u;
2630
2631     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2632     // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'First Horizontal Skew'.
2633     // Nothing else to do if the memory allocation fails.
2634     return;
2635   }
2636
2637   // Variable skew offset
2638   float offset = angleSinus * ((angleSinus > 0.f) ? static_cast<float>(widthIn - 1u) : -(static_cast<float>(widthIn) - static_cast<float>(widthOut)));
2639
2640   uint32_t column = 0u;
2641   for(column = 0u; column < widthOut; ++column, offset -= angleSinus)
2642   {
2643     const int32_t shear = static_cast<int32_t>(floor(offset));
2644     VerticalSkew(tmpPixelsInPtr.get(), tmpWidthIn, tmpHeightIn, tmpWidthIn, pixelSize, pixelsOut, widthOut, heightOut, column, shear, offset - static_cast<float>(shear));
2645   }
2646   // Reset the 'pixel in' pointer with the output of the 'Vertical Skew' and free the memory allocated by the 'First Horizontal Skew'.
2647   // Reset the input/output
2648   tmpPixelsInPtr.reset(pixelsOut);
2649   tmpWidthIn  = widthOut;
2650   tmpHeightIn = heightOut;
2651   pixelsOut   = nullptr;
2652
2653   ///////////////////////////////////////
2654   // Perform 3rd shear (horizontal)
2655   ///////////////////////////////////////
2656
2657   // Calc 3rd shear (horizontal) destination image dimensions
2658   widthOut = static_cast<uint32_t>(static_cast<float>(heightIn) * fabs(angleSinus) + static_cast<float>(widthIn) * angleCosinus) + 1u;
2659
2660   // Allocate the buffer for the 3rd shear
2661   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2662
2663   if(nullptr == pixelsOut)
2664   {
2665     DALI_LOG_ERROR("malloc is failed. request malloc size : %u x %u x %u\n", widthOut, heightOut, pixelSize);
2666     widthOut  = 0u;
2667     heightOut = 0u;
2668
2669     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2670     // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'Vertical Skew'.
2671     // Nothing else to do if the memory allocation fails.
2672     return;
2673   }
2674
2675   offset = (angleSinus >= 0.f) ? -angleSinus * angleTangent * static_cast<float>(widthIn - 1u) : angleTangent * (static_cast<float>(widthIn - 1u) * -angleSinus + (1.f - static_cast<float>(heightOut)));
2676
2677   for(uint32_t y = 0u; y < heightOut; ++y, offset += angleTangent)
2678   {
2679     const int32_t shear = static_cast<int32_t>(floor(offset));
2680     HorizontalSkew(tmpPixelsInPtr.get(), tmpWidthIn, tmpWidthIn, pixelSize, pixelsOut, widthOut, y, shear, offset - static_cast<float>(shear));
2681   }
2682
2683   // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'Vertical Skew'.
2684   // @note Allocated memory by the last 'Horizontal Skew' has to be freed by the caller to this function.
2685 }
2686
2687 void HorizontalShear(const uint8_t* const pixelsIn,
2688                      uint32_t             widthIn,
2689                      uint32_t             heightIn,
2690                      uint32_t             strideIn,
2691                      uint32_t             pixelSize,
2692                      float                radians,
2693                      uint8_t*&            pixelsOut,
2694                      uint32_t&            widthOut,
2695                      uint32_t&            heightOut)
2696 {
2697   // Calculate the destination image dimensions.
2698
2699   const float absRadians = fabs(radians);
2700
2701   if(absRadians > Math::PI_4)
2702   {
2703     // Can't shear more than 45 degrees.
2704     widthOut  = 0u;
2705     heightOut = 0u;
2706
2707     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Can't shear more than 45 degrees (PI/4 radians). radians : %f\n", radians);
2708     return;
2709   }
2710
2711   widthOut  = widthIn + static_cast<uint32_t>(ceil(absRadians * static_cast<float>(heightIn)));
2712   heightOut = heightIn;
2713
2714   // Allocate the buffer for the shear.
2715   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2716
2717   if(nullptr == pixelsOut)
2718   {
2719     DALI_LOG_ERROR("malloc is failed. request malloc size : %u x %u x %u\n", widthOut, heightOut, pixelSize);
2720     widthOut  = 0u;
2721     heightOut = 0u;
2722
2723     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2724     return;
2725   }
2726
2727   for(uint32_t y = 0u; y < heightOut; ++y)
2728   {
2729     const float shear = radians * ((radians >= 0.f) ? (0.5f + static_cast<float>(y)) : (0.5f + static_cast<float>(y) - static_cast<float>(heightOut)));
2730
2731     const int32_t intShear = static_cast<int32_t>(floor(shear));
2732     HorizontalSkew(pixelsIn, widthIn, strideIn, pixelSize, pixelsOut, widthOut, y, intShear, shear - static_cast<float>(intShear));
2733   }
2734 }
2735
2736 } /* namespace Platform */
2737 } /* namespace Internal */
2738 } /* namespace Dali */