dali/internal/imaging/common/image-operations.cpp

   1 /*
   2  * Copyright (c) 2024 Samsung Electronics Co., Ltd.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  *
  16  */
  17
  18 #include <dali/internal/imaging/common/image-operations.h>
  19
  20 // EXTERNAL INCLUDES
  21 #include <dali/devel-api/adaptor-framework/image-loading.h>
  22 #include <dali/integration-api/debug.h>
  23 #include <dali/integration-api/trace.h>
  24 #include <dali/public-api/common/dali-vector.h>
  25 #include <dali/public-api/math/vector2.h>
  26 #include <stddef.h>
  27 #include <third-party/resampler/resampler.h>
  28 #include <cmath>
  29 #include <cstring>
  30 #include <limits>
  31 #include <memory>
  32
  33 // INTERNAL INCLUDES
  34
  35 namespace Dali
  36 {
  37 namespace Internal
  38 {
  39 namespace Platform
  40 {
  41 namespace
  42 {
  43 // The BORDER_FILL_VALUE is a single byte value that is used for horizontal and vertical borders.
  44 // A value of 0x00 gives us transparency for pixel buffers with an alpha channel, or black otherwise.
  45 // We can optionally use a Vector4 color here, but at reduced fill speed.
  46 const uint8_t BORDER_FILL_VALUE(0x00);
  47 // A maximum size limit for newly created bitmaps. ( 1u << 16 ) - 1 is chosen as we are using 16bit words for dimensions.
  48 const uint32_t MAXIMUM_TARGET_BITMAP_SIZE((1u << 16) - 1);
  49
  50 // Constants used by the ImageResampler.
  51 const float DEFAULT_SOURCE_GAMMA = 1.75f; ///< Default source gamma value used in the Resampler() function. Partial gamma correction looks better on mips. Set to 1.0 to disable gamma correction.
  52 const float FILTER_SCALE         = 1.f;   ///< Default filter scale value used in the Resampler() function. Filter scale - values < 1.0 cause aliasing, but create sharper looking mips.
  53
  54 const float RAD_135 = Math::PI_2 + Math::PI_4; ///< 135 degrees in radians;
  55 const float RAD_225 = RAD_135 + Math::PI_2;    ///< 225 degrees in radians;
  56 const float RAD_270 = 3.f * Math::PI_2;        ///< 270 degrees in radians;
  57 const float RAD_315 = RAD_225 + Math::PI_2;    ///< 315 degrees in radians;
  58
  59 using Integration::Bitmap;
  60 using Integration::BitmapPtr;
  61 typedef uint8_t PixelBuffer;
  62
  63 DALI_INIT_TRACE_FILTER(gTraceFilter, DALI_TRACE_IMAGE_PERFORMANCE_MARKER, false);
  64
  65 /**
  66  * @brief 4 byte pixel structure.
  67  */
  68 struct Pixel4Bytes
  69 {
  70   uint8_t r;
  71   uint8_t g;
  72   uint8_t b;
  73   uint8_t a;
  74 } __attribute__((packed, aligned(4))); //< Tell the compiler it is okay to use a single 32 bit load.
  75
  76 /**
  77  * @brief RGB888 pixel structure.
  78  */
  79 struct Pixel3Bytes
  80 {
  81   uint8_t r;
  82   uint8_t g;
  83   uint8_t b;
  84 } __attribute__((packed, aligned(1)));
  85
  86 /**
  87  * @brief RGB565 pixel typedefed from a short.
  88  *
  89  * Access fields by manual shifting and masking.
  90  */
  91 typedef uint16_t PixelRGB565;
  92
  93 /**
  94  * @brief a Pixel composed of two independent byte components.
  95  */
  96 struct Pixel2Bytes
  97 {
  98   uint8_t l;
  99   uint8_t a;
 100 } __attribute__((packed, aligned(2))); //< Tell the compiler it is okay to use a single 16 bit load.
 101
 102 #if defined(DEBUG_ENABLED)
 103 /**
 104  * Disable logging of image operations or make it verbose from the commandline
 105  * as follows (e.g., for dali demo app):
 106  * <code>
 107  * LOG_IMAGE_OPERATIONS=0 dali-demo #< off
 108  * LOG_IMAGE_OPERATIONS=3 dali-demo #< on, verbose
 109  * </code>
 110  */
 111 Debug::Filter* gImageOpsLogFilter = Debug::Filter::New(Debug::NoLogging, false, "LOG_IMAGE_OPERATIONS");
 112 #endif
 113
 114 /** @return The greatest even number less than or equal to the argument. */
 115 inline uint32_t EvenDown(const uint32_t a)
 116 {
 117   const uint32_t evened = a & ~1u;
 118   return evened;
 119 }
 120
 121 /**
 122  * @brief Log bad parameters.
 123  */
 124 void ValidateScalingParameters(const uint32_t inputWidth,
 125                                const uint32_t inputHeight,
 126                                const uint32_t desiredWidth,
 127                                const uint32_t desiredHeight)
 128 {
 129   if(desiredWidth > inputWidth || desiredHeight > inputHeight)
 130   {
 131     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Upscaling not supported (%u, %u -> %u, %u).\n", inputWidth, inputHeight, desiredWidth, desiredHeight);
 132   }
 133
 134   if(desiredWidth == 0u || desiredHeight == 0u)
 135   {
 136     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Downscaling to a zero-area target is pointless.\n");
 137   }
 138
 139   if(inputWidth == 0u || inputHeight == 0u)
 140   {
 141     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Zero area images cannot be scaled\n");
 142   }
 143 }
 144
/**
 * @brief Do debug assertions common to all scanline halving functions.
 * @note Inline and in anon namespace so should boil away in release builds.
 *
 * @param[in] pixels Start of the scanline; asserted to be non-null.
 * @param[in] width  Width handed to the halving function; asserted to be
 *                   greater than 1 and less than 131072.
 */
inline void DebugAssertScanlineParameters(const uint8_t* const pixels, const uint32_t width)
{
  DALI_ASSERT_DEBUG(pixels && "Null pointer.");
  // Halving averages pairs of pixels, so at least one pair is required.
  DALI_ASSERT_DEBUG(width > 1u && "Can't average fewer than two pixels.");
  // Sanity bound (2^17) meant to catch a bogus width rather than to enforce a hard format limit.
  DALI_ASSERT_DEBUG(width < 131072u && "Unusually wide image: are you sure you meant to pass that value in?");
}
 155
 156 /**
 157  * @brief Assertions on params to functions averaging pairs of scanlines.
 158  * @note Inline as intended to boil away in release.
 159  */
 160 inline void DebugAssertDualScanlineParameters(const uint8_t* const scanline1,
 161                                               const uint8_t* const scanline2,
 162                                               uint8_t* const       outputScanline,
 163                                               const size_t         widthInComponents)
 164 {
 165   DALI_ASSERT_DEBUG(scanline1 && "Null pointer.");
 166   DALI_ASSERT_DEBUG(scanline2 && "Null pointer.");
 167   DALI_ASSERT_DEBUG(outputScanline && "Null pointer.");
 168   DALI_ASSERT_DEBUG(((scanline1 >= scanline2 + widthInComponents) || (scanline2 >= scanline1 + widthInComponents)) && "Scanlines alias.");
 169   DALI_ASSERT_DEBUG(((outputScanline >= (scanline2 + widthInComponents)) || (scanline2 >= (scanline1 + widthInComponents))) && "Scanline 2 aliases output.");
 170 }
 171
 172 /**
 173  * @brief Converts a scaling mode to the definition of which dimensions matter when box filtering as a part of that mode.
 174  */
 175 BoxDimensionTest DimensionTestForScalingMode(FittingMode::Type fittingMode)
 176 {
 177   BoxDimensionTest dimensionTest;
 178   dimensionTest = BoxDimensionTestEither;
 179
 180   switch(fittingMode)
 181   {
 182     // Shrink to fit attempts to make one or zero dimensions smaller than the
 183     // desired dimensions and one or two dimensions exactly the same as the desired
 184     // ones, so as long as one dimension is larger than the desired size, box
 185     // filtering can continue even if the second dimension is smaller than the
 186     // desired dimensions:
 187     case FittingMode::SHRINK_TO_FIT:
 188     {
 189       dimensionTest = BoxDimensionTestEither;
 190       break;
 191     }
 192     // Scale to fill mode keeps both dimensions at least as large as desired:
 193     case FittingMode::SCALE_TO_FILL:
 194     {
 195       dimensionTest = BoxDimensionTestBoth;
 196       break;
 197     }
 198     // Y dimension is irrelevant when downscaling in FIT_WIDTH mode:
 199     case FittingMode::FIT_WIDTH:
 200     {
 201       dimensionTest = BoxDimensionTestX;
 202       break;
 203     }
 204     // X Dimension is ignored by definition in FIT_HEIGHT mode:
 205     case FittingMode::FIT_HEIGHT:
 206     {
 207       dimensionTest = BoxDimensionTestY;
 208       break;
 209     }
 210   }
 211
 212   return dimensionTest;
 213 }
 214
 215 /**
 216  * @brief Work out the dimensions for a uniform scaling of the input to map it
 217  * into the target while effecting ShinkToFit scaling mode.
 218  */
 219 ImageDimensions FitForShrinkToFit(ImageDimensions target, ImageDimensions source)
 220 {
 221   // Scale the input by the least extreme of the two dimensions:
 222   const float widthScale  = target.GetX() / float(source.GetX());
 223   const float heightScale = target.GetY() / float(source.GetY());
 224   const float scale       = widthScale < heightScale ? widthScale : heightScale;
 225
 226   // Do no scaling at all if the result would increase area:
 227   if(scale >= 1.0f)
 228   {
 229     return source;
 230   }
 231
 232   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 233 }
 234
 235 /**
 236  * @brief Work out the dimensions for a uniform scaling of the input to map it
 237  * into the target while effecting SCALE_TO_FILL scaling mode.
 238  * @note An image scaled into the output dimensions will need either top and
 239  * bottom or left and right to be cropped away unless the source was pre-cropped
 240  * to match the destination aspect ratio.
 241  */
 242 ImageDimensions FitForScaleToFill(ImageDimensions target, ImageDimensions source)
 243 {
 244   DALI_ASSERT_DEBUG(source.GetX() > 0 && source.GetY() > 0 && "Zero-area rectangles should not be passed-in");
 245   // Scale the input by the least extreme of the two dimensions:
 246   const float widthScale  = target.GetX() / float(source.GetX());
 247   const float heightScale = target.GetY() / float(source.GetY());
 248   const float scale       = widthScale > heightScale ? widthScale : heightScale;
 249
 250   // Do no scaling at all if the result would increase area:
 251   if(scale >= 1.0f)
 252   {
 253     return source;
 254   }
 255
 256   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 257 }
 258
 259 /**
 260  * @brief Work out the dimensions for a uniform scaling of the input to map it
 261  * into the target while effecting FIT_WIDTH scaling mode.
 262  */
 263 ImageDimensions FitForFitWidth(ImageDimensions target, ImageDimensions source)
 264 {
 265   DALI_ASSERT_DEBUG(source.GetX() > 0 && "Cant fit a zero-dimension rectangle.");
 266   const float scale = target.GetX() / float(source.GetX());
 267
 268   // Do no scaling at all if the result would increase area:
 269   if(scale >= 1.0f)
 270   {
 271     return source;
 272   }
 273   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 274 }
 275
 276 /**
 277  * @brief Work out the dimensions for a uniform scaling of the input to map it
 278  * into the target while effecting FIT_HEIGHT scaling mode.
 279  */
 280 ImageDimensions FitForFitHeight(ImageDimensions target, ImageDimensions source)
 281 {
 282   DALI_ASSERT_DEBUG(source.GetY() > 0 && "Cant fit a zero-dimension rectangle.");
 283   const float scale = target.GetY() / float(source.GetY());
 284
 285   // Do no scaling at all if the result would increase area:
 286   if(scale >= 1.0f)
 287   {
 288     return source;
 289   }
 290
 291   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 292 }
 293
 294 /**
 295  * @brief Generate the rectangle to use as the target of a pixel sampling pass
 296  * (e.g., nearest or linear).
 297  */
 298 ImageDimensions FitToScalingMode(ImageDimensions requestedSize, ImageDimensions sourceSize, FittingMode::Type fittingMode)
 299 {
 300   ImageDimensions fitDimensions;
 301   switch(fittingMode)
 302   {
 303     case FittingMode::SHRINK_TO_FIT:
 304     {
 305       fitDimensions = FitForShrinkToFit(requestedSize, sourceSize);
 306       break;
 307     }
 308     case FittingMode::SCALE_TO_FILL:
 309     {
 310       fitDimensions = FitForScaleToFill(requestedSize, sourceSize);
 311       break;
 312     }
 313     case FittingMode::FIT_WIDTH:
 314     {
 315       fitDimensions = FitForFitWidth(requestedSize, sourceSize);
 316       break;
 317     }
 318     case FittingMode::FIT_HEIGHT:
 319     {
 320       fitDimensions = FitForFitHeight(requestedSize, sourceSize);
 321       break;
 322     }
 323   }
 324
 325   return fitDimensions;
 326 }
 327
 328 /**
 329  * @brief Calculate the number of lines on the X and Y axis that need to be
 330  * either added or removed with repect to the specified fitting mode.
 331  * (e.g., nearest or linear).
 332  * @param[in]     sourceSize      The size of the source image
 333  * @param[in]     fittingMode     The fitting mode to use
 334  * @param[in/out] requestedSize   The target size that the image will be fitted to.
 335  *                                If the source image is smaller than the requested size, the source is not scaled up.
 336  *                                So we reduce the target size while keeping aspect by lowering resolution.
 337  * @param[out]    scanlinesToCrop The number of scanlines to remove from the image (can be negative to represent Y borders required)
 338  * @param[out]    columnsToCrop   The number of columns to remove from the image (can be negative to represent X borders required)
 339  */
 340 void CalculateBordersFromFittingMode(ImageDimensions sourceSize, FittingMode::Type fittingMode, ImageDimensions& requestedSize, int& scanlinesToCrop, int& columnsToCrop)
 341 {
 342   const int   sourceWidth(static_cast<int>(sourceSize.GetWidth()));
 343   const int   sourceHeight(static_cast<int>(sourceSize.GetHeight()));
 344   const float targetAspect(static_cast<float>(requestedSize.GetWidth()) / static_cast<float>(requestedSize.GetHeight()));
 345   int         finalWidth  = 0;
 346   int         finalHeight = 0;
 347
 348   switch(fittingMode)
 349   {
 350     case FittingMode::FIT_WIDTH:
 351     {
 352       finalWidth  = sourceWidth;
 353       finalHeight = static_cast<int>(static_cast<float>(sourceWidth) / targetAspect);
 354       break;
 355     }
 356
 357     case FittingMode::FIT_HEIGHT:
 358     {
 359       finalWidth  = static_cast<int>(static_cast<float>(sourceHeight) * targetAspect);
 360       finalHeight = sourceHeight;
 361       break;
 362     }
 363
 364     case FittingMode::SHRINK_TO_FIT:
 365     {
 366       const float sourceAspect(static_cast<float>(sourceWidth) / static_cast<float>(sourceHeight));
 367       if(sourceAspect > targetAspect)
 368       {
 369         finalWidth  = sourceWidth;
 370         finalHeight = static_cast<int>(static_cast<float>(sourceWidth) / targetAspect);
 371       }
 372       else
 373       {
 374         finalWidth  = static_cast<int>(static_cast<float>(sourceHeight) * targetAspect);
 375         finalHeight = sourceHeight;
 376       }
 377       break;
 378     }
 379
 380     case FittingMode::SCALE_TO_FILL:
 381     {
 382       const float sourceAspect(static_cast<float>(sourceWidth) / static_cast<float>(sourceHeight));
 383       if(sourceAspect > targetAspect)
 384       {
 385         finalWidth  = static_cast<int>(static_cast<float>(sourceHeight) * targetAspect);
 386         finalHeight = sourceHeight;
 387       }
 388       else
 389       {
 390         finalWidth  = sourceWidth;
 391         finalHeight = static_cast<int>(static_cast<float>(sourceWidth) / targetAspect);
 392       }
 393       break;
 394     }
 395   }
 396
 397   // Clamp if overflowed
 398   if(DALI_UNLIKELY(finalWidth > std::numeric_limits<uint16_t>::max()))
 399   {
 400     finalWidth = std::numeric_limits<uint16_t>::max();
 401   }
 402   if(DALI_UNLIKELY(finalHeight > std::numeric_limits<uint16_t>::max()))
 403   {
 404     finalHeight = std::numeric_limits<uint16_t>::max();
 405   }
 406
 407   columnsToCrop   = -(finalWidth - sourceWidth);
 408   scanlinesToCrop = -(finalHeight - sourceHeight);
 409
 410   requestedSize.SetWidth(static_cast<uint16_t>(finalWidth));
 411   requestedSize.SetHeight(static_cast<uint16_t>(finalHeight));
 412 }
 413
 414 /**
 415  * @brief Construct a pixel buffer object from a copy of the pixel array passed in.
 416  */
 417 Dali::Devel::PixelBuffer MakePixelBuffer(const uint8_t* const pixels, Pixel::Format pixelFormat, uint32_t width, uint32_t height)
 418 {
 419   DALI_ASSERT_DEBUG(pixels && "Null bitmap buffer to copy.");
 420
 421   // Allocate a pixel buffer to hold the image passed in:
 422   auto newBitmap = Dali::Devel::PixelBuffer::New(width, height, pixelFormat);
 423
 424   // Copy over the pixels from the downscaled image that was generated in-place in the pixel buffer of the input bitmap:
 425   memcpy(newBitmap.GetBuffer(), pixels, width * height * Pixel::GetBytesPerPixel(pixelFormat));
 426   return newBitmap;
 427 }
 428
 429 /**
 430  * @brief Work out the desired width and height, accounting for zeros.
 431  *
 432  * @param[in] bitmapWidth Width of image before processing.
 433  * @param[in] bitmapHeight Height of image before processing.
 434  * @param[in] requestedWidth Width of area to scale image into. Can be zero.
 435  * @param[in] requestedHeight Height of area to scale image into. Can be zero.
 436  * @return Dimensions of area to scale image into after special rules are applied.
 437  */
 438 ImageDimensions CalculateDesiredDimensions(uint32_t bitmapWidth, uint32_t bitmapHeight, uint32_t requestedWidth, uint32_t requestedHeight)
 439 {
 440   uint32_t maxSize = Dali::GetMaxTextureSize();
 441
 442   // If no dimensions have been requested, default to the source ones:
 443   if(requestedWidth == 0 && requestedHeight == 0)
 444   {
 445     if(bitmapWidth <= maxSize && bitmapHeight <= maxSize)
 446     {
 447       return ImageDimensions(bitmapWidth, bitmapHeight);
 448     }
 449     else
 450     {
 451       // Calculate the size from the max texture size and the source image aspect ratio
 452       if(bitmapWidth > bitmapHeight)
 453       {
 454         return ImageDimensions(maxSize, bitmapHeight * maxSize / static_cast<float>(bitmapWidth) + 0.5f);
 455       }
 456       else
 457       {
 458         return ImageDimensions(bitmapWidth * maxSize / static_cast<float>(bitmapHeight) + 0.5f, maxSize);
 459       }
 460     }
 461   }
 462
 463   // If both dimensions have values requested, use them both:
 464   if(requestedWidth != 0 && requestedHeight != 0)
 465   {
 466     if(requestedWidth <= maxSize && requestedHeight <= maxSize)
 467     {
 468       return ImageDimensions(requestedWidth, requestedHeight);
 469     }
 470     else
 471     {
 472       // Calculate the size from the max texture size and the source image aspect ratio
 473       if(requestedWidth > requestedHeight)
 474       {
 475         return ImageDimensions(maxSize, requestedHeight * maxSize / static_cast<float>(requestedWidth) + 0.5f);
 476       }
 477       else
 478       {
 479         return ImageDimensions(requestedWidth * maxSize / static_cast<float>(requestedHeight) + 0.5f, maxSize);
 480       }
 481     }
 482   }
 483
 484   // Only one of the dimensions has been requested. Calculate the other from
 485   // the requested one and the source image aspect ratio:
 486   if(requestedWidth != 0)
 487   {
 488     requestedWidth = std::min(requestedWidth, maxSize);
 489     return ImageDimensions(requestedWidth, bitmapHeight / float(bitmapWidth) * requestedWidth + 0.5f);
 490   }
 491
 492   requestedHeight = std::min(requestedHeight, maxSize);
 493   return ImageDimensions(bitmapWidth / float(bitmapHeight) * requestedHeight + 0.5f, requestedHeight);
 494 }
 495
 496 /**
 497  * @brief Rotates the given buffer @p pixelsIn 90 degrees counter clockwise.
 498  *
 499  * @note It allocates memory for the returned @p pixelsOut buffer.
 500  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 501  * @note It may fail if malloc() fails to allocate memory.
 502  *
 503  * @param[in] pixelsIn The input buffer.
 504  * @param[in] widthIn The width of the input buffer.
 505  * @param[in] heightIn The height of the input buffer.
 506  * @param[in] strideIn The stride of the input buffer.
 507  * @param[in] pixelSize The size of the pixel.
 508  * @param[out] pixelsOut The rotated output buffer.
 509  * @param[out] widthOut The width of the output buffer.
 510  * @param[out] heightOut The height of the output buffer.
 511  *
 512  * @return Whether the rotation succeeded.
 513  */
 514 bool Rotate90(const uint8_t* const pixelsIn,
 515               uint32_t             widthIn,
 516               uint32_t             heightIn,
 517               uint32_t             strideIn,
 518               uint32_t             pixelSize,
 519               uint8_t*&            pixelsOut,
 520               uint32_t&            widthOut,
 521               uint32_t&            heightOut)
 522 {
 523   // The new size of the image.
 524   widthOut  = heightIn;
 525   heightOut = widthIn;
 526
 527   // Allocate memory for the rotated buffer.
 528   // Output buffer is tightly packed
 529   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
 530   if(nullptr == pixelsOut)
 531   {
 532     DALI_LOG_ERROR("malloc is failed. request malloc size : %u x %u x %u\n", widthOut, heightOut, pixelSize);
 533     widthOut  = 0u;
 534     heightOut = 0u;
 535
 536     // Return if the memory allocations fails.
 537     return false;
 538   }
 539
 540   DALI_TRACE_SCOPE(gTraceFilter, "DALI_BITMAP_ROTATE_90");
 541
 542   // Rotate the buffer.
 543   for(uint32_t y = 0u; y < heightIn; ++y)
 544   {
 545     const uint32_t srcLineIndex = y * strideIn;
 546     const uint32_t dstX         = y;
 547     for(uint32_t x = 0u; x < widthIn; ++x)
 548     {
 549       const uint32_t dstY     = heightOut - x - 1u;
 550       const uint32_t dstIndex = pixelSize * (dstY * widthOut + dstX);
 551       const uint32_t srcIndex = pixelSize * (srcLineIndex + x);
 552
 553       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 554       {
 555         *(pixelsOut + dstIndex + channel) = *(pixelsIn + srcIndex + channel);
 556       }
 557     }
 558   }
 559
 560   return true;
 561 }
 562
 563 /**
 564  * @brief Rotates the given buffer @p pixelsIn 180 degrees counter clockwise.
 565  *
 566  * @note It allocates memory for the returned @p pixelsOut buffer.
 567  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 568  * @note It may fail if malloc() fails to allocate memory.
 569  *
 570  * @param[in] pixelsIn The input buffer.
 571  * @param[in] widthIn The width of the input buffer.
 572  * @param[in] heightIn The height of the input buffer.
 573  * @param[in] strideIn The stride of the input buffer.
 574  * @param[in] pixelSize The size of the pixel.
 575  * @param[out] pixelsOut The rotated output buffer.
 576  *
 577  * @return Whether the rotation succeeded.
 578  */
 579 bool Rotate180(const uint8_t* const pixelsIn,
 580                uint32_t             widthIn,
 581                uint32_t             heightIn,
 582                uint32_t             strideIn,
 583                uint32_t             pixelSize,
 584                uint8_t*&            pixelsOut)
 585 {
 586   // Allocate memory for the rotated buffer.
 587   // Output buffer is tightly packed
 588   pixelsOut = static_cast<uint8_t*>(malloc(widthIn * heightIn * pixelSize));
 589   if(nullptr == pixelsOut)
 590   {
 591     DALI_LOG_ERROR("malloc is failed. request malloc size : %u x %u x %u\n", widthIn, heightIn, pixelSize);
 592     // Return if the memory allocations fails.
 593     return false;
 594   }
 595
 596   DALI_TRACE_SCOPE(gTraceFilter, "DALI_BITMAP_ROTATE_180");
 597
 598   // Rotate the buffer.
 599   for(uint32_t y = 0u; y < heightIn; ++y)
 600   {
 601     const uint32_t srcLineIndex = y * strideIn;
 602     const uint32_t dstY         = heightIn - y - 1u;
 603     for(uint32_t x = 0u; x < widthIn; ++x)
 604     {
 605       const uint32_t dstX     = widthIn - x - 1u;
 606       const uint32_t dstIndex = pixelSize * (dstY * widthIn + dstX);
 607       const uint32_t srcIndex = pixelSize * (srcLineIndex + x);
 608
 609       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 610       {
 611         *(pixelsOut + dstIndex + channel) = *(pixelsIn + srcIndex + channel);
 612       }
 613     }
 614   }
 615
 616   return true;
 617 }
 618
 619 /**
 620  * @brief Rotates the given buffer @p pixelsIn 270 degrees counter clockwise.
 621  *
 622  * @note It allocates memory for the returned @p pixelsOut buffer.
 623  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 624  * @note It may fail if malloc() fails to allocate memory.
 625  *
 626  * @param[in] pixelsIn The input buffer.
 627  * @param[in] widthIn The width of the input buffer.
 628  * @param[in] heightIn The height of the input buffer.
 629  * @param[in] strideIn The stride of the input buffer.
 630  * @param[in] pixelSize The size of the pixel.
 631  * @param[out] pixelsOut The rotated output buffer.
 632  * @param[out] widthOut The width of the output buffer.
 633  * @param[out] heightOut The height of the output buffer.
 634  *
 635  * @return Whether the rotation succeeded.
 636  */
 637 bool Rotate270(const uint8_t* const pixelsIn,
 638                uint32_t             widthIn,
 639                uint32_t             heightIn,
 640                uint32_t             strideIn,
 641                uint32_t             pixelSize,
 642                uint8_t*&            pixelsOut,
 643                uint32_t&            widthOut,
 644                uint32_t&            heightOut)
 645 {
 646   // The new size of the image.
 647   widthOut  = heightIn;
 648   heightOut = widthIn;
 649
 650   // Allocate memory for the rotated buffer.
 651   // Output buffer is tightly packed
 652   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
 653   if(nullptr == pixelsOut)
 654   {
 655     DALI_LOG_ERROR("malloc is failed. request malloc size : %u x %u x %u\n", widthOut, heightOut, pixelSize);
 656     widthOut  = 0u;
 657     heightOut = 0u;
 658
 659     // Return if the memory allocations fails.
 660     return false;
 661   }
 662
 663   DALI_TRACE_SCOPE(gTraceFilter, "DALI_BITMAP_ROTATE_270");
 664
 665   // Rotate the buffer.
 666   for(uint32_t y = 0u; y < heightIn; ++y)
 667   {
 668     const uint32_t srcLineIndex = y * strideIn;
 669     const uint32_t dstX         = widthOut - y - 1u;
 670     for(uint32_t x = 0u; x < widthIn; ++x)
 671     {
 672       const uint32_t dstY     = x;
 673       const uint32_t dstIndex = pixelSize * (dstY * widthOut + dstX);
 674       const uint32_t srcIndex = pixelSize * (srcLineIndex + x);
 675
 676       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 677       {
 678         *(pixelsOut + dstIndex + channel) = *(pixelsIn + srcIndex + channel);
 679       }
 680     }
 681   }
 682
 683   return true;
 684 }
 685
/**
 * @brief Skews a row horizontally (with filtered weights)
 *
 * Copies one row of the source into the same row of the destination, shifted
 * by @p offset pixels. Every pixel hands the fraction @p weight of each of its
 * channels over to the next pixel written, and the destination pixels on either
 * side of the shifted row are set to zero.
 *
 * @note Limited to 45 degree skewing only.
 * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 * @note The per-pixel scratch arrays hold four channels, so @p pixelSize must not exceed 4.
 *
 * @param[in] srcBufferPtr Pointer to the input pixel buffer.
 * @param[in] srcWidth The width of the input pixel buffer.
 * @param[in] srcStride The stride of the input pixel buffer.
 * @param[in] pixelSize The size of the pixel.
 * @param[in,out] dstBufferPtr Pointer to the output pixel buffer.
 * @param[in] dstWidth The width of the output pixel buffer.
 * @param[in] row The row index.
 * @param[in] offset The skew offset.
 * @param[in] weight The relative weight of right pixel.
 */
void HorizontalSkew(const uint8_t* const srcBufferPtr,
                    uint32_t             srcWidth,
                    uint32_t             srcStride,
                    uint32_t             pixelSize,
                    uint8_t*&            dstBufferPtr,
                    uint32_t             dstWidth,
                    uint32_t             row,
                    int32_t              offset,
                    float                weight)
{
  DALI_TRACE_SCOPE(gTraceFilter, "DALI_BITMAP_HORIZONTAL_SKEW");
  if(offset > 0)
  {
    // Fill gap left of skew with background.
    memset(dstBufferPtr + row * pixelSize * dstWidth, 0u, pixelSize * offset);
  }

  // Part of the previous pixel that is carried over into the current one.
  uint8_t oldLeft[4u] = {0u, 0u, 0u, 0u};

  for(uint32_t i = 0u; i < srcWidth; ++i)
  {
    // Loop through row pixels
    const uint32_t srcIndex = pixelSize * (row * srcStride + i);

    uint8_t src[4u] = {0u, 0u, 0u, 0u};
    for(uint32_t channel = 0u; channel < pixelSize; ++channel)
    {
      src[channel] = *(srcBufferPtr + srcIndex + channel);
    }

    // Calculate weights
    uint8_t left[4u] = {0u, 0u, 0u, 0u};
    for(uint32_t channel = 0u; channel < pixelSize; ++channel)
    {
      // Share of this pixel that moves on to the next one (truncated to a whole value).
      left[channel] = static_cast<uint8_t>(static_cast<float>(src[channel]) * weight);

      // Update left over on source: give up this pixel's share, take in the previous pixel's one.
      // The subtraction is evaluated as int and narrowed back to uint8_t on assignment.
      src[channel] -= (left[channel] - oldLeft[channel]);
    }

    // Check boundaries. The first test is done in signed arithmetic; in the second one
    // i + offset is unsigned, so a negative position shows up there as a huge value.
    if((static_cast<int32_t>(i) + offset >= 0) && (i + offset < dstWidth))
    {
      const uint32_t dstIndex = pixelSize * (row * dstWidth + i + offset);

      for(uint32_t channel = 0u; channel < pixelSize; ++channel)
      {
        *(dstBufferPtr + dstIndex + channel) = src[channel];
      }
    }

    // Save leftover for next pixel in scan
    for(uint32_t channel = 0u; channel < pixelSize; ++channel)
    {
      oldLeft[channel] = left[channel];
    }
  }

  // Go to rightmost point of skew.
  // The lower bound -(dstWidth * row) keeps row * dstWidth + i from becoming negative.
  int32_t i = std::max(static_cast<int32_t>(srcWidth) + offset, -static_cast<int32_t>(dstWidth * row));
  if(i < static_cast<int32_t>(dstWidth))
  {
    // If still in image bounds, put leftovers there
    const uint32_t dstIndex = pixelSize * (row * dstWidth + i);

    for(uint32_t channel = 0u; channel < pixelSize; ++channel)
    {
      *(dstBufferPtr + dstIndex + channel) = oldLeft[channel];
    }

    // Clear to the right of the skewed line with background
    ++i;
    memset(dstBufferPtr + pixelSize * (row * dstWidth + i), 0u, pixelSize * (dstWidth - i));
  }
}
 777
 778 /**
 779  * @brief Skews a column vertically (with filtered weights)
 780  *
 781  * @note Limited to 45 degree skewing only.
 782  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 783  *
 784  * @param[in] srcBufferPtr Pointer to the input pixel buffer.
 785  * @param[in] srcWidth The width of the input pixel buffer.
 786  * @param[in] srcHeight The height of the input pixel buffer.
 787  * @param[in] srcStride The stride of the input pixel buffer.
 788  * @param[in] pixelSize The size of the pixel.
 789  * @param[in,out] dstPixelBuffer Pointer to the output pixel buffer.
 790  * @param[in] dstWidth The width of the output pixel buffer.
 791  * @param[in] dstHeight The height of the output pixel buffer.
 792  * @param[in] column The column index.
 793  * @param[in] offset The skew offset.
 794  * @param[in] weight The relative weight of uppeer pixel.
 795  */
 796 void VerticalSkew(const uint8_t* const srcBufferPtr,
 797                   uint32_t             srcWidth,
 798                   uint32_t             srcHeight,
 799                   uint32_t             srcStride,
 800                   uint32_t             pixelSize,
 801                   uint8_t*&            dstBufferPtr,
 802                   uint32_t             dstWidth,
 803                   uint32_t             dstHeight,
 804                   uint32_t             column,
 805                   int32_t              offset,
 806                   float                weight)
 807 {
 808   DALI_TRACE_SCOPE(gTraceFilter, "DALI_BITMAP_VERTICAL_SKEW");
 809   for(int32_t i = 0; i < offset; ++i)
 810   {
 811     // Fill gap above skew with background
 812     const uint32_t dstIndex = pixelSize * (i * dstWidth + column);
 813
 814     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 815     {
 816       *(dstBufferPtr + dstIndex + channel) = 0u;
 817     }
 818   }
 819
 820   uint8_t oldLeft[4u] = {0u, 0u, 0u, 0u};
 821
 822   int32_t yPos = 0;
 823
 824   for(uint32_t i = 0u; i < srcHeight; ++i)
 825   {
 826     // Loop through column pixels
 827     const uint32_t srcIndex = pixelSize * (i * srcStride + column);
 828
 829     uint8_t src[4u] = {0u, 0u, 0u, 0u};
 830     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 831     {
 832       src[channel] = *(srcBufferPtr + srcIndex + channel);
 833     }
 834
 835     yPos = static_cast<int32_t>(i) + offset;
 836
 837     // Calculate weights
 838     uint8_t left[4u] = {0u, 0u, 0u, 0u};
 839     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 840     {
 841       left[channel] = static_cast<uint8_t>(static_cast<float>(src[channel]) * weight);
 842       // Update left over on source
 843       src[channel] -= (left[channel] - oldLeft[channel]);
 844     }
 845
 846     // Check boundaries
 847     if((yPos >= 0) && (yPos < static_cast<int32_t>(dstHeight)))
 848     {
 849       const uint32_t dstIndex = pixelSize * (yPos * dstWidth + column);
 850
 851       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 852       {
 853         *(dstBufferPtr + dstIndex + channel) = src[channel];
 854       }
 855     }
 856
 857     // Save leftover for next pixel in scan
 858     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 859     {
 860       oldLeft[channel] = left[channel];
 861     }
 862   }
 863
 864   // Go to bottom point of skew
 865   uint32_t i = 0;
 866
 867   if(yPos >= 0)
 868   {
 869     i = static_cast<uint32_t>(yPos);
 870     if(i < dstHeight)
 871     {
 872       // If still in image bounds, put leftovers there
 873       const uint32_t dstIndex = pixelSize * (i * dstWidth + column);
 874
 875       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 876       {
 877         *(dstBufferPtr + dstIndex + channel) = oldLeft[channel];
 878       }
 879       ++i;
 880     }
 881   }
 882
 883   while(i < dstHeight)
 884   {
 885     // Clear below skewed line with background
 886     const uint32_t dstIndex = pixelSize * (i * dstWidth + column);
 887
 888     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 889     {
 890       *(dstBufferPtr + dstIndex + channel) = 0u;
 891     }
 892     ++i;
 893   }
 894 }
 895 } // namespace
 896
 897 ImageDimensions CalculateDesiredDimensions(ImageDimensions rawDimensions, ImageDimensions requestedDimensions)
 898 {
 899   return CalculateDesiredDimensions(rawDimensions.GetWidth(), rawDimensions.GetHeight(), requestedDimensions.GetWidth(), requestedDimensions.GetHeight());
 900 }
 901
 902 /**
 903  * @brief Apply cropping and padding for specified fitting mode.
 904  *
 905  * Once the bitmap has been (optionally) downscaled to an appropriate size, this method performs alterations
 906  * based on the fitting mode.
 907  *
 908  * This will add vertical or horizontal borders if necessary.
 909  * Crop the source image data vertically or horizontally if necessary.
 910  * The aspect of the source image is preserved.
 911  * If the source image is smaller than the desired size, the algorithm will modify the the newly created
 912  *   bitmaps dimensions to only be as large as necessary, as a memory saving optimization. This will cause
 913  *   GPU scaling to be performed at render time giving the same result with less texture traversal.
 914  *
 915  * @param[in] bitmap            The source pixel buffer to perform modifications on.
 916  * @param[in] desiredDimensions The target dimensions to aim to fill based on the fitting mode.
 917  * @param[in] fittingMode       The fitting mode to use.
 918  *
 919  * @return                      A new bitmap with the padding and cropping required for fitting mode applied.
 920  *                              If no modification is needed or possible, the passed in bitmap is returned.
 921  */
 922 Dali::Devel::PixelBuffer CropAndPadForFittingMode(Dali::Devel::PixelBuffer& bitmap, ImageDimensions desiredDimensions, FittingMode::Type fittingMode);
 923
 924 /**
 925  * @brief Adds horizontal or vertical borders to the source image.
 926  *
 927  * @param[in] targetPixels     The destination image pointer to draw the borders on.
 928  * @param[in] bytesPerPixel    The number of bytes per pixel of the target pixel buffer.
 929  * @param[in] targetDimensions The dimensions of the destination image.
 930  * @param[in] padDimensions    The columns and scanlines to pad with borders.
 931  */
 932 void AddBorders(PixelBuffer* targetPixels, const uint32_t bytesPerPixel, const ImageDimensions targetDimensions, const ImageDimensions padDimensions);
 933
 934 Dali::Devel::PixelBuffer ApplyAttributesToBitmap(Dali::Devel::PixelBuffer bitmap, ImageDimensions dimensions, FittingMode::Type fittingMode, SamplingMode::Type samplingMode)
 935 {
 936   if(bitmap)
 937   {
 938     // Calculate the desired box, accounting for a possible zero component:
 939     const ImageDimensions desiredDimensions = CalculateDesiredDimensions(bitmap.GetWidth(), bitmap.GetHeight(), dimensions.GetWidth(), dimensions.GetHeight());
 940
 941     // If a different size than the raw one has been requested, resize the image
 942     // maximally using a repeated box filter without making it smaller than the
 943     // requested size in either dimension:
 944     bitmap = DownscaleBitmap(bitmap, desiredDimensions, fittingMode, samplingMode);
 945
 946     // Cut the bitmap according to the desired width and height so that the
 947     // resulting bitmap has the same aspect ratio as the desired dimensions.
 948     // Add crop and add borders if necessary depending on fitting mode.
 949     if(bitmap)
 950     {
 951       bitmap = CropAndPadForFittingMode(bitmap, desiredDimensions, fittingMode);
 952     }
 953   }
 954
 955   return bitmap;
 956 }
 957
 958 Dali::Devel::PixelBuffer CropAndPadForFittingMode(Dali::Devel::PixelBuffer& bitmap, ImageDimensions desiredDimensions, FittingMode::Type fittingMode)
 959 {
 960   const uint32_t inputWidth  = bitmap.GetWidth();
 961   const uint32_t inputHeight = bitmap.GetHeight();
 962   const uint32_t inputStride = bitmap.GetStride();
 963
 964   if(desiredDimensions.GetWidth() < 1u || desiredDimensions.GetHeight() < 1u)
 965   {
 966     DALI_LOG_WARNING("Image scaling aborted as desired dimensions too small (%u, %u).\n", desiredDimensions.GetWidth(), desiredDimensions.GetHeight());
 967   }
 968   else if(inputWidth != desiredDimensions.GetWidth() || inputHeight != desiredDimensions.GetHeight())
 969   {
 970     // Calculate any padding or cropping that needs to be done based on the fitting mode.
 971     // Note: If the desired size is larger than the original image, the desired size will be
 972     // reduced while maintaining the aspect, in order to save unnecessary memory usage.
 973     int scanlinesToCrop = 0;
 974     int columnsToCrop   = 0;
 975
 976     CalculateBordersFromFittingMode(ImageDimensions(inputWidth, inputHeight), fittingMode, desiredDimensions, scanlinesToCrop, columnsToCrop);
 977
 978     uint32_t desiredWidth(desiredDimensions.GetWidth());
 979     uint32_t desiredHeight(desiredDimensions.GetHeight());
 980
 981     // Action the changes by making a new bitmap with the central part of the loaded one if required.
 982     if(scanlinesToCrop != 0 || columnsToCrop != 0)
 983     {
 984       // Split the adding and removing of scanlines and columns into separate variables,
 985       // so we can use one piece of generic code to action the changes.
 986       uint32_t scanlinesToPad = 0;
 987       uint32_t columnsToPad   = 0;
 988       if(scanlinesToCrop < 0)
 989       {
 990         scanlinesToPad  = -scanlinesToCrop;
 991         scanlinesToCrop = 0;
 992       }
 993       if(columnsToCrop < 0)
 994       {
 995         columnsToPad  = -columnsToCrop;
 996         columnsToCrop = 0;
 997       }
 998
 999       // If there is no filtering, then the final image size can become very large, exit if larger than maximum.
1000       if((desiredWidth > MAXIMUM_TARGET_BITMAP_SIZE) || (desiredHeight > MAXIMUM_TARGET_BITMAP_SIZE) ||
1001          (columnsToPad > MAXIMUM_TARGET_BITMAP_SIZE) || (scanlinesToPad > MAXIMUM_TARGET_BITMAP_SIZE))
1002       {
1003         DALI_LOG_WARNING("Image scaling aborted as final dimensions too large (%u, %u).\n", desiredWidth, desiredHeight);
1004         return bitmap;
1005       }
1006
1007       DALI_TRACE_BEGIN_WITH_MESSAGE_GENERATOR(gTraceFilter, "DALI_CROP_AND_PAD_BITMAP", [&](std::ostringstream& oss) {
1008         oss << "[origin:" << inputWidth << "x" << inputHeight << " ";
1009         oss << "desired:" << desiredWidth << "x" << desiredHeight << " ";
1010         oss << "fittingMode:" << fittingMode << "]";
1011       });
1012
1013       // Create new PixelBuffer with the desired size.
1014       const auto pixelFormat = bitmap.GetPixelFormat();
1015
1016       auto croppedBitmap = Devel::PixelBuffer::New(desiredWidth, desiredHeight, pixelFormat);
1017
1018       // Add some pre-calculated offsets to the bitmap pointers so this is not done within a loop.
1019       // The cropping is added to the source pointer, and the padding is added to the destination.
1020       const auto               bytesPerPixel      = Pixel::GetBytesPerPixel(pixelFormat);
1021       const PixelBuffer* const sourcePixels       = bitmap.GetBuffer() + ((((scanlinesToCrop / 2) * inputStride) + (columnsToCrop / 2)) * bytesPerPixel);
1022       PixelBuffer* const       targetPixels       = croppedBitmap.GetBuffer();
1023       PixelBuffer* const       targetPixelsActive = targetPixels + ((((scanlinesToPad / 2) * desiredWidth) + (columnsToPad / 2)) * bytesPerPixel);
1024       DALI_ASSERT_DEBUG(sourcePixels && targetPixels);
1025
1026       // Copy the image data to the new bitmap.
1027       // Optimize to a single memcpy if the left and right edges don't need a crop or a pad.
1028       uint32_t outputSpan(desiredWidth * bytesPerPixel);
1029       if(columnsToCrop == 0 && columnsToPad == 0 && inputStride == inputWidth)
1030       {
1031         memcpy(targetPixelsActive, sourcePixels, (desiredHeight - scanlinesToPad) * outputSpan);
1032       }
1033       else
1034       {
1035         // The width needs to change (due to either a crop or a pad), so we copy a scanline at a time.
1036         // Precalculate any constants to optimize the inner loop.
1037         const uint32_t inputSpan(inputStride * bytesPerPixel);
1038         const uint32_t copySpan((desiredWidth - columnsToPad) * bytesPerPixel);
1039         const uint32_t scanlinesToCopy(desiredHeight - scanlinesToPad);
1040
1041         for(uint32_t y = 0; y < scanlinesToCopy; ++y)
1042         {
1043           memcpy(&targetPixelsActive[y * outputSpan], &sourcePixels[y * inputSpan], copySpan);
1044         }
1045       }
1046
1047       // Add vertical or horizontal borders to the final image (if required).
1048       desiredDimensions.SetWidth(desiredWidth);
1049       desiredDimensions.SetHeight(desiredHeight);
1050       AddBorders(croppedBitmap.GetBuffer(), bytesPerPixel, desiredDimensions, ImageDimensions(columnsToPad, scanlinesToPad));
1051       // Overwrite the loaded bitmap with the cropped version
1052       bitmap = croppedBitmap;
1053
1054       DALI_TRACE_END(gTraceFilter, "DALI_CROP_AND_PAD_BITMAP");
1055     }
1056   }
1057
1058   return bitmap;
1059 }
1060
1061 void AddBorders(PixelBuffer* targetPixels, const uint32_t bytesPerPixel, const ImageDimensions targetDimensions, const ImageDimensions padDimensions)
1062 {
1063   // Assign ints for faster access.
1064   uint32_t desiredWidth(targetDimensions.GetWidth());
1065   uint32_t desiredHeight(targetDimensions.GetHeight());
1066   uint32_t columnsToPad(padDimensions.GetWidth());
1067   uint32_t scanlinesToPad(padDimensions.GetHeight());
1068   uint32_t outputSpan(desiredWidth * bytesPerPixel);
1069
1070   // Add letterboxing (symmetrical borders) if needed.
1071   if(scanlinesToPad > 0)
1072   {
1073     // Add a top border. Note: This is (deliberately) rounded down if padding is an odd number.
1074     memset(targetPixels, BORDER_FILL_VALUE, (scanlinesToPad / 2) * outputSpan);
1075
1076     // We subtract scanlinesToPad/2 from scanlinesToPad so that we have the correct
1077     // offset for odd numbers (as the top border is 1 pixel smaller in these cases.
1078     uint32_t bottomBorderHeight = scanlinesToPad - (scanlinesToPad / 2);
1079
1080     // Bottom border.
1081     memset(&targetPixels[(desiredHeight - bottomBorderHeight) * outputSpan], BORDER_FILL_VALUE, bottomBorderHeight * outputSpan);
1082   }
1083   else if(columnsToPad > 0)
1084   {
1085     // Add a left and right border.
1086     // Left:
1087     // Pre-calculate span size outside of loop.
1088     uint32_t leftBorderSpanWidth((columnsToPad / 2) * bytesPerPixel);
1089     for(uint32_t y = 0; y < desiredHeight; ++y)
1090     {
1091       memset(&targetPixels[y * outputSpan], BORDER_FILL_VALUE, leftBorderSpanWidth);
1092     }
1093
1094     // Right:
1095     // Pre-calculate the initial x offset as it is always the same for a small optimization.
1096     // We subtract columnsToPad/2 from columnsToPad so that we have the correct
1097     // offset for odd numbers (as the left border is 1 pixel smaller in these cases.
1098     uint32_t           rightBorderWidth = columnsToPad - (columnsToPad / 2);
1099     PixelBuffer* const destPixelsRightBorder(targetPixels + ((desiredWidth - rightBorderWidth) * bytesPerPixel));
1100     uint32_t           rightBorderSpanWidth = rightBorderWidth * bytesPerPixel;
1101
1102     for(uint32_t y = 0; y < desiredHeight; ++y)
1103     {
1104       memset(&destPixelsRightBorder[y * outputSpan], BORDER_FILL_VALUE, rightBorderSpanWidth);
1105     }
1106   }
1107 }
1108
1109 Dali::Devel::PixelBuffer DownscaleBitmap(Dali::Devel::PixelBuffer bitmap,
1110                                          ImageDimensions          desired,
1111                                          FittingMode::Type        fittingMode,
1112                                          SamplingMode::Type       samplingMode)
1113 {
1114   // Source dimensions as loaded from resources (e.g. filesystem):
1115   auto bitmapWidth  = bitmap.GetWidth();
1116   auto bitmapHeight = bitmap.GetHeight();
1117   auto bitmapStride = bitmap.GetStride();
1118   // Desired dimensions (the rectangle to fit the source image to):
1119   auto desiredWidth  = desired.GetWidth();
1120   auto desiredHeight = desired.GetHeight();
1121
1122   Dali::Devel::PixelBuffer outputBitmap{bitmap};
1123
1124   // If a different size than the raw one has been requested, resize the image:
1125   if(
1126     (desiredWidth > 0.0f) && (desiredHeight > 0.0f) &&
1127     ((desiredWidth < bitmapWidth) || (desiredHeight < bitmapHeight)))
1128   {
1129     DALI_TRACE_BEGIN_WITH_MESSAGE_GENERATOR(gTraceFilter, "DALI_DOWNSCALE_BITMAP", [&](std::ostringstream& oss) {
1130       oss << "[origin:" << bitmapWidth << "x" << bitmapHeight << " ";
1131       oss << "desired:" << desiredWidth << "x" << desiredHeight << " ";
1132       oss << "fittingMode:" << fittingMode << " ";
1133       oss << "samplingMode:" << samplingMode << "]";
1134     });
1135     auto pixelFormat = bitmap.GetPixelFormat();
1136
1137     // Do the fast power of 2 iterated box filter to get to roughly the right side if the filter mode requests that:
1138     uint32_t shrunkWidth = -1, shrunkHeight = -1, outStride = -1;
1139     DownscaleInPlacePow2(bitmap.GetBuffer(), pixelFormat, bitmapWidth, bitmapHeight, bitmapStride, desiredWidth, desiredHeight, fittingMode, samplingMode, shrunkWidth, shrunkHeight, outStride);
1140
1141     // Work out the dimensions of the downscaled bitmap, given the scaling mode and desired dimensions:
1142     const ImageDimensions filteredDimensions = FitToScalingMode(ImageDimensions(desiredWidth, desiredHeight), ImageDimensions(shrunkWidth, shrunkHeight), fittingMode);
1143     const uint32_t        filteredWidth      = filteredDimensions.GetWidth();
1144     const uint32_t        filteredHeight     = filteredDimensions.GetHeight();
1145
1146     // Run a filter to scale down the bitmap if it needs it:
1147     bool filtered = false;
1148     if(filteredWidth < shrunkWidth || filteredHeight < shrunkHeight)
1149     {
1150       if(samplingMode == SamplingMode::LINEAR || samplingMode == SamplingMode::BOX_THEN_LINEAR ||
1151          samplingMode == SamplingMode::NEAREST || samplingMode == SamplingMode::BOX_THEN_NEAREST)
1152       {
1153         outputBitmap = Dali::Devel::PixelBuffer::New(filteredWidth, filteredHeight, pixelFormat);
1154
1155         if(outputBitmap)
1156         {
1157           if(samplingMode == SamplingMode::LINEAR || samplingMode == SamplingMode::BOX_THEN_LINEAR)
1158           {
1159             LinearSample(bitmap.GetBuffer(), ImageDimensions(shrunkWidth, shrunkHeight), outStride, pixelFormat, outputBitmap.GetBuffer(), filteredDimensions);
1160           }
1161           else
1162           {
1163             PointSample(bitmap.GetBuffer(), shrunkWidth, shrunkHeight, outStride, pixelFormat, outputBitmap.GetBuffer(), filteredWidth, filteredHeight);
1164           }
1165           filtered = true;
1166         }
1167       }
1168     }
1169     // Copy out the 2^x downscaled, box-filtered pixels if no secondary filter (point or linear) was applied:
1170     if(filtered == false && (shrunkWidth < bitmapWidth || shrunkHeight < bitmapHeight))
1171     {
1172       // The buffer is downscaled and it is tightly packed. We don't need to set a stride.
1173       outputBitmap = MakePixelBuffer(bitmap.GetBuffer(), pixelFormat, shrunkWidth, shrunkHeight);
1174     }
1175     DALI_TRACE_END_WITH_MESSAGE_GENERATOR(gTraceFilter, "DALI_DOWNSCALE_BITMAP", [&](std::ostringstream& oss) {
1176       oss << "[origin:" << bitmapWidth << "x" << bitmapHeight << " ";
1177       oss << "desired:" << desiredWidth << "x" << desiredHeight << " ";
1178       oss << "final:" << outputBitmap.GetWidth() << "x" << outputBitmap.GetHeight() << "]";
1179     });
1180   }
1181
1182   return outputBitmap;
1183 }
1184
1185 namespace
1186 {
1187 /**
1188  * @brief Returns whether to keep box filtering based on whether downscaled dimensions will overshoot the desired ones aty the next step.
1189  * @param test Which combination of the two dimensions matter for terminating the filtering.
1190  * @param scaledWidth The width of the current downscaled image.
1191  * @param scaledHeight The height of the current downscaled image.
1192  * @param desiredWidth The target width for the downscaling.
1193  * @param desiredHeight The target height for the downscaling.
1194  */
1195 bool ContinueScaling(BoxDimensionTest test, uint32_t scaledWidth, uint32_t scaledHeight, uint32_t desiredWidth, uint32_t desiredHeight)
1196 {
1197   bool           keepScaling = false;
1198   const uint32_t nextWidth   = scaledWidth >> 1u;
1199   const uint32_t nextHeight  = scaledHeight >> 1u;
1200
1201   if(nextWidth >= 1u && nextHeight >= 1u)
1202   {
1203     switch(test)
1204     {
1205       case BoxDimensionTestEither:
1206       {
1207         keepScaling = nextWidth >= desiredWidth || nextHeight >= desiredHeight;
1208         break;
1209       }
1210       case BoxDimensionTestBoth:
1211       {
1212         keepScaling = nextWidth >= desiredWidth && nextHeight >= desiredHeight;
1213         break;
1214       }
1215       case BoxDimensionTestX:
1216       {
1217         keepScaling = nextWidth >= desiredWidth;
1218         break;
1219       }
1220       case BoxDimensionTestY:
1221       {
1222         keepScaling = nextHeight >= desiredHeight;
1223         break;
1224       }
1225     }
1226   }
1227
1228   return keepScaling;
1229 }
1230
1231 /**
1232  * @brief A shared implementation of the overall iterative box filter
1233  * downscaling algorithm.
1234  *
1235  * Specialise this for particular pixel formats by supplying the number of bytes
1236  * per pixel and two functions: one for averaging pairs of neighbouring pixels
1237  * on a single scanline, and a second for averaging pixels at corresponding
1238  * positions on different scanlines.
1239  **/
1240 template<
1241   int BYTES_PER_PIXEL,
1242   void (*HalveScanlineInPlace)(uint8_t* const pixels, const uint32_t width),
1243   void (*AverageScanlines)(const uint8_t* const scanline1, const uint8_t* const __restrict__ scanline2, uint8_t* const outputScanline, const uint32_t width)>
1244 void DownscaleInPlacePow2Generic(uint8_t* const   pixels,
1245                                  const uint32_t   inputWidth,
1246                                  const uint32_t   inputHeight,
1247                                  const uint32_t   inputStride,
1248                                  const uint32_t   desiredWidth,
1249                                  const uint32_t   desiredHeight,
1250                                  BoxDimensionTest dimensionTest,
1251                                  uint32_t&        outWidth,
1252                                  uint32_t&        outHeight,
1253                                  uint32_t&        outStride)
1254 {
1255   if(pixels == 0)
1256   {
1257     return;
1258   }
1259   ValidateScalingParameters(inputWidth, inputHeight, desiredWidth, desiredHeight);
1260
1261   // Scale the image until it would be smaller than desired, stopping if the
1262   // resulting height or width would be less than 1:
1263   uint32_t scaledWidth = inputWidth, scaledHeight = inputHeight, stride = inputStride;
1264   while(ContinueScaling(dimensionTest, scaledWidth, scaledHeight, desiredWidth, desiredHeight))
1265   {
1266     const uint32_t lastWidth  = scaledWidth;
1267     const uint32_t lastStride = stride;
1268     scaledWidth >>= 1u;
1269     scaledHeight >>= 1u;
1270     stride = scaledWidth;
1271
1272     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Scaling to %u\t%u.\n", scaledWidth, scaledHeight);
1273
1274     const uint32_t lastScanlinePair = scaledHeight - 1;
1275
1276     // Scale pairs of scanlines until any spare one at the end is dropped:
1277     for(uint32_t y = 0; y <= lastScanlinePair; ++y)
1278     {
1279       // Scale two scanlines horizontally:
1280       HalveScanlineInPlace(&pixels[y * 2 * lastStride * BYTES_PER_PIXEL], lastWidth);
1281       HalveScanlineInPlace(&pixels[(y * 2 + 1) * lastStride * BYTES_PER_PIXEL], lastWidth);
1282
1283       // Scale vertical pairs of pixels while the last two scanlines are still warm in
1284       // the CPU cache(s):
1285       // Note, better access patterns for cache-coherence are possible for very large
1286       // images but even a 4k wide RGB888 image will use just 24kB of cache (4k pixels
1287       // * 3 Bpp * 2 scanlines) for two scanlines on the first iteration.
1288       AverageScanlines(
1289         &pixels[y * 2 * lastStride * BYTES_PER_PIXEL],
1290         &pixels[(y * 2 + 1) * lastStride * BYTES_PER_PIXEL],
1291         &pixels[y * scaledWidth * BYTES_PER_PIXEL],
1292         scaledWidth);
1293     }
1294   }
1295
1296   ///@note: we could finish off with one of two mutually exclusive passes, one squashing horizontally as far as possible, and the other vertically, if we knew a following cpu point or bilinear filter would restore the desired aspect ratio.
1297   outWidth  = scaledWidth;
1298   outHeight = scaledHeight;
1299   outStride = stride;
1300 }
1301
1302 } // namespace
1303
1304 void HalveScanlineInPlaceRGB888(uint8_t* const pixels, const uint32_t width)
1305 {
1306   DebugAssertScanlineParameters(pixels, width);
1307
1308   const uint32_t lastPair = EvenDown(width - 2);
1309
1310   /**
1311    * @code
1312    *  for(uint32_t pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1313    * {
1314    *   // Load all the byte pixel components we need:
1315    *   const uint32_t c11 = pixels[pixel * 3];
1316    *   const uint32_t c12 = pixels[pixel * 3 + 1];
1317    *   const uint32_t c13 = pixels[pixel * 3 + 2];
1318    *   const uint32_t c21 = pixels[pixel * 3 + 3];
1319    *   const uint32_t c22 = pixels[pixel * 3 + 4];
1320    *   const uint32_t c23 = pixels[pixel * 3 + 5];
1321    *
1322    *   // Save the averaged byte pixel components:
1323    *   pixels[outPixel * 3]     = static_cast<uint8_t>(AverageComponent(c11, c21));
1324    *   pixels[outPixel * 3 + 1] = static_cast<uint8_t>(AverageComponent(c12, c22));
1325    *   pixels[outPixel * 3 + 2] = static_cast<uint8_t>(AverageComponent(c13, c23));
1326    * }
1327    *   @endcode
1328    */
1329   //@ToDo : Fix here if we found that collect 12 bytes == 3 uint32_t with 4 colors, and calculate in one-operation
1330   std::uint8_t* inPixelPtr  = pixels;
1331   std::uint8_t* outPixelPtr = pixels;
1332   for(std::uint32_t scanedPixelCount = 0; scanedPixelCount <= lastPair; scanedPixelCount += 2)
1333   {
1334     *(outPixelPtr + 0) = ((*(inPixelPtr + 0) ^ *(inPixelPtr + 3)) >> 1) + (*(inPixelPtr + 0) & *(inPixelPtr + 3));
1335     *(outPixelPtr + 1) = ((*(inPixelPtr + 1) ^ *(inPixelPtr + 4)) >> 1) + (*(inPixelPtr + 1) & *(inPixelPtr + 4));
1336     *(outPixelPtr + 2) = ((*(inPixelPtr + 2) ^ *(inPixelPtr + 5)) >> 1) + (*(inPixelPtr + 2) & *(inPixelPtr + 5));
1337     inPixelPtr += 6;
1338     outPixelPtr += 3;
1339   }
1340 }
1341
1342 void HalveScanlineInPlaceRGBA8888(uint8_t* const pixels, const uint32_t width)
1343 {
1344   DebugAssertScanlineParameters(pixels, width);
1345   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(pixels) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1346
1347   uint32_t* const alignedPixels = reinterpret_cast<uint32_t*>(pixels);
1348
1349   const uint32_t lastPair = EvenDown(width - 2);
1350
1351   for(uint32_t pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1352   {
1353     const uint32_t averaged = AveragePixelRGBA8888(alignedPixels[pixel], alignedPixels[pixel + 1]);
1354     alignedPixels[outPixel] = averaged;
1355   }
1356 }
1357
1358 void HalveScanlineInPlaceRGB565(uint8_t* pixels, uint32_t width)
1359 {
1360   DebugAssertScanlineParameters(pixels, width);
1361   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(pixels) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1362
1363   uint16_t* const alignedPixels = reinterpret_cast<uint16_t*>(pixels);
1364
1365   const uint32_t lastPair = EvenDown(width - 2);
1366
1367   for(uint32_t pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1368   {
1369     const uint16_t averaged = AveragePixelRGB565(alignedPixels[pixel], alignedPixels[pixel + 1]);
1370     alignedPixels[outPixel] = averaged;
1371   }
1372 }
1373
1374 void HalveScanlineInPlace2Bytes(uint8_t* const pixels, const uint32_t width)
1375 {
1376   DebugAssertScanlineParameters(pixels, width);
1377
1378   const uint32_t lastPair = EvenDown(width - 2);
1379
1380   for(uint32_t pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1381   {
1382     /**
1383      * @code
1384      * // Load all the byte pixel components we need:
1385      * const uint32_t c11 = pixels[pixel * 2];
1386      * const uint32_t c12 = pixels[pixel * 2 + 1];
1387      * const uint32_t c21 = pixels[pixel * 2 + 2];
1388      * const uint32_t c22 = pixels[pixel * 2 + 3];
1389      *
1390      * // Save the averaged byte pixel components:
1391      * pixels[outPixel * 2]     = static_cast<uint8_t>(AverageComponent(c11, c21));
1392      * pixels[outPixel * 2 + 1] = static_cast<uint8_t>(AverageComponent(c12, c22));
1393      * @endcode
1394      */
1395     // Note : We can assume that pixel is even number. So we can use | operation instead of + operation.
1396     pixels[(outPixel << 1)]     = ((pixels[(pixel << 1)] ^ pixels[(pixel << 1) | 2]) >> 1) + (pixels[(pixel << 1)] & pixels[(pixel << 1) | 2]);
1397     pixels[(outPixel << 1) | 1] = ((pixels[(pixel << 1) | 1] ^ pixels[(pixel << 1) | 3]) >> 1) + (pixels[(pixel << 1) | 1] & pixels[(pixel << 1) | 3]);
1398   }
1399 }
1400
1401 void HalveScanlineInPlace1Byte(uint8_t* const pixels, const uint32_t width)
1402 {
1403   DebugAssertScanlineParameters(pixels, width);
1404
1405   const uint32_t lastPair = EvenDown(width - 2);
1406
1407   for(uint32_t pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1408   {
1409     /**
1410      * @code
1411      * // Load all the byte pixel components we need:
1412      * const uint32_t c1 = pixels[pixel];
1413      * const uint32_t c2 = pixels[pixel + 1];
1414      *
1415      * // Save the averaged byte pixel component:
1416      * pixels[outPixel] = static_cast<uint8_t>(AverageComponent(c1, c2));
1417      * @endcode
1418      */
1419     // Note : We can assume that pixel is even number. So we can use | operation instead of + operation.
1420     pixels[outPixel] = ((pixels[pixel] ^ pixels[pixel | 1]) >> 1) + (pixels[pixel] & pixels[pixel | 1]);
1421   }
1422 }
1423
1424 // AverageScanline
1425
1426 namespace
1427 {
1428 /**
1429  * @copydoc AverageScanlines1
1430  * @note This API average eight components in one operation.
1431  * @note Only possible if each scanline pointer's address aligned
1432  * It will give performance benifit.
1433  */
1434 inline void AverageScanlinesWithMultipleComponents(
1435   const uint8_t* const scanline1,
1436   const uint8_t* const __restrict__ scanline2,
1437   uint8_t* const outputScanline,
1438   const uint32_t totalComponentCount)
1439 {
1440   uint32_t component = 0;
1441   if(DALI_LIKELY(totalComponentCount >= 16))
1442   {
1443     // Note reinsterpret_cast from uint8_t to uint64_t (or uint32_t) and read/write only allowed
1444     // If pointer of data is aligned well.
1445     // (to avoid SIGBUS)
1446
1447     // To increase the percentage of optimized works, let we check pre-padding value of each pointer.
1448     auto scanline1Padding   = (reinterpret_cast<std::ptrdiff_t>(scanline1) & (sizeof(std::uint64_t) - 1));
1449     auto scanline2Padding   = (reinterpret_cast<std::ptrdiff_t>(scanline2) & (sizeof(std::uint64_t) - 1));
1450     auto outScanlinePadding = (reinterpret_cast<std::ptrdiff_t>(outputScanline) & (sizeof(std::uint64_t) - 1));
1451     if((scanline1Padding == scanline2Padding) && (scanline1Padding == outScanlinePadding))
1452     {
1453       const auto padding = (sizeof(std::uint64_t) - scanline1Padding) & (sizeof(std::uint64_t) - 1);
1454
1455       // Prepadding range calculate
1456       for(std::uint32_t i = 0; i < padding; ++i)
1457       {
1458         const auto& c1    = scanline1[i];
1459         const auto& c2    = scanline2[i];
1460         outputScanline[i] = static_cast<std::uint8_t>(((c1 ^ c2) >> 1) + (c1 & c2));
1461       }
1462
1463       // Jump 8 components in one step
1464       const std::uint64_t* const scanline18Step = reinterpret_cast<const std::uint64_t* const>(scanline1 + padding);
1465       const std::uint64_t* const scanline28Step = reinterpret_cast<const std::uint64_t* const>(scanline2 + padding);
1466       std::uint64_t* const       output8step    = reinterpret_cast<std::uint64_t* const>(outputScanline + padding);
1467
1468       const std::uint32_t totalStepCount = (totalComponentCount) >> 3;
1469       component                          = (totalStepCount << 3) + padding;
1470
1471       // and for each step, calculate average of 8 bytes.
1472       for(std::uint32_t i = 0; i < totalStepCount; ++i)
1473       {
1474         const auto& c1     = *(scanline18Step + i);
1475         const auto& c2     = *(scanline28Step + i);
1476         *(output8step + i) = static_cast<std::uint64_t>((((c1 ^ c2) & 0xfefefefefefefefeull) >> 1) + (c1 & c2));
1477       }
1478     }
1479     else if(((scanline1Padding & (sizeof(std::uint32_t) - 1)) == (scanline2Padding & (sizeof(std::uint32_t) - 1))) &&
1480             ((scanline1Padding & (sizeof(std::uint32_t) - 1)) == (outScanlinePadding & (sizeof(std::uint32_t) - 1))))
1481     {
1482       const auto padding = (sizeof(std::uint64_t) - scanline1Padding) & (sizeof(std::uint32_t) - 1);
1483
1484       // Prepadding range calculate
1485       for(std::uint32_t i = 0; i < padding; ++i)
1486       {
1487         const auto& c1    = scanline1[i];
1488         const auto& c2    = scanline2[i];
1489         outputScanline[i] = static_cast<std::uint8_t>(((c1 ^ c2) >> 1) + (c1 & c2));
1490       }
1491
1492       // Jump 4 components in one step
1493       const std::uint32_t* const scanline14Step = reinterpret_cast<const std::uint32_t* const>(scanline1 + padding);
1494       const std::uint32_t* const scanline24Step = reinterpret_cast<const std::uint32_t* const>(scanline2 + padding);
1495       std::uint32_t* const       output4step    = reinterpret_cast<std::uint32_t* const>(outputScanline + padding);
1496
1497       const std::uint32_t totalStepCount = (totalComponentCount) >> 2;
1498       component                          = (totalStepCount << 2) + padding;
1499
1500       // and for each step, calculate average of 4 bytes.
1501       for(std::uint32_t i = 0; i < totalStepCount; ++i)
1502       {
1503         const auto& c1     = *(scanline14Step + i);
1504         const auto& c2     = *(scanline24Step + i);
1505         *(output4step + i) = static_cast<std::uint32_t>((((c1 ^ c2) & 0xfefefefeu) >> 1) + (c1 & c2));
1506       }
1507     }
1508   }
1509   // remaining components calculate
1510   for(; component < totalComponentCount; ++component)
1511   {
1512     const auto& c1            = scanline1[component];
1513     const auto& c2            = scanline2[component];
1514     outputScanline[component] = static_cast<std::uint8_t>(((c1 ^ c2) >> 1) + (c1 & c2));
1515   }
1516 }
1517
1518 } // namespace
1519
1520 void AverageScanlines1(const uint8_t* const scanline1,
1521                        const uint8_t* const __restrict__ scanline2,
1522                        uint8_t* const outputScanline,
1523                        const uint32_t width)
1524 {
1525   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width);
1526
1527   /**
1528    * @code
1529    * for(uint32_t component = 0; component < width; ++component)
1530    * {
1531    *   outputScanline[component] = static_cast<uint8_t>(AverageComponent(scanline1[component], scanline2[component]));
1532    * }
1533    * @endcode
1534    */
1535   AverageScanlinesWithMultipleComponents(scanline1, scanline2, outputScanline, width);
1536 }
1537
1538 void AverageScanlines2(const uint8_t* const scanline1,
1539                        const uint8_t* const __restrict__ scanline2,
1540                        uint8_t* const outputScanline,
1541                        const uint32_t width)
1542 {
1543   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 2);
1544
1545   /**
1546    * @code
1547    * for(uint32_t component = 0; component < width * 2; ++component)
1548    * {
1549    *   outputScanline[component] = static_cast<uint8_t>(AverageComponent(scanline1[component], scanline2[component]));
1550    * }
1551    * @endcode
1552    */
1553   AverageScanlinesWithMultipleComponents(scanline1, scanline2, outputScanline, width * 2);
1554 }
1555
1556 void AverageScanlines3(const uint8_t* const scanline1,
1557                        const uint8_t* const __restrict__ scanline2,
1558                        uint8_t* const outputScanline,
1559                        const uint32_t width)
1560 {
1561   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 3);
1562
1563   /**
1564    * @code
1565    * for(uint32_t component = 0; component < width * 3; ++component)
1566    * {
1567    *   outputScanline[component] = static_cast<uint8_t>(AverageComponent(scanline1[component], scanline2[component]));
1568    * }
1569    * @endcode
1570    */
1571   AverageScanlinesWithMultipleComponents(scanline1, scanline2, outputScanline, width * 3);
1572 }
1573
1574 void AverageScanlinesRGBA8888(const uint8_t* const scanline1,
1575                               const uint8_t* const __restrict__ scanline2,
1576                               uint8_t* const outputScanline,
1577                               const uint32_t width)
1578 {
1579   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 4);
1580   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline1) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1581   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline2) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1582   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(outputScanline) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1583
1584   /**
1585    * @code
1586    * const uint32_t* const alignedScanline1 = reinterpret_cast<const uint32_t*>(scanline1);
1587    * const uint32_t* const alignedScanline2 = reinterpret_cast<const uint32_t*>(scanline2);
1588    * uint32_t* const       alignedOutput    = reinterpret_cast<uint32_t*>(outputScanline);
1589    *
1590    * for(uint32_t pixel = 0; pixel < width; ++pixel)
1591    * {
1592    *   alignedOutput[pixel] = AveragePixelRGBA8888(alignedScanline1[pixel], alignedScanline2[pixel]);
1593    * }
1594    * @endcode
1595    */
1596
1597   AverageScanlinesWithMultipleComponents(scanline1, scanline2, outputScanline, width * 4u);
1598 }
1599
1600 void AverageScanlinesRGB565(const uint8_t* const scanline1,
1601                             const uint8_t* const __restrict__ scanline2,
1602                             uint8_t* const outputScanline,
1603                             const uint32_t width)
1604 {
1605   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 2);
1606   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline1) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1607   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline2) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1608   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(outputScanline) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1609
1610   const uint16_t* const alignedScanline1 = reinterpret_cast<const uint16_t*>(scanline1);
1611   const uint16_t* const alignedScanline2 = reinterpret_cast<const uint16_t*>(scanline2);
1612   uint16_t* const       alignedOutput    = reinterpret_cast<uint16_t*>(outputScanline);
1613
1614   for(uint32_t pixel = 0; pixel < width; ++pixel)
1615   {
1616     alignedOutput[pixel] = AveragePixelRGB565(alignedScanline1[pixel], alignedScanline2[pixel]);
1617   }
1618 }
1619
1620 /// Dispatch to pixel format appropriate box filter downscaling functions.
1621 void DownscaleInPlacePow2(uint8_t* const     pixels,
1622                           Pixel::Format      pixelFormat,
1623                           uint32_t           inputWidth,
1624                           uint32_t           inputHeight,
1625                           uint32_t           inputStride,
1626                           uint32_t           desiredWidth,
1627                           uint32_t           desiredHeight,
1628                           FittingMode::Type  fittingMode,
1629                           SamplingMode::Type samplingMode,
1630                           uint32_t&          outWidth,
1631                           uint32_t&          outHeight,
1632                           uint32_t&          outStride)
1633 {
1634   outWidth  = inputWidth;
1635   outHeight = inputHeight;
1636   outStride = inputStride;
1637   // Perform power of 2 iterated 4:1 box filtering if the requested filter mode requires it:
1638   if(samplingMode == SamplingMode::BOX || samplingMode == SamplingMode::BOX_THEN_NEAREST || samplingMode == SamplingMode::BOX_THEN_LINEAR)
1639   {
1640     // Check the pixel format is one that is supported:
1641     if(pixelFormat == Pixel::RGBA8888 || pixelFormat == Pixel::RGB888 || pixelFormat == Pixel::RGB565 || pixelFormat == Pixel::LA88 || pixelFormat == Pixel::L8 || pixelFormat == Pixel::A8 || pixelFormat == Pixel::CHROMINANCE_U || pixelFormat == Pixel::CHROMINANCE_V)
1642     {
1643       const BoxDimensionTest dimensionTest = DimensionTestForScalingMode(fittingMode);
1644
1645       switch(pixelFormat)
1646       {
1647         case Pixel::RGBA8888:
1648         {
1649           Internal::Platform::DownscaleInPlacePow2RGBA8888(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1650           break;
1651         }
1652         case Pixel::RGB888:
1653         {
1654           Internal::Platform::DownscaleInPlacePow2RGB888(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1655           break;
1656         }
1657         case Pixel::RGB565:
1658         {
1659           Internal::Platform::DownscaleInPlacePow2RGB565(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1660           break;
1661         }
1662         case Pixel::LA88:
1663         {
1664           Internal::Platform::DownscaleInPlacePow2ComponentPair(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1665           break;
1666         }
1667         case Pixel::L8:
1668         case Pixel::A8:
1669         case Pixel::CHROMINANCE_U:
1670         case Pixel::CHROMINANCE_V:
1671         {
1672           Internal::Platform::DownscaleInPlacePow2SingleBytePerPixel(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1673           break;
1674         }
1675         default:
1676         {
1677           DALI_ASSERT_DEBUG(false && "Inner branch conditions don't match outer branch.");
1678         }
1679       }
1680     }
1681   }
1682   else
1683   {
1684     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Bitmap was not shrunk: unsupported pixel format: %u.\n", uint32_t(pixelFormat));
1685   }
1686 }
1687
1688 void DownscaleInPlacePow2RGB888(uint8_t*         pixels,
1689                                 uint32_t         inputWidth,
1690                                 uint32_t         inputHeight,
1691                                 uint32_t         inputStride,
1692                                 uint32_t         desiredWidth,
1693                                 uint32_t         desiredHeight,
1694                                 BoxDimensionTest dimensionTest,
1695                                 uint32_t&        outWidth,
1696                                 uint32_t&        outHeight,
1697                                 uint32_t&        outStride)
1698 {
1699   DownscaleInPlacePow2Generic<3, HalveScanlineInPlaceRGB888, AverageScanlines3>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1700 }
1701
1702 void DownscaleInPlacePow2RGBA8888(uint8_t*         pixels,
1703                                   uint32_t         inputWidth,
1704                                   uint32_t         inputHeight,
1705                                   uint32_t         inputStride,
1706                                   uint32_t         desiredWidth,
1707                                   uint32_t         desiredHeight,
1708                                   BoxDimensionTest dimensionTest,
1709                                   uint32_t&        outWidth,
1710                                   uint32_t&        outHeight,
1711                                   uint32_t&        outStride)
1712 {
1713   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(pixels) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1714   DownscaleInPlacePow2Generic<4, HalveScanlineInPlaceRGBA8888, AverageScanlinesRGBA8888>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1715 }
1716
1717 void DownscaleInPlacePow2RGB565(uint8_t*         pixels,
1718                                 uint32_t         inputWidth,
1719                                 uint32_t         inputHeight,
1720                                 uint32_t         inputStride,
1721                                 uint32_t         desiredWidth,
1722                                 uint32_t         desiredHeight,
1723                                 BoxDimensionTest dimensionTest,
1724                                 uint32_t&        outWidth,
1725                                 uint32_t&        outHeight,
1726                                 uint32_t&        outStride)
1727 {
1728   DownscaleInPlacePow2Generic<2, HalveScanlineInPlaceRGB565, AverageScanlinesRGB565>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1729 }
1730
1731 /**
1732  * @copydoc DownscaleInPlacePow2RGB888
1733  *
1734  * For 2-byte formats such as lum8alpha8, but not packed 16 bit formats like RGB565.
1735  */
1736 void DownscaleInPlacePow2ComponentPair(uint8_t*         pixels,
1737                                        uint32_t         inputWidth,
1738                                        uint32_t         inputHeight,
1739                                        uint32_t         inputStride,
1740                                        uint32_t         desiredWidth,
1741                                        uint32_t         desiredHeight,
1742                                        BoxDimensionTest dimensionTest,
1743                                        uint32_t&        outWidth,
1744                                        uint32_t&        outHeight,
1745                                        uint32_t&        outStride)
1746 {
1747   DownscaleInPlacePow2Generic<2, HalveScanlineInPlace2Bytes, AverageScanlines2>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1748 }
1749
1750 void DownscaleInPlacePow2SingleBytePerPixel(uint8_t*         pixels,
1751                                             uint32_t         inputWidth,
1752                                             uint32_t         inputHeight,
1753                                             uint32_t         inputStride,
1754                                             uint32_t         desiredWidth,
1755                                             uint32_t         desiredHeight,
1756                                             BoxDimensionTest dimensionTest,
1757                                             uint32_t&        outWidth,
1758                                             uint32_t&        outHeight,
1759                                             uint32_t&        outStride)
1760 {
1761   DownscaleInPlacePow2Generic<1, HalveScanlineInPlace1Byte, AverageScanlines1>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1762 }
1763
1764 // Point sampling group below
1765
namespace
{
/**
 * @brief Point sample an image to a new resolution (like GL_NEAREST).
 *
 * Template is used purely as a type-safe code generator in this one
 * compilation unit. Generated code is inlined into type-specific wrapper
 * functions below which are exported to rest of module.
 *
 * The input position is tracked in 16.16 fixed point and rounded to the nearest
 * input pixel. When upscaling, rounding can land one past the last row or column,
 * so the sampled coordinates are clamped to the last valid input pixel.
 *
 * @tparam PIXEL Addressable unit with the size of one pixel.
 * @param[in] inPixels Input pixel buffer.
 * @param[in] inputWidth Width of the input image in pixels.
 * @param[in] inputHeight Height of the input image in pixels.
 * @param[in] inputStride Distance between the starts of two input scanlines, in pixels.
 * @param[out] outPixels Output pixel buffer, written tightly packed (desiredWidth pixels per row).
 * @param[in] desiredWidth Width of the output image in pixels.
 * @param[in] desiredHeight Height of the output image in pixels.
 */
template<typename PIXEL>
inline void PointSampleAddressablePixels(const uint8_t* inPixels,
                                         uint32_t       inputWidth,
                                         uint32_t       inputHeight,
                                         uint32_t       inputStride,
                                         uint8_t*       outPixels,
                                         uint32_t       desiredWidth,
                                         uint32_t       desiredHeight)
{
  DALI_ASSERT_DEBUG(((desiredWidth <= inputWidth && desiredHeight <= inputHeight) ||
                     outPixels >= inPixels + inputStride * inputHeight * sizeof(PIXEL) || outPixels <= inPixels - desiredWidth * desiredHeight * sizeof(PIXEL)) &&
                    "The input and output buffers must not overlap for an upscaling.");
  DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(inPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");
  DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(outPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");

  // Nothing to sample from, or nothing to write to.
  if(inputWidth < 1u || inputHeight < 1u || desiredWidth < 1u || desiredHeight < 1u)
  {
    return;
  }
  const PIXEL* const inAligned  = reinterpret_cast<const PIXEL*>(inPixels);
  PIXEL* const       outAligned = reinterpret_cast<PIXEL*>(outPixels);

  // Fixed-point 16.16 steps in input image coordinates per output pixel:
  const uint32_t deltaX = (inputWidth << 16u) / desiredWidth;
  const uint32_t deltaY = (inputHeight << 16u) / desiredHeight;

  // Last valid input coordinates, used to clamp the rounded sample positions.
  const uint32_t lastInputX = inputWidth - 1u;
  const uint32_t lastInputY = inputHeight - 1u;

  uint32_t inY = 0;
  for(uint32_t outY = 0; outY < desiredHeight; ++outY)
  {
    // Round fixed point y coordinate to nearest integer, staying inside the image:
    const uint32_t     roundedY    = (inY + (1u << 15u)) >> 16u;
    const uint32_t     integerY    = roundedY > lastInputY ? lastInputY : roundedY;
    const PIXEL* const inScanline  = &inAligned[inputStride * integerY];
    PIXEL* const       outScanline = &outAligned[desiredWidth * outY];

    DALI_ASSERT_DEBUG(integerY < inputHeight);
    DALI_ASSERT_DEBUG(reinterpret_cast<const uint8_t*>(inScanline) < (inPixels + inputStride * inputHeight * sizeof(PIXEL)));
    DALI_ASSERT_DEBUG(reinterpret_cast<uint8_t*>(outScanline) < (outPixels + desiredWidth * desiredHeight * sizeof(PIXEL)));

    uint32_t inX = 0;
    for(uint32_t outX = 0; outX < desiredWidth; ++outX)
    {
      // Round the fixed-point x coordinate to an integer, staying inside the scanline:
      const uint32_t roundedX = (inX + (1u << 15u)) >> 16u;
      const uint32_t integerX = roundedX > lastInputX ? lastInputX : roundedX;

      outScanline[outX] = inScanline[integerX];
      inX += deltaX;
    }
    inY += deltaY;
  }
}

} // namespace
1826
1827 // RGBA8888
1828 void PointSample4BPP(const uint8_t* inPixels,
1829                      uint32_t       inputWidth,
1830                      uint32_t       inputHeight,
1831                      uint32_t       inputStride,
1832                      uint8_t*       outPixels,
1833                      uint32_t       desiredWidth,
1834                      uint32_t       desiredHeight)
1835 {
1836   PointSampleAddressablePixels<uint32_t>(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1837 }
1838
1839 // RGB565, LA88
1840 void PointSample2BPP(const uint8_t* inPixels,
1841                      uint32_t       inputWidth,
1842                      uint32_t       inputHeight,
1843                      uint32_t       inputStride,
1844                      uint8_t*       outPixels,
1845                      uint32_t       desiredWidth,
1846                      uint32_t       desiredHeight)
1847 {
1848   PointSampleAddressablePixels<uint16_t>(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1849 }
1850
1851 // L8, A8
1852 void PointSample1BPP(const uint8_t* inPixels,
1853                      uint32_t       inputWidth,
1854                      uint32_t       inputHeight,
1855                      uint32_t       inputStride,
1856                      uint8_t*       outPixels,
1857                      uint32_t       desiredWidth,
1858                      uint32_t       desiredHeight)
1859 {
1860   PointSampleAddressablePixels<uint8_t>(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1861 }
1862
/* RGB888
 * RGB888 is a special case as its pixels are not aligned addressable units.
 *
 * The input position is tracked in 16.16 fixed point and rounded to the nearest
 * input pixel. When upscaling, rounding can land one past the last row or column,
 * so the sampled coordinates are clamped to the last valid input pixel.
 *
 * inputStride is measured in pixels; the output is written tightly packed with
 * desiredWidth pixels per row. Nothing is written if any dimension is zero.
 */
void PointSample3BPP(const uint8_t* inPixels,
                     uint32_t       inputWidth,
                     uint32_t       inputHeight,
                     uint32_t       inputStride,
                     uint8_t*       outPixels,
                     uint32_t       desiredWidth,
                     uint32_t       desiredHeight)
{
  if(inputWidth < 1u || inputHeight < 1u || desiredWidth < 1u || desiredHeight < 1u)
  {
    return;
  }
  const uint32_t BYTES_PER_PIXEL = 3;

  // Generate fixed-point 16.16 deltas in input image coordinates:
  const uint32_t deltaX = (inputWidth << 16u) / desiredWidth;
  const uint32_t deltaY = (inputHeight << 16u) / desiredHeight;

  // Last valid input coordinates, used to clamp the rounded sample positions:
  const uint32_t lastInputX = inputWidth - 1u;
  const uint32_t lastInputY = inputHeight - 1u;

  // Step through output image in whole integer pixel steps while tracking the
  // corresponding locations in the input image using 16.16 fixed-point
  // coordinates:
  uint32_t inY = 0; //< 16.16 fixed-point input image y-coord.
  for(uint32_t outY = 0; outY < desiredHeight; ++outY)
  {
    // Round to the nearest input row, staying inside the image:
    const uint32_t       roundedY    = (inY + (1u << 15u)) >> 16u;
    const uint32_t       integerY    = roundedY > lastInputY ? lastInputY : roundedY;
    const uint8_t* const inScanline  = &inPixels[inputStride * integerY * BYTES_PER_PIXEL];
    uint8_t* const       outScanline = &outPixels[desiredWidth * outY * BYTES_PER_PIXEL];
    uint32_t             inX         = 0; //< 16.16 fixed-point input image x-coord.

    for(uint32_t outX = 0; outX < desiredWidth * BYTES_PER_PIXEL; outX += BYTES_PER_PIXEL)
    {
      // Round the fixed-point input coordinate to the address of the input pixel to sample,
      // staying inside the scanline:
      const uint32_t       roundedX       = (inX + (1u << 15u)) >> 16u;
      const uint32_t       integerX       = roundedX > lastInputX ? lastInputX : roundedX;
      const uint8_t* const inPixelAddress = &inScanline[integerX * BYTES_PER_PIXEL];

      // Issue loads for all pixel color components up-front:
      const uint32_t c0 = inPixelAddress[0];
      const uint32_t c1 = inPixelAddress[1];
      const uint32_t c2 = inPixelAddress[2];
      ///@ToDo: Optimise - Benchmark one 32bit load that will be unaligned 2/3 of the time + 3 rotate and masks, versus these three aligned byte loads, versus using an RGB packed, aligned(1) struct and letting compiler pick a strategy.

      // Output the pixel components:
      outScanline[outX]     = static_cast<uint8_t>(c0);
      outScanline[outX + 1] = static_cast<uint8_t>(c1);
      outScanline[outX + 2] = static_cast<uint8_t>(c2);

      // Increment the fixed-point input coordinate:
      inX += deltaX;
    }

    inY += deltaY;
  }
}
1919
1920 // Dispatch to a format-appropriate point sampling function:
1921 void PointSample(const uint8_t* inPixels,
1922                  uint32_t       inputWidth,
1923                  uint32_t       inputHeight,
1924                  uint32_t       inputStride,
1925                  Pixel::Format  pixelFormat,
1926                  uint8_t*       outPixels,
1927                  uint32_t       desiredWidth,
1928                  uint32_t       desiredHeight)
1929 {
1930   // Check the pixel format is one that is supported:
1931   if(pixelFormat == Pixel::RGBA8888 || pixelFormat == Pixel::RGB888 || pixelFormat == Pixel::RGB565 || pixelFormat == Pixel::LA88 || pixelFormat == Pixel::L8 || pixelFormat == Pixel::A8 || pixelFormat == Pixel::CHROMINANCE_U || pixelFormat == Pixel::CHROMINANCE_V)
1932   {
1933     switch(pixelFormat)
1934     {
1935       case Pixel::RGB888:
1936       {
1937         PointSample3BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1938         break;
1939       }
1940       case Pixel::RGBA8888:
1941       {
1942         PointSample4BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1943         break;
1944       }
1945       case Pixel::RGB565:
1946       case Pixel::LA88:
1947       {
1948         PointSample2BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1949         break;
1950       }
1951       case Pixel::L8:
1952       case Pixel::A8:
1953       case Pixel::CHROMINANCE_U:
1954       case Pixel::CHROMINANCE_V:
1955       {
1956         PointSample1BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1957         break;
1958       }
1959       default:
1960       {
1961         DALI_ASSERT_DEBUG(0 == "Inner branch conditions don't match outer branch.");
1962       }
1963     }
1964   }
1965   else
1966   {
1967     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Bitmap was not point sampled: unsupported pixel format: %u.\n", uint32_t(pixelFormat));
1968   }
1969 }
1970
1971 // Linear sampling group below
1972
1973 namespace
1974 {
1975 /** @brief Blend 4 pixels together using horizontal and vertical weights. */
1976 inline uint8_t BilinearFilter1BPPByte(uint8_t tl, uint8_t tr, uint8_t bl, uint8_t br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical)
1977 {
1978   return static_cast<uint8_t>(BilinearFilter1Component(tl, tr, bl, br, fractBlendHorizontal, fractBlendVertical));
1979 }
1980
1981 /** @copydoc BilinearFilter1BPPByte */
1982 inline Pixel2Bytes BilinearFilter2Bytes(Pixel2Bytes tl, Pixel2Bytes tr, Pixel2Bytes bl, Pixel2Bytes br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical)
1983 {
1984   Pixel2Bytes pixel;
1985   pixel.l = static_cast<uint8_t>(BilinearFilter1Component(tl.l, tr.l, bl.l, br.l, fractBlendHorizontal, fractBlendVertical));
1986   pixel.a = static_cast<uint8_t>(BilinearFilter1Component(tl.a, tr.a, bl.a, br.a, fractBlendHorizontal, fractBlendVertical));
1987   return pixel;
1988 }
1989
1990 /** @copydoc BilinearFilter1BPPByte */
1991 inline Pixel3Bytes BilinearFilterRGB888(Pixel3Bytes tl, Pixel3Bytes tr, Pixel3Bytes bl, Pixel3Bytes br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical)
1992 {
1993   Pixel3Bytes pixel;
1994   pixel.r = static_cast<uint8_t>(BilinearFilter1Component(tl.r, tr.r, bl.r, br.r, fractBlendHorizontal, fractBlendVertical));
1995   pixel.g = static_cast<uint8_t>(BilinearFilter1Component(tl.g, tr.g, bl.g, br.g, fractBlendHorizontal, fractBlendVertical));
1996   pixel.b = static_cast<uint8_t>(BilinearFilter1Component(tl.b, tr.b, bl.b, br.b, fractBlendHorizontal, fractBlendVertical));
1997   return pixel;
1998 }
1999
2000 /** @copydoc BilinearFilter1BPPByte */
2001 inline PixelRGB565 BilinearFilterRGB565(PixelRGB565 tl, PixelRGB565 tr, PixelRGB565 bl, PixelRGB565 br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical)
2002 {
2003   const PixelRGB565 pixel = static_cast<PixelRGB565>((BilinearFilter1Component(tl >> 11u, tr >> 11u, bl >> 11u, br >> 11u, fractBlendHorizontal, fractBlendVertical) << 11u) +
2004                                                      (BilinearFilter1Component((tl >> 5u) & 63u, (tr >> 5u) & 63u, (bl >> 5u) & 63u, (br >> 5u) & 63u, fractBlendHorizontal, fractBlendVertical) << 5u) +
2005                                                      BilinearFilter1Component(tl & 31u, tr & 31u, bl & 31u, br & 31u, fractBlendHorizontal, fractBlendVertical));
2006   return pixel;
2007 }
2008
2009 /** @copydoc BilinearFilter1BPPByte */
2010 inline Pixel4Bytes BilinearFilter4Bytes(Pixel4Bytes tl, Pixel4Bytes tr, Pixel4Bytes bl, Pixel4Bytes br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical)
2011 {
2012   Pixel4Bytes pixel;
2013   pixel.r = static_cast<uint8_t>(BilinearFilter1Component(tl.r, tr.r, bl.r, br.r, fractBlendHorizontal, fractBlendVertical));
2014   pixel.g = static_cast<uint8_t>(BilinearFilter1Component(tl.g, tr.g, bl.g, br.g, fractBlendHorizontal, fractBlendVertical));
2015   pixel.b = static_cast<uint8_t>(BilinearFilter1Component(tl.b, tr.b, bl.b, br.b, fractBlendHorizontal, fractBlendVertical));
2016   pixel.a = static_cast<uint8_t>(BilinearFilter1Component(tl.a, tr.a, bl.a, br.a, fractBlendHorizontal, fractBlendVertical));
2017   return pixel;
2018 }
2019
2020 /**
2021  * @brief Generic version of bilinear sampling image resize function.
2022  * @note Limited to one compilation unit and exposed through type-specific
2023  * wrapper functions below.
2024  */
2025 template<
2026   typename PIXEL,
2027   PIXEL (*BilinearFilter)(PIXEL tl, PIXEL tr, PIXEL bl, PIXEL br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical),
2028   bool DEBUG_ASSERT_ALIGNMENT>
2029 inline void LinearSampleGeneric(const uint8_t* __restrict__ inPixels,
2030                                 ImageDimensions inputDimensions,
2031                                 uint32_t        inputStride,
2032                                 uint8_t* __restrict__ outPixels,
2033                                 ImageDimensions desiredDimensions)
2034 {
2035   const uint32_t inputWidth    = inputDimensions.GetWidth();
2036   const uint32_t inputHeight   = inputDimensions.GetHeight();
2037   const uint32_t desiredWidth  = desiredDimensions.GetWidth();
2038   const uint32_t desiredHeight = desiredDimensions.GetHeight();
2039
2040   DALI_ASSERT_DEBUG(((outPixels >= inPixels + inputStride * inputHeight * sizeof(PIXEL)) ||
2041                      (inPixels >= outPixels + desiredWidth * desiredHeight * sizeof(PIXEL))) &&
2042                     "Input and output buffers cannot overlap.");
2043   if(DEBUG_ASSERT_ALIGNMENT)
2044   {
2045     DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(inPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");
2046     DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(outPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");
2047   }
2048
2049   if(inputWidth < 1u || inputHeight < 1u || desiredWidth < 1u || desiredHeight < 1u)
2050   {
2051     return;
2052   }
2053   const PIXEL* const inAligned  = reinterpret_cast<const PIXEL*>(inPixels);
2054   PIXEL* const       outAligned = reinterpret_cast<PIXEL*>(outPixels);
2055   const uint32_t     deltaX     = (inputWidth << 16u) / desiredWidth;
2056   const uint32_t     deltaY     = (inputHeight << 16u) / desiredHeight;
2057
2058   uint32_t inY = 0;
2059   for(uint32_t outY = 0; outY < desiredHeight; ++outY)
2060   {
2061     PIXEL* const outScanline = &outAligned[desiredWidth * outY];
2062
2063     // Find the two scanlines to blend and the weight to blend with:
2064     const uint32_t integerY1    = inY >> 16u;
2065     const uint32_t integerY2    = integerY1 + 1 >= inputHeight ? integerY1 : integerY1 + 1;
2066     const uint32_t inputYWeight = inY & 65535u;
2067
2068     DALI_ASSERT_DEBUG(integerY1 < inputHeight);
2069     DALI_ASSERT_DEBUG(integerY2 < inputHeight);
2070
2071     const PIXEL* const inScanline1 = &inAligned[inputStride * integerY1];
2072     const PIXEL* const inScanline2 = &inAligned[inputStride * integerY2];
2073
2074     uint32_t inX = 0;
2075     for(uint32_t outX = 0; outX < desiredWidth; ++outX)
2076     {
2077       // Work out the two pixel scanline offsets for this cluster of four samples:
2078       const uint32_t integerX1 = inX >> 16u;
2079       const uint32_t integerX2 = integerX1 + 1 >= inputWidth ? integerX1 : integerX1 + 1;
2080
2081       // Execute the loads:
2082       const PIXEL pixel1 = inScanline1[integerX1];
2083       const PIXEL pixel2 = inScanline2[integerX1];
2084       const PIXEL pixel3 = inScanline1[integerX2];
2085       const PIXEL pixel4 = inScanline2[integerX2];
2086       ///@ToDo Optimise - for 1 and 2  and 4 byte types to execute a single 2, 4, or 8 byte load per pair (caveat clamping) and let half of them be unaligned.
2087
2088       // Weighted bilinear filter:
2089       const uint32_t inputXWeight = inX & 65535u;
2090       outScanline[outX]           = BilinearFilter(pixel1, pixel3, pixel2, pixel4, inputXWeight, inputYWeight);
2091
2092       inX += deltaX;
2093     }
2094     inY += deltaY;
2095   }
2096 }
2097
2098 } // namespace
2099
2100 // Format-specific linear scaling instantiations:
2101
2102 void LinearSample1BPP(const uint8_t* __restrict__ inPixels,
2103                       ImageDimensions inputDimensions,
2104                       uint32_t        inputStride,
2105                       uint8_t* __restrict__ outPixels,
2106                       ImageDimensions desiredDimensions)
2107 {
2108   LinearSampleGeneric<uint8_t, BilinearFilter1BPPByte, false>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2109 }
2110
2111 void LinearSample2BPP(const uint8_t* __restrict__ inPixels,
2112                       ImageDimensions inputDimensions,
2113                       uint32_t        inputStride,
2114                       uint8_t* __restrict__ outPixels,
2115                       ImageDimensions desiredDimensions)
2116 {
2117   LinearSampleGeneric<Pixel2Bytes, BilinearFilter2Bytes, true>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2118 }
2119
2120 void LinearSampleRGB565(const uint8_t* __restrict__ inPixels,
2121                         ImageDimensions inputDimensions,
2122                         uint32_t        inputStride,
2123                         uint8_t* __restrict__ outPixels,
2124                         ImageDimensions desiredDimensions)
2125 {
2126   LinearSampleGeneric<PixelRGB565, BilinearFilterRGB565, true>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2127 }
2128
2129 void LinearSample3BPP(const uint8_t* __restrict__ inPixels,
2130                       ImageDimensions inputDimensions,
2131                       uint32_t        inputStride,
2132                       uint8_t* __restrict__ outPixels,
2133                       ImageDimensions desiredDimensions)
2134 {
2135   LinearSampleGeneric<Pixel3Bytes, BilinearFilterRGB888, false>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2136 }
2137
2138 void LinearSample4BPP(const uint8_t* __restrict__ inPixels,
2139                       ImageDimensions inputDimensions,
2140                       uint32_t        inputStride,
2141                       uint8_t* __restrict__ outPixels,
2142                       ImageDimensions desiredDimensions)
2143 {
2144   LinearSampleGeneric<Pixel4Bytes, BilinearFilter4Bytes, true>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2145 }
2146
2147 // Dispatch to a format-appropriate linear sampling function:
2148 void LinearSample(const uint8_t* __restrict__ inPixels,
2149                   ImageDimensions inDimensions,
2150                   uint32_t        inStride,
2151                   Pixel::Format   pixelFormat,
2152                   uint8_t* __restrict__ outPixels,
2153                   ImageDimensions outDimensions)
2154 {
2155   // Check the pixel format is one that is supported:
2156   if(pixelFormat == Pixel::RGB888 || pixelFormat == Pixel::RGBA8888 || pixelFormat == Pixel::L8 || pixelFormat == Pixel::A8 || pixelFormat == Pixel::LA88 || pixelFormat == Pixel::RGB565 || pixelFormat == Pixel::CHROMINANCE_U || pixelFormat == Pixel::CHROMINANCE_V)
2157   {
2158     switch(pixelFormat)
2159     {
2160       case Pixel::RGB888:
2161       {
2162         LinearSample3BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2163         break;
2164       }
2165       case Pixel::RGBA8888:
2166       {
2167         LinearSample4BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2168         break;
2169       }
2170       case Pixel::L8:
2171       case Pixel::A8:
2172       case Pixel::CHROMINANCE_U:
2173       case Pixel::CHROMINANCE_V:
2174       {
2175         LinearSample1BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2176         break;
2177       }
2178       case Pixel::LA88:
2179       {
2180         LinearSample2BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2181         break;
2182       }
2183       case Pixel::RGB565:
2184       {
2185         LinearSampleRGB565(inPixels, inDimensions, inStride, outPixels, outDimensions);
2186         break;
2187       }
2188       default:
2189       {
2190         DALI_ASSERT_DEBUG(0 == "Inner branch conditions don't match outer branch.");
2191       }
2192     }
2193   }
2194   else
2195   {
2196     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Bitmap was not linear sampled: unsupported pixel format: %u.\n", uint32_t(pixelFormat));
2197   }
2198 }
2199
2200 void Resample(const uint8_t* __restrict__ inPixels,
2201               ImageDimensions inputDimensions,
2202               uint32_t        inputStride,
2203               uint8_t* __restrict__ outPixels,
2204               ImageDimensions   desiredDimensions,
2205               Resampler::Filter filterType,
2206               int               numChannels,
2207               bool              hasAlpha)
2208 {
2209   // Got from the test.cpp of the ImageResampler lib.
2210   const float ONE_DIV_255               = 1.0f / 255.0f;
2211   const int   MAX_UNSIGNED_CHAR         = std::numeric_limits<uint8_t>::max();
2212   const int   LINEAR_TO_SRGB_TABLE_SIZE = 4096;
2213   const int   ALPHA_CHANNEL             = hasAlpha ? (numChannels - 1) : 0;
2214
2215   static bool    loadColorSpaces = true;
2216   static float   srgbToLinear[MAX_UNSIGNED_CHAR + 1];
2217   static uint8_t linearToSrgb[LINEAR_TO_SRGB_TABLE_SIZE];
2218
2219   if(loadColorSpaces) // Only create the color space conversions on the first execution
2220   {
2221     loadColorSpaces = false;
2222
2223     for(int i = 0; i <= MAX_UNSIGNED_CHAR; ++i)
2224     {
2225       srgbToLinear[i] = pow(static_cast<float>(i) * ONE_DIV_255, DEFAULT_SOURCE_GAMMA);
2226     }
2227
2228     const float invLinearToSrgbTableSize = 1.0f / static_cast<float>(LINEAR_TO_SRGB_TABLE_SIZE);
2229     const float invSourceGamma           = 1.0f / DEFAULT_SOURCE_GAMMA;
2230
2231     for(int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i)
2232     {
2233       int k = static_cast<int>(255.0f * pow(static_cast<float>(i) * invLinearToSrgbTableSize, invSourceGamma) + 0.5f);
2234       if(k < 0)
2235       {
2236         k = 0;
2237       }
2238       else if(k > MAX_UNSIGNED_CHAR)
2239       {
2240         k = MAX_UNSIGNED_CHAR;
2241       }
2242       linearToSrgb[i] = static_cast<uint8_t>(k);
2243     }
2244   }
2245
2246   std::vector<Resampler*>    resamplers(numChannels);
2247   std::vector<Vector<float>> samples(numChannels);
2248
2249   const int srcWidth  = inputDimensions.GetWidth();
2250   const int srcHeight = inputDimensions.GetHeight();
2251   const int dstWidth  = desiredDimensions.GetWidth();
2252   const int dstHeight = desiredDimensions.GetHeight();
2253
2254   // Now create a Resampler instance for each component to process. The first instance will create new contributor tables, which are shared by the resamplers
2255   // used for the other components (a memory and slight cache efficiency optimization).
2256   resamplers[0] = new Resampler(srcWidth,
2257                                 srcHeight,
2258                                 dstWidth,
2259                                 dstHeight,
2260                                 Resampler::BOUNDARY_CLAMP,
2261                                 0.0f,          // sample_low,
2262                                 1.0f,          // sample_high. Clamp output samples to specified range, or disable clamping if sample_low >= sample_high.
2263                                 filterType,    // The type of filter.
2264                                 NULL,          // Pclist_x,
2265                                 NULL,          // Pclist_y. Optional pointers to contributor lists from another instance of a Resampler.
2266                                 FILTER_SCALE,  // src_x_ofs,
2267                                 FILTER_SCALE); // src_y_ofs. Offset input image by specified amount (fractional values okay).
2268   samples[0].ResizeUninitialized(srcWidth);
2269   for(int i = 1; i < numChannels; ++i)
2270   {
2271     resamplers[i] = new Resampler(srcWidth,
2272                                   srcHeight,
2273                                   dstWidth,
2274                                   dstHeight,
2275                                   Resampler::BOUNDARY_CLAMP,
2276                                   0.0f,
2277                                   1.0f,
2278                                   filterType,
2279                                   resamplers[0]->get_clist_x(),
2280                                   resamplers[0]->get_clist_y(),
2281                                   FILTER_SCALE,
2282                                   FILTER_SCALE);
2283     samples[i].ResizeUninitialized(srcWidth);
2284   }
2285
2286   const int srcPitch = inputStride * numChannels;
2287   const int dstPitch = dstWidth * numChannels;
2288   int       dstY     = 0;
2289
2290   for(int srcY = 0; srcY < srcHeight; ++srcY)
2291   {
2292     const uint8_t* pSrc = &inPixels[srcY * srcPitch];
2293
2294     for(int x = 0; x < srcWidth; ++x)
2295     {
2296       for(int c = 0; c < numChannels; ++c)
2297       {
2298         if(c == ALPHA_CHANNEL && hasAlpha)
2299         {
2300           samples[c][x] = *pSrc++ * ONE_DIV_255;
2301         }
2302         else
2303         {
2304           samples[c][x] = srgbToLinear[*pSrc++];
2305         }
2306       }
2307     }
2308
2309     for(int c = 0; c < numChannels; ++c)
2310     {
2311       if(!resamplers[c]->put_line(&samples[c][0]))
2312       {
2313         DALI_ASSERT_DEBUG(!"Out of memory");
2314       }
2315     }
2316
2317     for(;;)
2318     {
2319       int compIndex;
2320       for(compIndex = 0; compIndex < numChannels; ++compIndex)
2321       {
2322         const float* pOutputSamples = resamplers[compIndex]->get_line();
2323         if(!pOutputSamples)
2324         {
2325           break;
2326         }
2327
2328         const bool isAlphaChannel = (compIndex == ALPHA_CHANNEL && hasAlpha);
2329         DALI_ASSERT_DEBUG(dstY < dstHeight);
2330         uint8_t* pDst = &outPixels[dstY * dstPitch + compIndex];
2331
2332         for(int x = 0; x < dstWidth; ++x)
2333         {
2334           if(isAlphaChannel)
2335           {
2336             int c = static_cast<int>(255.0f * pOutputSamples[x] + 0.5f);
2337             if(c < 0)
2338             {
2339               c = 0;
2340             }
2341             else if(c > MAX_UNSIGNED_CHAR)
2342             {
2343               c = MAX_UNSIGNED_CHAR;
2344             }
2345             *pDst = static_cast<uint8_t>(c);
2346           }
2347           else
2348           {
2349             int j = static_cast<int>(LINEAR_TO_SRGB_TABLE_SIZE * pOutputSamples[x] + 0.5f);
2350             if(j < 0)
2351             {
2352               j = 0;
2353             }
2354             else if(j >= LINEAR_TO_SRGB_TABLE_SIZE)
2355             {
2356               j = LINEAR_TO_SRGB_TABLE_SIZE - 1;
2357             }
2358             *pDst = linearToSrgb[j];
2359           }
2360
2361           pDst += numChannels;
2362         }
2363       }
2364       if(compIndex < numChannels)
2365       {
2366         break;
2367       }
2368
2369       ++dstY;
2370     }
2371   }
2372
2373   // Delete the resamplers.
2374   for(int i = 0; i < numChannels; ++i)
2375   {
2376     delete resamplers[i];
2377   }
2378 }
2379
2380 void LanczosSample4BPP(const uint8_t* __restrict__ inPixels,
2381                        ImageDimensions inputDimensions,
2382                        uint32_t        inputStride,
2383                        uint8_t* __restrict__ outPixels,
2384                        ImageDimensions desiredDimensions)
2385 {
2386   Resample(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions, Resampler::LANCZOS4, 4, true);
2387 }
2388
2389 void LanczosSample1BPP(const uint8_t* __restrict__ inPixels,
2390                        ImageDimensions inputDimensions,
2391                        uint32_t        inputStride,
2392                        uint8_t* __restrict__ outPixels,
2393                        ImageDimensions desiredDimensions)
2394 {
2395   // For L8 images
2396   Resample(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions, Resampler::LANCZOS4, 1, false);
2397 }
2398
2399 // Dispatch to a format-appropriate third-party resampling function:
2400 void LanczosSample(const uint8_t* __restrict__ inPixels,
2401                    ImageDimensions inDimensions,
2402                    uint32_t        inStride,
2403                    Pixel::Format   pixelFormat,
2404                    uint8_t* __restrict__ outPixels,
2405                    ImageDimensions outDimensions)
2406 {
2407   // Check the pixel format is one that is supported:
2408   if(pixelFormat == Pixel::RGBA8888 || pixelFormat == Pixel::BGRA8888 || pixelFormat == Pixel::L8 || pixelFormat == Pixel::A8)
2409   {
2410     switch(pixelFormat)
2411     {
2412       case Pixel::RGBA8888:
2413       case Pixel::BGRA8888:
2414       {
2415         LanczosSample4BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2416         break;
2417       }
2418       case Pixel::L8:
2419       case Pixel::A8:
2420       {
2421         LanczosSample1BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2422         break;
2423       }
2424       default:
2425       {
2426         DALI_ASSERT_DEBUG(0 == "Inner branch conditions don't match outer branch.");
2427       }
2428     }
2429   }
2430   else
2431   {
2432     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Bitmap was not lanczos sampled: unsupported pixel format: %u.\n", static_cast<uint32_t>(pixelFormat));
2433   }
2434 }
2435
2436 void RotateByShear(const uint8_t* const pixelsIn,
2437                    uint32_t             widthIn,
2438                    uint32_t             heightIn,
2439                    uint32_t             strideIn,
2440                    uint32_t             pixelSize,
2441                    float                radians,
2442                    uint8_t*&            pixelsOut,
2443                    uint32_t&            widthOut,
2444                    uint32_t&            heightOut)
2445 {
2446   // @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
2447
2448   // Do first the fast rotations to transform the angle into a (-45..45] range.
2449
2450   bool fastRotationPerformed = false;
2451   if((radians > Math::PI_4) && (radians <= RAD_135))
2452   {
2453     // Angle in (45.0 .. 135.0]
2454     // Rotate image by 90 degrees into temporary image,
2455     // so it requires only an extra rotation angle
2456     // of -45.0 .. +45.0 to complete rotation.
2457     fastRotationPerformed = Rotate90(pixelsIn,
2458                                      widthIn,
2459                                      heightIn,
2460                                      strideIn,
2461                                      pixelSize,
2462                                      pixelsOut,
2463                                      widthOut,
2464                                      heightOut);
2465
2466     if(!fastRotationPerformed)
2467     {
2468       DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "fast rotation failed\n");
2469       // The fast rotation failed.
2470       return;
2471     }
2472
2473     radians -= Math::PI_2;
2474   }
2475   else if((radians > RAD_135) && (radians <= RAD_225))
2476   {
2477     // Angle in (135.0 .. 225.0]
2478     // Rotate image by 180 degrees into temporary image,
2479     // so it requires only an extra rotation angle
2480     // of -45.0 .. +45.0 to complete rotation.
2481
2482     fastRotationPerformed = Rotate180(pixelsIn,
2483                                       widthIn,
2484                                       heightIn,
2485                                       strideIn,
2486                                       pixelSize,
2487                                       pixelsOut);
2488
2489     if(!fastRotationPerformed)
2490     {
2491       DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "fast rotation failed\n");
2492       // The fast rotation failed.
2493       return;
2494     }
2495
2496     radians -= Math::PI;
2497     widthOut  = widthIn;
2498     heightOut = heightIn;
2499   }
2500   else if((radians > RAD_225) && (radians <= RAD_315))
2501   {
2502     // Angle in (225.0 .. 315.0]
2503     // Rotate image by 270 degrees into temporary image,
2504     // so it requires only an extra rotation angle
2505     // of -45.0 .. +45.0 to complete rotation.
2506
2507     fastRotationPerformed = Rotate270(pixelsIn,
2508                                       widthIn,
2509                                       heightIn,
2510                                       strideIn,
2511                                       pixelSize,
2512                                       pixelsOut,
2513                                       widthOut,
2514                                       heightOut);
2515
2516     if(!fastRotationPerformed)
2517     {
2518       DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "fast rotation failed\n");
2519       // The fast rotation failed.
2520       return;
2521     }
2522
2523     radians -= RAD_270;
2524   }
2525
2526   if(fabs(radians) < Dali::Math::MACHINE_EPSILON_10)
2527   {
2528     // Nothing else to do if the angle is zero.
2529     // The rotation angle was 90, 180 or 270.
2530
2531     // @note Allocated memory by 'Fast Rotations', if any, has to be freed by the called to this function.
2532     return;
2533   }
2534
2535   const uint8_t* const                      firstHorizontalSkewPixelsIn = fastRotationPerformed ? pixelsOut : pixelsIn;
2536   std::unique_ptr<uint8_t, void (*)(void*)> tmpPixelsInPtr((fastRotationPerformed ? pixelsOut : nullptr), free);
2537
2538   uint32_t stride = fastRotationPerformed ? widthOut : strideIn;
2539
2540   // Reset the input/output
2541   widthIn   = widthOut;
2542   heightIn  = heightOut;
2543   pixelsOut = nullptr;
2544
2545   const float angleSinus   = sin(radians);
2546   const float angleCosinus = cos(radians);
2547   const float angleTangent = tan(0.5f * radians);
2548
2549   ///////////////////////////////////////
2550   // Perform 1st shear (horizontal)
2551   ///////////////////////////////////////
2552
2553   // Calculate first shear (horizontal) destination image dimensions
2554
2555   widthOut  = widthIn + static_cast<uint32_t>(fabs(angleTangent) * static_cast<float>(heightIn));
2556   heightOut = heightIn;
2557
2558   // Allocate the buffer for the 1st shear
2559   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2560
2561   if(nullptr == pixelsOut)
2562   {
2563     DALI_LOG_ERROR("malloc is failed. request malloc size : %u x %u x %u\n", widthOut, heightOut, pixelSize);
2564     widthOut  = 0u;
2565     heightOut = 0u;
2566
2567     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2568
2569     // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'Fast rotations'.
2570     // Nothing else to do if the memory allocation fails.
2571     return;
2572   }
2573
2574   for(uint32_t y = 0u; y < heightOut; ++y)
2575   {
2576     const float shear = angleTangent * ((angleTangent >= 0.f) ? (0.5f + static_cast<float>(y)) : (0.5f + static_cast<float>(y) - static_cast<float>(heightOut)));
2577
2578     const int intShear = static_cast<int>(floor(shear));
2579     HorizontalSkew(firstHorizontalSkewPixelsIn, widthIn, stride, pixelSize, pixelsOut, widthOut, y, intShear, shear - static_cast<float>(intShear));
2580   }
2581
2582   // Reset the 'pixel in' pointer with the output of the 'First Horizontal Skew' and free the memory allocated by the 'Fast Rotations'.
2583   tmpPixelsInPtr.reset(pixelsOut);
2584   uint32_t tmpWidthIn  = widthOut;
2585   uint32_t tmpHeightIn = heightOut;
2586
2587   // Reset the input/output
2588   pixelsOut = nullptr;
2589
2590   ///////////////////////////////////////
2591   // Perform 2nd shear (vertical)
2592   ///////////////////////////////////////
2593
2594   // Calc 2nd shear (vertical) destination image dimensions
2595   heightOut = static_cast<uint32_t>(static_cast<float>(widthIn) * fabs(angleSinus) + static_cast<float>(heightIn) * angleCosinus);
2596
2597   // Allocate the buffer for the 2nd shear
2598   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2599
2600   if(nullptr == pixelsOut)
2601   {
2602     DALI_LOG_ERROR("malloc is failed. request malloc size : %u x %u x %u\n", widthOut, heightOut, pixelSize);
2603     widthOut  = 0u;
2604     heightOut = 0u;
2605
2606     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2607     // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'First Horizontal Skew'.
2608     // Nothing else to do if the memory allocation fails.
2609     return;
2610   }
2611
2612   // Variable skew offset
2613   float offset = angleSinus * ((angleSinus > 0.f) ? static_cast<float>(widthIn - 1u) : -(static_cast<float>(widthIn) - static_cast<float>(widthOut)));
2614
2615   uint32_t column = 0u;
2616   for(column = 0u; column < widthOut; ++column, offset -= angleSinus)
2617   {
2618     const int32_t shear = static_cast<int32_t>(floor(offset));
2619     VerticalSkew(tmpPixelsInPtr.get(), tmpWidthIn, tmpHeightIn, tmpWidthIn, pixelSize, pixelsOut, widthOut, heightOut, column, shear, offset - static_cast<float>(shear));
2620   }
2621   // Reset the 'pixel in' pointer with the output of the 'Vertical Skew' and free the memory allocated by the 'First Horizontal Skew'.
2622   // Reset the input/output
2623   tmpPixelsInPtr.reset(pixelsOut);
2624   tmpWidthIn  = widthOut;
2625   tmpHeightIn = heightOut;
2626   pixelsOut   = nullptr;
2627
2628   ///////////////////////////////////////
2629   // Perform 3rd shear (horizontal)
2630   ///////////////////////////////////////
2631
2632   // Calc 3rd shear (horizontal) destination image dimensions
2633   widthOut = static_cast<uint32_t>(static_cast<float>(heightIn) * fabs(angleSinus) + static_cast<float>(widthIn) * angleCosinus) + 1u;
2634
2635   // Allocate the buffer for the 3rd shear
2636   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2637
2638   if(nullptr == pixelsOut)
2639   {
2640     DALI_LOG_ERROR("malloc is failed. request malloc size : %u x %u x %u\n", widthOut, heightOut, pixelSize);
2641     widthOut  = 0u;
2642     heightOut = 0u;
2643
2644     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2645     // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'Vertical Skew'.
2646     // Nothing else to do if the memory allocation fails.
2647     return;
2648   }
2649
2650   offset = (angleSinus >= 0.f) ? -angleSinus * angleTangent * static_cast<float>(widthIn - 1u) : angleTangent * (static_cast<float>(widthIn - 1u) * -angleSinus + (1.f - static_cast<float>(heightOut)));
2651
2652   for(uint32_t y = 0u; y < heightOut; ++y, offset += angleTangent)
2653   {
2654     const int32_t shear = static_cast<int32_t>(floor(offset));
2655     HorizontalSkew(tmpPixelsInPtr.get(), tmpWidthIn, tmpWidthIn, pixelSize, pixelsOut, widthOut, y, shear, offset - static_cast<float>(shear));
2656   }
2657
2658   // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'Vertical Skew'.
2659   // @note Allocated memory by the last 'Horizontal Skew' has to be freed by the caller to this function.
2660 }
2661
2662 void HorizontalShear(const uint8_t* const pixelsIn,
2663                      uint32_t             widthIn,
2664                      uint32_t             heightIn,
2665                      uint32_t             strideIn,
2666                      uint32_t             pixelSize,
2667                      float                radians,
2668                      uint8_t*&            pixelsOut,
2669                      uint32_t&            widthOut,
2670                      uint32_t&            heightOut)
2671 {
2672   // Calculate the destination image dimensions.
2673
2674   const float absRadians = fabs(radians);
2675
2676   if(absRadians > Math::PI_4)
2677   {
2678     // Can't shear more than 45 degrees.
2679     widthOut  = 0u;
2680     heightOut = 0u;
2681
2682     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Can't shear more than 45 degrees (PI/4 radians). radians : %f\n", radians);
2683     return;
2684   }
2685
2686   widthOut  = widthIn + static_cast<uint32_t>(ceil(absRadians * static_cast<float>(heightIn)));
2687   heightOut = heightIn;
2688
2689   // Allocate the buffer for the shear.
2690   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2691
2692   if(nullptr == pixelsOut)
2693   {
2694     DALI_LOG_ERROR("malloc is failed. request malloc size : %u x %u x %u\n", widthOut, heightOut, pixelSize);
2695     widthOut  = 0u;
2696     heightOut = 0u;
2697
2698     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2699     return;
2700   }
2701
2702   for(uint32_t y = 0u; y < heightOut; ++y)
2703   {
2704     const float shear = radians * ((radians >= 0.f) ? (0.5f + static_cast<float>(y)) : (0.5f + static_cast<float>(y) - static_cast<float>(heightOut)));
2705
2706     const int32_t intShear = static_cast<int32_t>(floor(shear));
2707     HorizontalSkew(pixelsIn, widthIn, strideIn, pixelSize, pixelsOut, widthOut, y, intShear, shear - static_cast<float>(intShear));
2708   }
2709 }
2710
2711 } /* namespace Platform */
2712 } /* namespace Internal */
2713 } /* namespace Dali */