dali/internal/imaging/common/image-operations.cpp

   1 /*
   2  * Copyright (c) 2023 Samsung Electronics Co., Ltd.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  *
  16  */
  17
  18 #include <dali/internal/imaging/common/image-operations.h>
  19
  20 // EXTERNAL INCLUDES
  21 #include <dali/devel-api/adaptor-framework/image-loading.h>
  22 #include <dali/integration-api/debug.h>
  23 #include <dali/public-api/common/dali-vector.h>
  24 #include <dali/public-api/math/vector2.h>
  25 #include <stddef.h>
  26 #include <third-party/resampler/resampler.h>
  27 #include <cmath>
  28 #include <cstring>
  29 #include <limits>
  30 #include <memory>
  31
  32 // INTERNAL INCLUDES
  33
  34 namespace Dali
  35 {
  36 namespace Internal
  37 {
  38 namespace Platform
  39 {
  40 namespace
  41 {
  42 // The BORDER_FILL_VALUE is a single byte value that is used for horizontal and vertical borders.
  43 // A value of 0x00 gives us transparency for pixel buffers with an alpha channel, or black otherwise.
  44 // We can optionally use a Vector4 color here, but at reduced fill speed.
  45 const uint8_t BORDER_FILL_VALUE(0x00);
  46 // A maximum size limit for newly created bitmaps. ( 1u << 16 ) - 1 is chosen as we are using 16bit words for dimensions.
  47 const uint32_t MAXIMUM_TARGET_BITMAP_SIZE((1u << 16) - 1);
  48
  49 // Constants used by the ImageResampler.
  50 const float DEFAULT_SOURCE_GAMMA = 1.75f; ///< Default source gamma value used in the Resampler() function. Partial gamma correction looks better on mips. Set to 1.0 to disable gamma correction.
  51 const float FILTER_SCALE         = 1.f;   ///< Default filter scale value used in the Resampler() function. Filter scale - values < 1.0 cause aliasing, but create sharper looking mips.
  52
  53 const float RAD_135 = Math::PI_2 + Math::PI_4; ///< 135 degrees in radians;
  54 const float RAD_225 = RAD_135 + Math::PI_2;    ///< 225 degrees in radians;
  55 const float RAD_270 = 3.f * Math::PI_2;        ///< 270 degrees in radians;
  56 const float RAD_315 = RAD_225 + Math::PI_2;    ///< 315 degrees in radians;
  57
  58 using Integration::Bitmap;
  59 using Integration::BitmapPtr;
  60 typedef uint8_t PixelBuffer;
  61
  62 /**
  63  * @brief 4 byte pixel structure.
  64  */
  65 struct Pixel4Bytes
  66 {
  67   uint8_t r;
  68   uint8_t g;
  69   uint8_t b;
  70   uint8_t a;
  71 } __attribute__((packed, aligned(4))); //< Tell the compiler it is okay to use a single 32 bit load.
  72
  73 /**
  74  * @brief RGB888 pixel structure.
  75  */
  76 struct Pixel3Bytes
  77 {
  78   uint8_t r;
  79   uint8_t g;
  80   uint8_t b;
  81 } __attribute__((packed, aligned(1)));
  82
  83 /**
  84  * @brief RGB565 pixel typedefed from a short.
  85  *
  86  * Access fields by manual shifting and masking.
  87  */
  88 typedef uint16_t PixelRGB565;
  89
  90 /**
  91  * @brief a Pixel composed of two independent byte components.
  92  */
  93 struct Pixel2Bytes
  94 {
  95   uint8_t l;
  96   uint8_t a;
  97 } __attribute__((packed, aligned(2))); //< Tell the compiler it is okay to use a single 16 bit load.
  98
  99 #if defined(DEBUG_ENABLED)
 100 /**
 101  * Disable logging of image operations or make it verbose from the commandline
 102  * as follows (e.g., for dali demo app):
 103  * <code>
 104  * LOG_IMAGE_OPERATIONS=0 dali-demo #< off
 105  * LOG_IMAGE_OPERATIONS=3 dali-demo #< on, verbose
 106  * </code>
 107  */
 108 Debug::Filter* gImageOpsLogFilter = Debug::Filter::New(Debug::NoLogging, false, "LOG_IMAGE_OPERATIONS");
 109 #endif
 110
 111 /** @return The greatest even number less than or equal to the argument. */
 112 inline uint32_t EvenDown(const uint32_t a)
 113 {
 114   const uint32_t evened = a & ~1u;
 115   return evened;
 116 }
 117
 118 /**
 119  * @brief Log bad parameters.
 120  */
 121 void ValidateScalingParameters(const uint32_t inputWidth,
 122                                const uint32_t inputHeight,
 123                                const uint32_t desiredWidth,
 124                                const uint32_t desiredHeight)
 125 {
 126   if(desiredWidth > inputWidth || desiredHeight > inputHeight)
 127   {
 128     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Upscaling not supported (%u, %u -> %u, %u).\n", inputWidth, inputHeight, desiredWidth, desiredHeight);
 129   }
 130
 131   if(desiredWidth == 0u || desiredHeight == 0u)
 132   {
 133     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Downscaling to a zero-area target is pointless.\n");
 134   }
 135
 136   if(inputWidth == 0u || inputHeight == 0u)
 137   {
 138     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Zero area images cannot be scaled\n");
 139   }
 140 }
 141
/**
 * @brief Debug assertions common to all scanline halving functions.
 *
 * Checks that the scanline pointer is set and that the width is plausible. Every check
 * is a DALI_ASSERT_DEBUG, so the function has no other effect.
 *
 * @note Inline and in anon namespace so should boil away in release builds.
 *
 * @param[in] pixels The scanline about to be processed; must not be null.
 * @param[in] width The scanline width in pixels; must be at least 2 and below 131072.
 */
inline void DebugAssertScanlineParameters(const uint8_t* const pixels, const uint32_t width)
{
  DALI_ASSERT_DEBUG(pixels && "Null pointer.");
  // Halving averages pixels pairwise, so a single pixel leaves nothing to average.
  DALI_ASSERT_DEBUG(width > 1u && "Can't average fewer than two pixels.");
  // 131072 == 2^17: a sanity bound meant to catch a wrong value being passed in.
  DALI_ASSERT_DEBUG(width < 131072u && "Unusually wide image: are you sure you meant to pass that value in?");
}
 152
 153 /**
 154  * @brief Assertions on params to functions averaging pairs of scanlines.
 155  * @note Inline as intended to boil away in release.
 156  */
 157 inline void DebugAssertDualScanlineParameters(const uint8_t* const scanline1,
 158                                               const uint8_t* const scanline2,
 159                                               uint8_t* const       outputScanline,
 160                                               const size_t         widthInComponents)
 161 {
 162   DALI_ASSERT_DEBUG(scanline1 && "Null pointer.");
 163   DALI_ASSERT_DEBUG(scanline2 && "Null pointer.");
 164   DALI_ASSERT_DEBUG(outputScanline && "Null pointer.");
 165   DALI_ASSERT_DEBUG(((scanline1 >= scanline2 + widthInComponents) || (scanline2 >= scanline1 + widthInComponents)) && "Scanlines alias.");
 166   DALI_ASSERT_DEBUG(((outputScanline >= (scanline2 + widthInComponents)) || (scanline2 >= (scanline1 + widthInComponents))) && "Scanline 2 aliases output.");
 167 }
 168
 169 /**
 170  * @brief Converts a scaling mode to the definition of which dimensions matter when box filtering as a part of that mode.
 171  */
 172 BoxDimensionTest DimensionTestForScalingMode(FittingMode::Type fittingMode)
 173 {
 174   BoxDimensionTest dimensionTest;
 175   dimensionTest = BoxDimensionTestEither;
 176
 177   switch(fittingMode)
 178   {
 179     // Shrink to fit attempts to make one or zero dimensions smaller than the
 180     // desired dimensions and one or two dimensions exactly the same as the desired
 181     // ones, so as long as one dimension is larger than the desired size, box
 182     // filtering can continue even if the second dimension is smaller than the
 183     // desired dimensions:
 184     case FittingMode::SHRINK_TO_FIT:
 185     {
 186       dimensionTest = BoxDimensionTestEither;
 187       break;
 188     }
 189     // Scale to fill mode keeps both dimensions at least as large as desired:
 190     case FittingMode::SCALE_TO_FILL:
 191     {
 192       dimensionTest = BoxDimensionTestBoth;
 193       break;
 194     }
 195     // Y dimension is irrelevant when downscaling in FIT_WIDTH mode:
 196     case FittingMode::FIT_WIDTH:
 197     {
 198       dimensionTest = BoxDimensionTestX;
 199       break;
 200     }
 201     // X Dimension is ignored by definition in FIT_HEIGHT mode:
 202     case FittingMode::FIT_HEIGHT:
 203     {
 204       dimensionTest = BoxDimensionTestY;
 205       break;
 206     }
 207   }
 208
 209   return dimensionTest;
 210 }
 211
 212 /**
 213  * @brief Work out the dimensions for a uniform scaling of the input to map it
 214  * into the target while effecting ShinkToFit scaling mode.
 215  */
 216 ImageDimensions FitForShrinkToFit(ImageDimensions target, ImageDimensions source)
 217 {
 218   // Scale the input by the least extreme of the two dimensions:
 219   const float widthScale  = target.GetX() / float(source.GetX());
 220   const float heightScale = target.GetY() / float(source.GetY());
 221   const float scale       = widthScale < heightScale ? widthScale : heightScale;
 222
 223   // Do no scaling at all if the result would increase area:
 224   if(scale >= 1.0f)
 225   {
 226     return source;
 227   }
 228
 229   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 230 }
 231
 232 /**
 233  * @brief Work out the dimensions for a uniform scaling of the input to map it
 234  * into the target while effecting SCALE_TO_FILL scaling mode.
 235  * @note An image scaled into the output dimensions will need either top and
 236  * bottom or left and right to be cropped away unless the source was pre-cropped
 237  * to match the destination aspect ratio.
 238  */
 239 ImageDimensions FitForScaleToFill(ImageDimensions target, ImageDimensions source)
 240 {
 241   DALI_ASSERT_DEBUG(source.GetX() > 0 && source.GetY() > 0 && "Zero-area rectangles should not be passed-in");
 242   // Scale the input by the least extreme of the two dimensions:
 243   const float widthScale  = target.GetX() / float(source.GetX());
 244   const float heightScale = target.GetY() / float(source.GetY());
 245   const float scale       = widthScale > heightScale ? widthScale : heightScale;
 246
 247   // Do no scaling at all if the result would increase area:
 248   if(scale >= 1.0f)
 249   {
 250     return source;
 251   }
 252
 253   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 254 }
 255
 256 /**
 257  * @brief Work out the dimensions for a uniform scaling of the input to map it
 258  * into the target while effecting FIT_WIDTH scaling mode.
 259  */
 260 ImageDimensions FitForFitWidth(ImageDimensions target, ImageDimensions source)
 261 {
 262   DALI_ASSERT_DEBUG(source.GetX() > 0 && "Cant fit a zero-dimension rectangle.");
 263   const float scale = target.GetX() / float(source.GetX());
 264
 265   // Do no scaling at all if the result would increase area:
 266   if(scale >= 1.0f)
 267   {
 268     return source;
 269   }
 270   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 271 }
 272
 273 /**
 274  * @brief Work out the dimensions for a uniform scaling of the input to map it
 275  * into the target while effecting FIT_HEIGHT scaling mode.
 276  */
 277 ImageDimensions FitForFitHeight(ImageDimensions target, ImageDimensions source)
 278 {
 279   DALI_ASSERT_DEBUG(source.GetY() > 0 && "Cant fit a zero-dimension rectangle.");
 280   const float scale = target.GetY() / float(source.GetY());
 281
 282   // Do no scaling at all if the result would increase area:
 283   if(scale >= 1.0f)
 284   {
 285     return source;
 286   }
 287
 288   return ImageDimensions(source.GetX() * scale + 0.5f, source.GetY() * scale + 0.5f);
 289 }
 290
 291 /**
 292  * @brief Generate the rectangle to use as the target of a pixel sampling pass
 293  * (e.g., nearest or linear).
 294  */
 295 ImageDimensions FitToScalingMode(ImageDimensions requestedSize, ImageDimensions sourceSize, FittingMode::Type fittingMode)
 296 {
 297   ImageDimensions fitDimensions;
 298   switch(fittingMode)
 299   {
 300     case FittingMode::SHRINK_TO_FIT:
 301     {
 302       fitDimensions = FitForShrinkToFit(requestedSize, sourceSize);
 303       break;
 304     }
 305     case FittingMode::SCALE_TO_FILL:
 306     {
 307       fitDimensions = FitForScaleToFill(requestedSize, sourceSize);
 308       break;
 309     }
 310     case FittingMode::FIT_WIDTH:
 311     {
 312       fitDimensions = FitForFitWidth(requestedSize, sourceSize);
 313       break;
 314     }
 315     case FittingMode::FIT_HEIGHT:
 316     {
 317       fitDimensions = FitForFitHeight(requestedSize, sourceSize);
 318       break;
 319     }
 320   }
 321
 322   return fitDimensions;
 323 }
 324
 325 /**
 326  * @brief Calculate the number of lines on the X and Y axis that need to be
 327  * either added or removed with repect to the specified fitting mode.
 328  * (e.g., nearest or linear).
 329  * @param[in]     sourceSize      The size of the source image
 330  * @param[in]     fittingMode     The fitting mode to use
 331  * @param[in/out] requestedSize   The target size that the image will be fitted to.
 332  *                                If the source image is smaller than the requested size, the source is not scaled up.
 333  *                                So we reduce the target size while keeping aspect by lowering resolution.
 334  * @param[out]    scanlinesToCrop The number of scanlines to remove from the image (can be negative to represent Y borders required)
 335  * @param[out]    columnsToCrop   The number of columns to remove from the image (can be negative to represent X borders required)
 336  */
 337 void CalculateBordersFromFittingMode(ImageDimensions sourceSize, FittingMode::Type fittingMode, ImageDimensions& requestedSize, int& scanlinesToCrop, int& columnsToCrop)
 338 {
 339   const int   sourceWidth(static_cast<int>(sourceSize.GetWidth()));
 340   const int   sourceHeight(static_cast<int>(sourceSize.GetHeight()));
 341   const float targetAspect(static_cast<float>(requestedSize.GetWidth()) / static_cast<float>(requestedSize.GetHeight()));
 342   int         finalWidth  = 0;
 343   int         finalHeight = 0;
 344
 345   switch(fittingMode)
 346   {
 347     case FittingMode::FIT_WIDTH:
 348     {
 349       finalWidth  = sourceWidth;
 350       finalHeight = static_cast<int>(static_cast<float>(sourceWidth) / targetAspect);
 351       break;
 352     }
 353
 354     case FittingMode::FIT_HEIGHT:
 355     {
 356       finalWidth  = static_cast<int>(static_cast<float>(sourceHeight) * targetAspect);
 357       finalHeight = sourceHeight;
 358       break;
 359     }
 360
 361     case FittingMode::SHRINK_TO_FIT:
 362     {
 363       const float sourceAspect(static_cast<float>(sourceWidth) / static_cast<float>(sourceHeight));
 364       if(sourceAspect > targetAspect)
 365       {
 366         finalWidth  = sourceWidth;
 367         finalHeight = static_cast<int>(static_cast<float>(sourceWidth) / targetAspect);
 368       }
 369       else
 370       {
 371         finalWidth  = static_cast<int>(static_cast<float>(sourceHeight) * targetAspect);
 372         finalHeight = sourceHeight;
 373       }
 374       break;
 375     }
 376
 377     case FittingMode::SCALE_TO_FILL:
 378     {
 379       const float sourceAspect(static_cast<float>(sourceWidth) / static_cast<float>(sourceHeight));
 380       if(sourceAspect > targetAspect)
 381       {
 382         finalWidth  = static_cast<int>(static_cast<float>(sourceHeight) * targetAspect);
 383         finalHeight = sourceHeight;
 384       }
 385       else
 386       {
 387         finalWidth  = sourceWidth;
 388         finalHeight = static_cast<int>(static_cast<float>(sourceWidth) / targetAspect);
 389       }
 390       break;
 391     }
 392   }
 393
 394   // Clamp if overflowed
 395   if(DALI_UNLIKELY(finalWidth > std::numeric_limits<uint16_t>::max()))
 396   {
 397     finalWidth = std::numeric_limits<uint16_t>::max();
 398   }
 399   if(DALI_UNLIKELY(finalHeight > std::numeric_limits<uint16_t>::max()))
 400   {
 401     finalHeight = std::numeric_limits<uint16_t>::max();
 402   }
 403
 404   columnsToCrop   = -(finalWidth - sourceWidth);
 405   scanlinesToCrop = -(finalHeight - sourceHeight);
 406
 407   requestedSize.SetWidth(static_cast<uint16_t>(finalWidth));
 408   requestedSize.SetHeight(static_cast<uint16_t>(finalHeight));
 409 }
 410
 411 /**
 412  * @brief Construct a pixel buffer object from a copy of the pixel array passed in.
 413  */
 414 Dali::Devel::PixelBuffer MakePixelBuffer(const uint8_t* const pixels, Pixel::Format pixelFormat, uint32_t width, uint32_t height)
 415 {
 416   DALI_ASSERT_DEBUG(pixels && "Null bitmap buffer to copy.");
 417
 418   // Allocate a pixel buffer to hold the image passed in:
 419   auto newBitmap = Dali::Devel::PixelBuffer::New(width, height, pixelFormat);
 420
 421   // Copy over the pixels from the downscaled image that was generated in-place in the pixel buffer of the input bitmap:
 422   memcpy(newBitmap.GetBuffer(), pixels, width * height * Pixel::GetBytesPerPixel(pixelFormat));
 423   return newBitmap;
 424 }
 425
 426 /**
 427  * @brief Work out the desired width and height, accounting for zeros.
 428  *
 429  * @param[in] bitmapWidth Width of image before processing.
 430  * @param[in] bitmapHeight Height of image before processing.
 431  * @param[in] requestedWidth Width of area to scale image into. Can be zero.
 432  * @param[in] requestedHeight Height of area to scale image into. Can be zero.
 433  * @return Dimensions of area to scale image into after special rules are applied.
 434  */
 435 ImageDimensions CalculateDesiredDimensions(uint32_t bitmapWidth, uint32_t bitmapHeight, uint32_t requestedWidth, uint32_t requestedHeight)
 436 {
 437   uint32_t maxSize = Dali::GetMaxTextureSize();
 438
 439   // If no dimensions have been requested, default to the source ones:
 440   if(requestedWidth == 0 && requestedHeight == 0)
 441   {
 442     if(bitmapWidth <= maxSize && bitmapHeight <= maxSize)
 443     {
 444       return ImageDimensions(bitmapWidth, bitmapHeight);
 445     }
 446     else
 447     {
 448       // Calculate the size from the max texture size and the source image aspect ratio
 449       if(bitmapWidth > bitmapHeight)
 450       {
 451         return ImageDimensions(maxSize, bitmapHeight * maxSize / static_cast<float>(bitmapWidth) + 0.5f);
 452       }
 453       else
 454       {
 455         return ImageDimensions(bitmapWidth * maxSize / static_cast<float>(bitmapHeight) + 0.5f, maxSize);
 456       }
 457     }
 458   }
 459
 460   // If both dimensions have values requested, use them both:
 461   if(requestedWidth != 0 && requestedHeight != 0)
 462   {
 463     if(requestedWidth <= maxSize && requestedHeight <= maxSize)
 464     {
 465       return ImageDimensions(requestedWidth, requestedHeight);
 466     }
 467     else
 468     {
 469       // Calculate the size from the max texture size and the source image aspect ratio
 470       if(requestedWidth > requestedHeight)
 471       {
 472         return ImageDimensions(maxSize, requestedHeight * maxSize / static_cast<float>(requestedWidth) + 0.5f);
 473       }
 474       else
 475       {
 476         return ImageDimensions(requestedWidth * maxSize / static_cast<float>(requestedHeight) + 0.5f, maxSize);
 477       }
 478     }
 479   }
 480
 481   // Only one of the dimensions has been requested. Calculate the other from
 482   // the requested one and the source image aspect ratio:
 483   if(requestedWidth != 0)
 484   {
 485     requestedWidth = std::min(requestedWidth, maxSize);
 486     return ImageDimensions(requestedWidth, bitmapHeight / float(bitmapWidth) * requestedWidth + 0.5f);
 487   }
 488
 489   requestedHeight = std::min(requestedHeight, maxSize);
 490   return ImageDimensions(bitmapWidth / float(bitmapHeight) * requestedHeight + 0.5f, requestedHeight);
 491 }
 492
 493 /**
 494  * @brief Rotates the given buffer @p pixelsIn 90 degrees counter clockwise.
 495  *
 496  * @note It allocates memory for the returned @p pixelsOut buffer.
 497  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 498  * @note It may fail if malloc() fails to allocate memory.
 499  *
 500  * @param[in] pixelsIn The input buffer.
 501  * @param[in] widthIn The width of the input buffer.
 502  * @param[in] heightIn The height of the input buffer.
 503  * @param[in] strideIn The stride of the input buffer.
 504  * @param[in] pixelSize The size of the pixel.
 505  * @param[out] pixelsOut The rotated output buffer.
 506  * @param[out] widthOut The width of the output buffer.
 507  * @param[out] heightOut The height of the output buffer.
 508  *
 509  * @return Whether the rotation succeeded.
 510  */
 511 bool Rotate90(const uint8_t* const pixelsIn,
 512               uint32_t             widthIn,
 513               uint32_t             heightIn,
 514               uint32_t             strideIn,
 515               uint32_t             pixelSize,
 516               uint8_t*&            pixelsOut,
 517               uint32_t&            widthOut,
 518               uint32_t&            heightOut)
 519 {
 520   // The new size of the image.
 521   widthOut  = heightIn;
 522   heightOut = widthIn;
 523
 524   // Allocate memory for the rotated buffer.
 525   // Output buffer is tightly packed
 526   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
 527   if(nullptr == pixelsOut)
 528   {
 529     widthOut  = 0u;
 530     heightOut = 0u;
 531
 532     // Return if the memory allocations fails.
 533     return false;
 534   }
 535
 536   // Rotate the buffer.
 537   for(uint32_t y = 0u; y < heightIn; ++y)
 538   {
 539     const uint32_t srcLineIndex = y * strideIn;
 540     const uint32_t dstX         = y;
 541     for(uint32_t x = 0u; x < widthIn; ++x)
 542     {
 543       const uint32_t dstY     = heightOut - x - 1u;
 544       const uint32_t dstIndex = pixelSize * (dstY * widthOut + dstX);
 545       const uint32_t srcIndex = pixelSize * (srcLineIndex + x);
 546
 547       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 548       {
 549         *(pixelsOut + dstIndex + channel) = *(pixelsIn + srcIndex + channel);
 550       }
 551     }
 552   }
 553
 554   return true;
 555 }
 556
 557 /**
 558  * @brief Rotates the given buffer @p pixelsIn 180 degrees counter clockwise.
 559  *
 560  * @note It allocates memory for the returned @p pixelsOut buffer.
 561  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 562  * @note It may fail if malloc() fails to allocate memory.
 563  *
 564  * @param[in] pixelsIn The input buffer.
 565  * @param[in] widthIn The width of the input buffer.
 566  * @param[in] heightIn The height of the input buffer.
 567  * @param[in] strideIn The stride of the input buffer.
 568  * @param[in] pixelSize The size of the pixel.
 569  * @param[out] pixelsOut The rotated output buffer.
 570  *
 571  * @return Whether the rotation succeeded.
 572  */
 573 bool Rotate180(const uint8_t* const pixelsIn,
 574                uint32_t             widthIn,
 575                uint32_t             heightIn,
 576                uint32_t             strideIn,
 577                uint32_t             pixelSize,
 578                uint8_t*&            pixelsOut)
 579 {
 580   // Allocate memory for the rotated buffer.
 581   // Output buffer is tightly packed
 582   pixelsOut = static_cast<uint8_t*>(malloc(widthIn * heightIn * pixelSize));
 583   if(nullptr == pixelsOut)
 584   {
 585     // Return if the memory allocations fails.
 586     return false;
 587   }
 588
 589   // Rotate the buffer.
 590   for(uint32_t y = 0u; y < heightIn; ++y)
 591   {
 592     const uint32_t srcLineIndex = y * strideIn;
 593     const uint32_t dstY         = heightIn - y - 1u;
 594     for(uint32_t x = 0u; x < widthIn; ++x)
 595     {
 596       const uint32_t dstX     = widthIn - x - 1u;
 597       const uint32_t dstIndex = pixelSize * (dstY * widthIn + dstX);
 598       const uint32_t srcIndex = pixelSize * (srcLineIndex + x);
 599
 600       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 601       {
 602         *(pixelsOut + dstIndex + channel) = *(pixelsIn + srcIndex + channel);
 603       }
 604     }
 605   }
 606
 607   return true;
 608 }
 609
 610 /**
 611  * @brief Rotates the given buffer @p pixelsIn 270 degrees counter clockwise.
 612  *
 613  * @note It allocates memory for the returned @p pixelsOut buffer.
 614  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 615  * @note It may fail if malloc() fails to allocate memory.
 616  *
 617  * @param[in] pixelsIn The input buffer.
 618  * @param[in] widthIn The width of the input buffer.
 619  * @param[in] heightIn The height of the input buffer.
 620  * @param[in] strideIn The stride of the input buffer.
 621  * @param[in] pixelSize The size of the pixel.
 622  * @param[out] pixelsOut The rotated output buffer.
 623  * @param[out] widthOut The width of the output buffer.
 624  * @param[out] heightOut The height of the output buffer.
 625  *
 626  * @return Whether the rotation succeeded.
 627  */
 628 bool Rotate270(const uint8_t* const pixelsIn,
 629                uint32_t             widthIn,
 630                uint32_t             heightIn,
 631                uint32_t             strideIn,
 632                uint32_t             pixelSize,
 633                uint8_t*&            pixelsOut,
 634                uint32_t&            widthOut,
 635                uint32_t&            heightOut)
 636 {
 637   // The new size of the image.
 638   widthOut  = heightIn;
 639   heightOut = widthIn;
 640
 641   // Allocate memory for the rotated buffer.
 642   // Output buffer is tightly packed
 643   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
 644   if(nullptr == pixelsOut)
 645   {
 646     widthOut  = 0u;
 647     heightOut = 0u;
 648
 649     // Return if the memory allocations fails.
 650     return false;
 651   }
 652
 653   // Rotate the buffer.
 654   for(uint32_t y = 0u; y < heightIn; ++y)
 655   {
 656     const uint32_t srcLineIndex = y * strideIn;
 657     const uint32_t dstX         = widthOut - y - 1u;
 658     for(uint32_t x = 0u; x < widthIn; ++x)
 659     {
 660       const uint32_t dstY     = x;
 661       const uint32_t dstIndex = pixelSize * (dstY * widthOut + dstX);
 662       const uint32_t srcIndex = pixelSize * (srcLineIndex + x);
 663
 664       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 665       {
 666         *(pixelsOut + dstIndex + channel) = *(pixelsIn + srcIndex + channel);
 667       }
 668     }
 669   }
 670
 671   return true;
 672 }
 673
 674 /**
 675  * @brief Skews a row horizontally (with filtered weights)
 676  *
 677  * @note Limited to 45 degree skewing only.
 678  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 679  *
 680  * @param[in] srcBufferPtr Pointer to the input pixel buffer.
 681  * @param[in] srcWidth The width of the input pixel buffer.
 682  * @param[in] srcStride The stride of the input pixel buffer.
 683  * @param[in] pixelSize The size of the pixel.
 684  * @param[in,out] dstPixelBuffer Pointer to the output pixel buffer.
 685  * @param[in] dstWidth The width of the output pixel buffer.
 686  * @param[in] row The row index.
 687  * @param[in] offset The skew offset.
 688  * @param[in] weight The relative weight of right pixel.
 689  */
 690 void HorizontalSkew(const uint8_t* const srcBufferPtr,
 691                     uint32_t             srcWidth,
 692                     uint32_t             srcStride,
 693                     uint32_t             pixelSize,
 694                     uint8_t*&            dstBufferPtr,
 695                     uint32_t             dstWidth,
 696                     uint32_t             row,
 697                     int32_t              offset,
 698                     float                weight)
 699 {
 700   if(offset > 0)
 701   {
 702     // Fill gap left of skew with background.
 703     memset(dstBufferPtr + row * pixelSize * dstWidth, 0u, pixelSize * offset);
 704   }
 705
 706   uint8_t oldLeft[4u] = {0u, 0u, 0u, 0u};
 707
 708   for(uint32_t i = 0u; i < srcWidth; ++i)
 709   {
 710     // Loop through row pixels
 711     const uint32_t srcIndex = pixelSize * (row * srcStride + i);
 712
 713     uint8_t src[4u] = {0u, 0u, 0u, 0u};
 714     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 715     {
 716       src[channel] = *(srcBufferPtr + srcIndex + channel);
 717     }
 718
 719     // Calculate weights
 720     uint8_t left[4u] = {0u, 0u, 0u, 0u};
 721     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 722     {
 723       left[channel] = static_cast<uint8_t>(static_cast<float>(src[channel]) * weight);
 724
 725       // Update left over on source
 726       src[channel] -= (left[channel] - oldLeft[channel]);
 727     }
 728
 729     // Check boundaries
 730     if((static_cast<int32_t>(i) + offset >= 0) && (i + offset < dstWidth))
 731     {
 732       const uint32_t dstIndex = pixelSize * (row * dstWidth + i + offset);
 733
 734       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 735       {
 736         *(dstBufferPtr + dstIndex + channel) = src[channel];
 737       }
 738     }
 739
 740     // Save leftover for next pixel in scan
 741     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 742     {
 743       oldLeft[channel] = left[channel];
 744     }
 745   }
 746
 747   // Go to rightmost point of skew
 748   int32_t i = std::max(static_cast<int32_t>(srcWidth) + offset, -static_cast<int32_t>(dstWidth * row));
 749   if(i < static_cast<int32_t>(dstWidth))
 750   {
 751     // If still in image bounds, put leftovers there
 752     const uint32_t dstIndex = pixelSize * (row * dstWidth + i);
 753
 754     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 755     {
 756       *(dstBufferPtr + dstIndex + channel) = oldLeft[channel];
 757     }
 758
 759     // Clear to the right of the skewed line with background
 760     ++i;
 761     memset(dstBufferPtr + pixelSize * (row * dstWidth + i), 0u, pixelSize * (dstWidth - i));
 762   }
 763 }
 764
 765 /**
 766  * @brief Skews a column vertically (with filtered weights)
 767  *
 768  * @note Limited to 45 degree skewing only.
 769  * @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
 770  *
 771  * @param[in] srcBufferPtr Pointer to the input pixel buffer.
 772  * @param[in] srcWidth The width of the input pixel buffer.
 773  * @param[in] srcHeight The height of the input pixel buffer.
 774  * @param[in] srcStride The stride of the input pixel buffer.
 775  * @param[in] pixelSize The size of the pixel.
  776  * @param[in,out] dstBufferPtr Pointer to the output pixel buffer.
 777  * @param[in] dstWidth The width of the output pixel buffer.
 778  * @param[in] dstHeight The height of the output pixel buffer.
 779  * @param[in] column The column index.
 780  * @param[in] offset The skew offset.
  781  * @param[in] weight The relative weight of upper pixel.
 782  */
 783 void VerticalSkew(const uint8_t* const srcBufferPtr,
 784                   uint32_t             srcWidth,
 785                   uint32_t             srcHeight,
 786                   uint32_t             srcStride,
 787                   uint32_t             pixelSize,
 788                   uint8_t*&            dstBufferPtr,
 789                   uint32_t             dstWidth,
 790                   uint32_t             dstHeight,
 791                   uint32_t             column,
 792                   int32_t              offset,
 793                   float                weight)
 794 {
 795   for(int32_t i = 0; i < offset; ++i)
 796   {
 797     // Fill gap above skew with background
 798     const uint32_t dstIndex = pixelSize * (i * dstWidth + column);
 799
 800     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 801     {
 802       *(dstBufferPtr + dstIndex + channel) = 0u;
 803     }
 804   }
 805
 806   uint8_t oldLeft[4u] = {0u, 0u, 0u, 0u};
 807
 808   int32_t yPos = 0;
 809
 810   for(uint32_t i = 0u; i < srcHeight; ++i)
 811   {
 812     // Loop through column pixels
 813     const uint32_t srcIndex = pixelSize * (i * srcStride + column);
 814
 815     uint8_t src[4u] = {0u, 0u, 0u, 0u};
 816     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 817     {
 818       src[channel] = *(srcBufferPtr + srcIndex + channel);
 819     }
 820
 821     yPos = static_cast<int32_t>(i) + offset;
 822
 823     // Calculate weights
 824     uint8_t left[4u] = {0u, 0u, 0u, 0u};
 825     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 826     {
 827       left[channel] = static_cast<uint8_t>(static_cast<float>(src[channel]) * weight);
 828       // Update left over on source
 829       src[channel] -= (left[channel] - oldLeft[channel]);
 830     }
 831
 832     // Check boundaries
 833     if((yPos >= 0) && (yPos < static_cast<int32_t>(dstHeight)))
 834     {
 835       const uint32_t dstIndex = pixelSize * (yPos * dstWidth + column);
 836
 837       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 838       {
 839         *(dstBufferPtr + dstIndex + channel) = src[channel];
 840       }
 841     }
 842
 843     // Save leftover for next pixel in scan
 844     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 845     {
 846       oldLeft[channel] = left[channel];
 847     }
 848   }
 849
 850   // Go to bottom point of skew
 851   uint32_t i = 0;
 852
 853   if(yPos >= 0)
 854   {
 855     i = static_cast<uint32_t>(yPos);
 856     if(i < dstHeight)
 857     {
 858       // If still in image bounds, put leftovers there
 859       const uint32_t dstIndex = pixelSize * (i * dstWidth + column);
 860
 861       for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 862       {
 863         *(dstBufferPtr + dstIndex + channel) = oldLeft[channel];
 864       }
 865       ++i;
 866     }
 867   }
 868
 869   while(i < dstHeight)
 870   {
 871     // Clear below skewed line with background
 872     const uint32_t dstIndex = pixelSize * (i * dstWidth + column);
 873
 874     for(uint32_t channel = 0u; channel < pixelSize; ++channel)
 875     {
 876       *(dstBufferPtr + dstIndex + channel) = 0u;
 877     }
 878     ++i;
 879   }
 880 }
 881
 882 } // namespace
 883
 884 ImageDimensions CalculateDesiredDimensions(ImageDimensions rawDimensions, ImageDimensions requestedDimensions)
 885 {
 886   return CalculateDesiredDimensions(rawDimensions.GetWidth(), rawDimensions.GetHeight(), requestedDimensions.GetWidth(), requestedDimensions.GetHeight());
 887 }
 888
 889 /**
 890  * @brief Apply cropping and padding for specified fitting mode.
 891  *
 892  * Once the bitmap has been (optionally) downscaled to an appropriate size, this method performs alterations
 893  * based on the fitting mode.
 894  *
 895  * This will add vertical or horizontal borders if necessary.
 896  * Crop the source image data vertically or horizontally if necessary.
 897  * The aspect of the source image is preserved.
 * If the source image is smaller than the desired size, the algorithm will modify the newly created
 899  *   bitmaps dimensions to only be as large as necessary, as a memory saving optimization. This will cause
 900  *   GPU scaling to be performed at render time giving the same result with less texture traversal.
 901  *
 902  * @param[in] bitmap            The source pixel buffer to perform modifications on.
 903  * @param[in] desiredDimensions The target dimensions to aim to fill based on the fitting mode.
 904  * @param[in] fittingMode       The fitting mode to use.
 905  *
 906  * @return                      A new bitmap with the padding and cropping required for fitting mode applied.
 907  *                              If no modification is needed or possible, the passed in bitmap is returned.
 908  */
 909 Dali::Devel::PixelBuffer CropAndPadForFittingMode(Dali::Devel::PixelBuffer& bitmap, ImageDimensions desiredDimensions, FittingMode::Type fittingMode);
 910
 911 /**
 912  * @brief Adds horizontal or vertical borders to the source image.
 913  *
 914  * @param[in] targetPixels     The destination image pointer to draw the borders on.
 915  * @param[in] bytesPerPixel    The number of bytes per pixel of the target pixel buffer.
 916  * @param[in] targetDimensions The dimensions of the destination image.
 917  * @param[in] padDimensions    The columns and scanlines to pad with borders.
 918  */
 919 void AddBorders(PixelBuffer* targetPixels, const uint32_t bytesPerPixel, const ImageDimensions targetDimensions, const ImageDimensions padDimensions);
 920
 921 Dali::Devel::PixelBuffer ApplyAttributesToBitmap(Dali::Devel::PixelBuffer bitmap, ImageDimensions dimensions, FittingMode::Type fittingMode, SamplingMode::Type samplingMode)
 922 {
 923   if(bitmap)
 924   {
 925     // Calculate the desired box, accounting for a possible zero component:
 926     const ImageDimensions desiredDimensions = CalculateDesiredDimensions(bitmap.GetWidth(), bitmap.GetHeight(), dimensions.GetWidth(), dimensions.GetHeight());
 927
 928     // If a different size than the raw one has been requested, resize the image
 929     // maximally using a repeated box filter without making it smaller than the
 930     // requested size in either dimension:
 931     bitmap = DownscaleBitmap(bitmap, desiredDimensions, fittingMode, samplingMode);
 932
 933     // Cut the bitmap according to the desired width and height so that the
 934     // resulting bitmap has the same aspect ratio as the desired dimensions.
 935     // Add crop and add borders if necessary depending on fitting mode.
 936     if(bitmap)
 937     {
 938       bitmap = CropAndPadForFittingMode(bitmap, desiredDimensions, fittingMode);
 939     }
 940   }
 941
 942   return bitmap;
 943 }
 944
 945 Dali::Devel::PixelBuffer CropAndPadForFittingMode(Dali::Devel::PixelBuffer& bitmap, ImageDimensions desiredDimensions, FittingMode::Type fittingMode)
 946 {
 947   const uint32_t inputWidth  = bitmap.GetWidth();
 948   const uint32_t inputHeight = bitmap.GetHeight();
 949   const uint32_t inputStride = bitmap.GetStride();
 950
 951   if(desiredDimensions.GetWidth() < 1u || desiredDimensions.GetHeight() < 1u)
 952   {
 953     DALI_LOG_WARNING("Image scaling aborted as desired dimensions too small (%u, %u).\n", desiredDimensions.GetWidth(), desiredDimensions.GetHeight());
 954   }
 955   else if(inputWidth != desiredDimensions.GetWidth() || inputHeight != desiredDimensions.GetHeight())
 956   {
 957     // Calculate any padding or cropping that needs to be done based on the fitting mode.
 958     // Note: If the desired size is larger than the original image, the desired size will be
 959     // reduced while maintaining the aspect, in order to save unnecessary memory usage.
 960     int scanlinesToCrop = 0;
 961     int columnsToCrop   = 0;
 962
 963     CalculateBordersFromFittingMode(ImageDimensions(inputWidth, inputHeight), fittingMode, desiredDimensions, scanlinesToCrop, columnsToCrop);
 964
 965     uint32_t desiredWidth(desiredDimensions.GetWidth());
 966     uint32_t desiredHeight(desiredDimensions.GetHeight());
 967
 968     // Action the changes by making a new bitmap with the central part of the loaded one if required.
 969     if(scanlinesToCrop != 0 || columnsToCrop != 0)
 970     {
 971       // Split the adding and removing of scanlines and columns into separate variables,
 972       // so we can use one piece of generic code to action the changes.
 973       uint32_t scanlinesToPad = 0;
 974       uint32_t columnsToPad   = 0;
 975       if(scanlinesToCrop < 0)
 976       {
 977         scanlinesToPad  = -scanlinesToCrop;
 978         scanlinesToCrop = 0;
 979       }
 980       if(columnsToCrop < 0)
 981       {
 982         columnsToPad  = -columnsToCrop;
 983         columnsToCrop = 0;
 984       }
 985
 986       // If there is no filtering, then the final image size can become very large, exit if larger than maximum.
 987       if((desiredWidth > MAXIMUM_TARGET_BITMAP_SIZE) || (desiredHeight > MAXIMUM_TARGET_BITMAP_SIZE) ||
 988          (columnsToPad > MAXIMUM_TARGET_BITMAP_SIZE) || (scanlinesToPad > MAXIMUM_TARGET_BITMAP_SIZE))
 989       {
 990         DALI_LOG_WARNING("Image scaling aborted as final dimensions too large (%u, %u).\n", desiredWidth, desiredHeight);
 991         return bitmap;
 992       }
 993
 994       // Create new PixelBuffer with the desired size.
 995       const auto pixelFormat = bitmap.GetPixelFormat();
 996
 997       auto croppedBitmap = Devel::PixelBuffer::New(desiredWidth, desiredHeight, pixelFormat);
 998
 999       // Add some pre-calculated offsets to the bitmap pointers so this is not done within a loop.
1000       // The cropping is added to the source pointer, and the padding is added to the destination.
1001       const auto               bytesPerPixel      = Pixel::GetBytesPerPixel(pixelFormat);
1002       const PixelBuffer* const sourcePixels       = bitmap.GetBuffer() + ((((scanlinesToCrop / 2) * inputStride) + (columnsToCrop / 2)) * bytesPerPixel);
1003       PixelBuffer* const       targetPixels       = croppedBitmap.GetBuffer();
1004       PixelBuffer* const       targetPixelsActive = targetPixels + ((((scanlinesToPad / 2) * desiredWidth) + (columnsToPad / 2)) * bytesPerPixel);
1005       DALI_ASSERT_DEBUG(sourcePixels && targetPixels);
1006
1007       // Copy the image data to the new bitmap.
1008       // Optimize to a single memcpy if the left and right edges don't need a crop or a pad.
1009       uint32_t outputSpan(desiredWidth * bytesPerPixel);
1010       if(columnsToCrop == 0 && columnsToPad == 0 && inputStride == inputWidth)
1011       {
1012         memcpy(targetPixelsActive, sourcePixels, (desiredHeight - scanlinesToPad) * outputSpan);
1013       }
1014       else
1015       {
1016         // The width needs to change (due to either a crop or a pad), so we copy a scanline at a time.
1017         // Precalculate any constants to optimize the inner loop.
1018         const uint32_t inputSpan(inputStride * bytesPerPixel);
1019         const uint32_t copySpan((desiredWidth - columnsToPad) * bytesPerPixel);
1020         const uint32_t scanlinesToCopy(desiredHeight - scanlinesToPad);
1021
1022         for(uint32_t y = 0; y < scanlinesToCopy; ++y)
1023         {
1024           memcpy(&targetPixelsActive[y * outputSpan], &sourcePixels[y * inputSpan], copySpan);
1025         }
1026       }
1027
1028       // Add vertical or horizontal borders to the final image (if required).
1029       desiredDimensions.SetWidth(desiredWidth);
1030       desiredDimensions.SetHeight(desiredHeight);
1031       AddBorders(croppedBitmap.GetBuffer(), bytesPerPixel, desiredDimensions, ImageDimensions(columnsToPad, scanlinesToPad));
1032       // Overwrite the loaded bitmap with the cropped version
1033       bitmap = croppedBitmap;
1034     }
1035   }
1036
1037   return bitmap;
1038 }
1039
1040 void AddBorders(PixelBuffer* targetPixels, const uint32_t bytesPerPixel, const ImageDimensions targetDimensions, const ImageDimensions padDimensions)
1041 {
1042   // Assign ints for faster access.
1043   uint32_t desiredWidth(targetDimensions.GetWidth());
1044   uint32_t desiredHeight(targetDimensions.GetHeight());
1045   uint32_t columnsToPad(padDimensions.GetWidth());
1046   uint32_t scanlinesToPad(padDimensions.GetHeight());
1047   uint32_t outputSpan(desiredWidth * bytesPerPixel);
1048
1049   // Add letterboxing (symmetrical borders) if needed.
1050   if(scanlinesToPad > 0)
1051   {
1052     // Add a top border. Note: This is (deliberately) rounded down if padding is an odd number.
1053     memset(targetPixels, BORDER_FILL_VALUE, (scanlinesToPad / 2) * outputSpan);
1054
1055     // We subtract scanlinesToPad/2 from scanlinesToPad so that we have the correct
1056     // offset for odd numbers (as the top border is 1 pixel smaller in these cases.
1057     uint32_t bottomBorderHeight = scanlinesToPad - (scanlinesToPad / 2);
1058
1059     // Bottom border.
1060     memset(&targetPixels[(desiredHeight - bottomBorderHeight) * outputSpan], BORDER_FILL_VALUE, bottomBorderHeight * outputSpan);
1061   }
1062   else if(columnsToPad > 0)
1063   {
1064     // Add a left and right border.
1065     // Left:
1066     // Pre-calculate span size outside of loop.
1067     uint32_t leftBorderSpanWidth((columnsToPad / 2) * bytesPerPixel);
1068     for(uint32_t y = 0; y < desiredHeight; ++y)
1069     {
1070       memset(&targetPixels[y * outputSpan], BORDER_FILL_VALUE, leftBorderSpanWidth);
1071     }
1072
1073     // Right:
1074     // Pre-calculate the initial x offset as it is always the same for a small optimization.
1075     // We subtract columnsToPad/2 from columnsToPad so that we have the correct
1076     // offset for odd numbers (as the left border is 1 pixel smaller in these cases.
1077     uint32_t           rightBorderWidth = columnsToPad - (columnsToPad / 2);
1078     PixelBuffer* const destPixelsRightBorder(targetPixels + ((desiredWidth - rightBorderWidth) * bytesPerPixel));
1079     uint32_t           rightBorderSpanWidth = rightBorderWidth * bytesPerPixel;
1080
1081     for(uint32_t y = 0; y < desiredHeight; ++y)
1082     {
1083       memset(&destPixelsRightBorder[y * outputSpan], BORDER_FILL_VALUE, rightBorderSpanWidth);
1084     }
1085   }
1086 }
1087
1088 Dali::Devel::PixelBuffer DownscaleBitmap(Dali::Devel::PixelBuffer bitmap,
1089                                          ImageDimensions          desired,
1090                                          FittingMode::Type        fittingMode,
1091                                          SamplingMode::Type       samplingMode)
1092 {
1093   // Source dimensions as loaded from resources (e.g. filesystem):
1094   auto bitmapWidth  = bitmap.GetWidth();
1095   auto bitmapHeight = bitmap.GetHeight();
1096   auto bitmapStride = bitmap.GetStride();
1097   // Desired dimensions (the rectangle to fit the source image to):
1098   auto desiredWidth  = desired.GetWidth();
1099   auto desiredHeight = desired.GetHeight();
1100
1101   Dali::Devel::PixelBuffer outputBitmap{bitmap};
1102
1103   // If a different size than the raw one has been requested, resize the image:
1104   if(
1105     (desiredWidth > 0.0f) && (desiredHeight > 0.0f) &&
1106     ((desiredWidth < bitmapWidth) || (desiredHeight < bitmapHeight)))
1107   {
1108     auto pixelFormat = bitmap.GetPixelFormat();
1109
1110     // Do the fast power of 2 iterated box filter to get to roughly the right side if the filter mode requests that:
1111     uint32_t shrunkWidth = -1, shrunkHeight = -1, outStride = -1;
1112     DownscaleInPlacePow2(bitmap.GetBuffer(), pixelFormat, bitmapWidth, bitmapHeight, bitmapStride, desiredWidth, desiredHeight, fittingMode, samplingMode, shrunkWidth, shrunkHeight, outStride);
1113
1114     // Work out the dimensions of the downscaled bitmap, given the scaling mode and desired dimensions:
1115     const ImageDimensions filteredDimensions = FitToScalingMode(ImageDimensions(desiredWidth, desiredHeight), ImageDimensions(shrunkWidth, shrunkHeight), fittingMode);
1116     const uint32_t        filteredWidth      = filteredDimensions.GetWidth();
1117     const uint32_t        filteredHeight     = filteredDimensions.GetHeight();
1118
1119     // Run a filter to scale down the bitmap if it needs it:
1120     bool filtered = false;
1121     if(filteredWidth < shrunkWidth || filteredHeight < shrunkHeight)
1122     {
1123       if(samplingMode == SamplingMode::LINEAR || samplingMode == SamplingMode::BOX_THEN_LINEAR ||
1124          samplingMode == SamplingMode::NEAREST || samplingMode == SamplingMode::BOX_THEN_NEAREST)
1125       {
1126         outputBitmap = Dali::Devel::PixelBuffer::New(filteredWidth, filteredHeight, pixelFormat);
1127
1128         if(outputBitmap)
1129         {
1130           if(samplingMode == SamplingMode::LINEAR || samplingMode == SamplingMode::BOX_THEN_LINEAR)
1131           {
1132             LinearSample(bitmap.GetBuffer(), ImageDimensions(shrunkWidth, shrunkHeight), outStride, pixelFormat, outputBitmap.GetBuffer(), filteredDimensions);
1133           }
1134           else
1135           {
1136             PointSample(bitmap.GetBuffer(), shrunkWidth, shrunkHeight, outStride, pixelFormat, outputBitmap.GetBuffer(), filteredWidth, filteredHeight);
1137           }
1138           filtered = true;
1139         }
1140       }
1141     }
1142     // Copy out the 2^x downscaled, box-filtered pixels if no secondary filter (point or linear) was applied:
1143     if(filtered == false && (shrunkWidth < bitmapWidth || shrunkHeight < bitmapHeight))
1144     {
1145       // The buffer is downscaled and it is tightly packed. We don't need to set a stride.
1146       outputBitmap = MakePixelBuffer(bitmap.GetBuffer(), pixelFormat, shrunkWidth, shrunkHeight);
1147     }
1148   }
1149
1150   return outputBitmap;
1151 }
1152
1153 namespace
1154 {
1155 /**
1156  * @brief Returns whether to keep box filtering based on whether downscaled dimensions will overshoot the desired ones aty the next step.
1157  * @param test Which combination of the two dimensions matter for terminating the filtering.
1158  * @param scaledWidth The width of the current downscaled image.
1159  * @param scaledHeight The height of the current downscaled image.
1160  * @param desiredWidth The target width for the downscaling.
1161  * @param desiredHeight The target height for the downscaling.
1162  */
1163 bool ContinueScaling(BoxDimensionTest test, uint32_t scaledWidth, uint32_t scaledHeight, uint32_t desiredWidth, uint32_t desiredHeight)
1164 {
1165   bool           keepScaling = false;
1166   const uint32_t nextWidth   = scaledWidth >> 1u;
1167   const uint32_t nextHeight  = scaledHeight >> 1u;
1168
1169   if(nextWidth >= 1u && nextHeight >= 1u)
1170   {
1171     switch(test)
1172     {
1173       case BoxDimensionTestEither:
1174       {
1175         keepScaling = nextWidth >= desiredWidth || nextHeight >= desiredHeight;
1176         break;
1177       }
1178       case BoxDimensionTestBoth:
1179       {
1180         keepScaling = nextWidth >= desiredWidth && nextHeight >= desiredHeight;
1181         break;
1182       }
1183       case BoxDimensionTestX:
1184       {
1185         keepScaling = nextWidth >= desiredWidth;
1186         break;
1187       }
1188       case BoxDimensionTestY:
1189       {
1190         keepScaling = nextHeight >= desiredHeight;
1191         break;
1192       }
1193     }
1194   }
1195
1196   return keepScaling;
1197 }
1198
1199 /**
1200  * @brief A shared implementation of the overall iterative box filter
1201  * downscaling algorithm.
1202  *
1203  * Specialise this for particular pixel formats by supplying the number of bytes
1204  * per pixel and two functions: one for averaging pairs of neighbouring pixels
1205  * on a single scanline, and a second for averaging pixels at corresponding
1206  * positions on different scanlines.
1207  **/
1208 template<
1209   int BYTES_PER_PIXEL,
1210   void (*HalveScanlineInPlace)(uint8_t* const pixels, const uint32_t width),
1211   void (*AverageScanlines)(const uint8_t* const scanline1, const uint8_t* const __restrict__ scanline2, uint8_t* const outputScanline, const uint32_t width)>
1212 void DownscaleInPlacePow2Generic(uint8_t* const   pixels,
1213                                  const uint32_t   inputWidth,
1214                                  const uint32_t   inputHeight,
1215                                  const uint32_t   inputStride,
1216                                  const uint32_t   desiredWidth,
1217                                  const uint32_t   desiredHeight,
1218                                  BoxDimensionTest dimensionTest,
1219                                  uint32_t&        outWidth,
1220                                  uint32_t&        outHeight,
1221                                  uint32_t&        outStride)
1222 {
1223   if(pixels == 0)
1224   {
1225     return;
1226   }
1227   ValidateScalingParameters(inputWidth, inputHeight, desiredWidth, desiredHeight);
1228
1229   // Scale the image until it would be smaller than desired, stopping if the
1230   // resulting height or width would be less than 1:
1231   uint32_t scaledWidth = inputWidth, scaledHeight = inputHeight, stride = inputStride;
1232   while(ContinueScaling(dimensionTest, scaledWidth, scaledHeight, desiredWidth, desiredHeight))
1233   {
1234     const uint32_t lastWidth  = scaledWidth;
1235     const uint32_t lastStride = stride;
1236     scaledWidth >>= 1u;
1237     scaledHeight >>= 1u;
1238     stride = scaledWidth;
1239
1240     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Scaling to %u\t%u.\n", scaledWidth, scaledHeight);
1241
1242     const uint32_t lastScanlinePair = scaledHeight - 1;
1243
1244     // Scale pairs of scanlines until any spare one at the end is dropped:
1245     for(uint32_t y = 0; y <= lastScanlinePair; ++y)
1246     {
1247       // Scale two scanlines horizontally:
1248       HalveScanlineInPlace(&pixels[y * 2 * lastStride * BYTES_PER_PIXEL], lastWidth);
1249       HalveScanlineInPlace(&pixels[(y * 2 + 1) * lastStride * BYTES_PER_PIXEL], lastWidth);
1250
1251       // Scale vertical pairs of pixels while the last two scanlines are still warm in
1252       // the CPU cache(s):
1253       // Note, better access patterns for cache-coherence are possible for very large
1254       // images but even a 4k wide RGB888 image will use just 24kB of cache (4k pixels
1255       // * 3 Bpp * 2 scanlines) for two scanlines on the first iteration.
1256       AverageScanlines(
1257         &pixels[y * 2 * lastStride * BYTES_PER_PIXEL],
1258         &pixels[(y * 2 + 1) * lastStride * BYTES_PER_PIXEL],
1259         &pixels[y * scaledWidth * BYTES_PER_PIXEL],
1260         scaledWidth);
1261     }
1262   }
1263
1264   ///@note: we could finish off with one of two mutually exclusive passes, one squashing horizontally as far as possible, and the other vertically, if we knew a following cpu point or bilinear filter would restore the desired aspect ratio.
1265   outWidth  = scaledWidth;
1266   outHeight = scaledHeight;
1267   outStride = stride;
1268 }
1269
1270 } // namespace
1271
1272 void HalveScanlineInPlaceRGB888(uint8_t* const pixels, const uint32_t width)
1273 {
1274   DebugAssertScanlineParameters(pixels, width);
1275
1276   const uint32_t lastPair = EvenDown(width - 2);
1277
1278   /**
1279    * @code
1280    *  for(uint32_t pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1281    * {
1282    *   // Load all the byte pixel components we need:
1283    *   const uint32_t c11 = pixels[pixel * 3];
1284    *   const uint32_t c12 = pixels[pixel * 3 + 1];
1285    *   const uint32_t c13 = pixels[pixel * 3 + 2];
1286    *   const uint32_t c21 = pixels[pixel * 3 + 3];
1287    *   const uint32_t c22 = pixels[pixel * 3 + 4];
1288    *   const uint32_t c23 = pixels[pixel * 3 + 5];
1289    *
1290    *   // Save the averaged byte pixel components:
1291    *   pixels[outPixel * 3]     = static_cast<uint8_t>(AverageComponent(c11, c21));
1292    *   pixels[outPixel * 3 + 1] = static_cast<uint8_t>(AverageComponent(c12, c22));
1293    *   pixels[outPixel * 3 + 2] = static_cast<uint8_t>(AverageComponent(c13, c23));
1294    * }
1295    *   @endcode
1296    */
1297   //@ToDo : Fix here if we found that collect 12 bytes == 3 uint32_t with 4 colors, and calculate in one-operation
1298   std::uint8_t* inPixelPtr  = pixels;
1299   std::uint8_t* outPixelPtr = pixels;
1300   for(std::uint32_t scanedPixelCount = 0; scanedPixelCount <= lastPair; scanedPixelCount += 2)
1301   {
1302     *(outPixelPtr + 0) = ((*(inPixelPtr + 0) ^ *(inPixelPtr + 3)) >> 1) + (*(inPixelPtr + 0) & *(inPixelPtr + 3));
1303     *(outPixelPtr + 1) = ((*(inPixelPtr + 1) ^ *(inPixelPtr + 4)) >> 1) + (*(inPixelPtr + 1) & *(inPixelPtr + 4));
1304     *(outPixelPtr + 2) = ((*(inPixelPtr + 2) ^ *(inPixelPtr + 5)) >> 1) + (*(inPixelPtr + 2) & *(inPixelPtr + 5));
1305     inPixelPtr += 6;
1306     outPixelPtr += 3;
1307   }
1308 }
1309
1310 void HalveScanlineInPlaceRGBA8888(uint8_t* const pixels, const uint32_t width)
1311 {
1312   DebugAssertScanlineParameters(pixels, width);
1313   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(pixels) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1314
1315   uint32_t* const alignedPixels = reinterpret_cast<uint32_t*>(pixels);
1316
1317   const uint32_t lastPair = EvenDown(width - 2);
1318
1319   for(uint32_t pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1320   {
1321     const uint32_t averaged = AveragePixelRGBA8888(alignedPixels[pixel], alignedPixels[pixel + 1]);
1322     alignedPixels[outPixel] = averaged;
1323   }
1324 }
1325
1326 void HalveScanlineInPlaceRGB565(uint8_t* pixels, uint32_t width)
1327 {
1328   DebugAssertScanlineParameters(pixels, width);
1329   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(pixels) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1330
1331   uint16_t* const alignedPixels = reinterpret_cast<uint16_t*>(pixels);
1332
1333   const uint32_t lastPair = EvenDown(width - 2);
1334
1335   for(uint32_t pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1336   {
1337     const uint16_t averaged = AveragePixelRGB565(alignedPixels[pixel], alignedPixels[pixel + 1]);
1338     alignedPixels[outPixel] = averaged;
1339   }
1340 }
1341
1342 void HalveScanlineInPlace2Bytes(uint8_t* const pixels, const uint32_t width)
1343 {
1344   DebugAssertScanlineParameters(pixels, width);
1345
1346   const uint32_t lastPair = EvenDown(width - 2);
1347
1348   for(uint32_t pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1349   {
1350     /**
1351      * @code
1352      * // Load all the byte pixel components we need:
1353      * const uint32_t c11 = pixels[pixel * 2];
1354      * const uint32_t c12 = pixels[pixel * 2 + 1];
1355      * const uint32_t c21 = pixels[pixel * 2 + 2];
1356      * const uint32_t c22 = pixels[pixel * 2 + 3];
1357      *
1358      * // Save the averaged byte pixel components:
1359      * pixels[outPixel * 2]     = static_cast<uint8_t>(AverageComponent(c11, c21));
1360      * pixels[outPixel * 2 + 1] = static_cast<uint8_t>(AverageComponent(c12, c22));
1361      * @endcode
1362      */
1363     // Note : We can assume that pixel is even number. So we can use | operation instead of + operation.
1364     pixels[(outPixel << 1)]     = ((pixels[(pixel << 1)] ^ pixels[(pixel << 1) | 2]) >> 1) + (pixels[(pixel << 1)] & pixels[(pixel << 1) | 2]);
1365     pixels[(outPixel << 1) | 1] = ((pixels[(pixel << 1) | 1] ^ pixels[(pixel << 1) | 3]) >> 1) + (pixels[(pixel << 1) | 1] & pixels[(pixel << 1) | 3]);
1366   }
1367 }
1368
1369 void HalveScanlineInPlace1Byte(uint8_t* const pixels, const uint32_t width)
1370 {
1371   DebugAssertScanlineParameters(pixels, width);
1372
1373   const uint32_t lastPair = EvenDown(width - 2);
1374
1375   for(uint32_t pixel = 0, outPixel = 0; pixel <= lastPair; pixel += 2, ++outPixel)
1376   {
1377     /**
1378      * @code
1379      * // Load all the byte pixel components we need:
1380      * const uint32_t c1 = pixels[pixel];
1381      * const uint32_t c2 = pixels[pixel + 1];
1382      *
1383      * // Save the averaged byte pixel component:
1384      * pixels[outPixel] = static_cast<uint8_t>(AverageComponent(c1, c2));
1385      * @endcode
1386      */
1387     // Note : We can assume that pixel is even number. So we can use | operation instead of + operation.
1388     pixels[outPixel] = ((pixels[pixel] ^ pixels[pixel | 1]) >> 1) + (pixels[pixel] & pixels[pixel | 1]);
1389   }
1390 }
1391
1392 // AverageScanline
1393
1394 namespace
1395 {
/**
 * @copydoc AverageScanlines1
 * @note This API averages eight components in one operation.
 * @note The eight-byte groups are loaded and stored with memcpy, so the scanline
 * pointers need no particular alignment to benefit from the wide path.
 */
inline void AverageScanlinesWithEightComponents(
  const uint8_t* const scanline1,
  const uint8_t* const __restrict__ scanline2,
  uint8_t* const outputScanline,
  const uint32_t totalComponentCount)
{
  // Number of whole eight-component groups, and the count of components they cover.
  const std::uint32_t totalStepCount = totalComponentCount >> 3;
  std::uint32_t       component      = totalStepCount << 3;

  // For each step, calculate the average of 8 bytes at once.
  for(std::uint32_t i = 0; i < totalStepCount; ++i)
  {
    const std::size_t byteOffset = static_cast<std::size_t>(i) << 3;

    // Copy the bytes into 64 bit words instead of casting the pointers: this is well defined
    // for any pointer alignment. Both inputs are read before the output is written, and
    // groups are processed front to back.
    std::uint64_t c1;
    std::uint64_t c2;
    std::memcpy(&c1, scanline1 + byteOffset, sizeof(c1));
    std::memcpy(&c2, scanline2 + byteOffset, sizeof(c2));

    // Per byte: ((a ^ b) >> 1) + (a & b) is the rounded-down average. The mask clears the
    // lowest bit of every byte first, so that the shift can't leak it into the neighbouring byte.
    const std::uint64_t averaged = (((c1 ^ c2) & 0xfefefefefefefefeull) >> 1) + (c1 & c2);
    std::memcpy(outputScanline + byteOffset, &averaged, sizeof(averaged));
  }

  // Calculate the remaining components one byte at a time.
  for(; component < totalComponentCount; ++component)
  {
    const auto& c1            = scanline1[component];
    const auto& c2            = scanline2[component];
    outputScanline[component] = static_cast<std::uint8_t>(((c1 ^ c2) >> 1) + (c1 & c2));
  }
}
1442
1443 } // namespace
1444
1445 void AverageScanlines1(const uint8_t* const scanline1,
1446                        const uint8_t* const __restrict__ scanline2,
1447                        uint8_t* const outputScanline,
1448                        const uint32_t width)
1449 {
1450   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width);
1451
1452   /**
1453    * @code
1454    * for(uint32_t component = 0; component < width; ++component)
1455    * {
1456    *   outputScanline[component] = static_cast<uint8_t>(AverageComponent(scanline1[component], scanline2[component]));
1457    * }
1458    * @endcode
1459    */
1460   AverageScanlinesWithEightComponents(scanline1, scanline2, outputScanline, width);
1461 }
1462
1463 void AverageScanlines2(const uint8_t* const scanline1,
1464                        const uint8_t* const __restrict__ scanline2,
1465                        uint8_t* const outputScanline,
1466                        const uint32_t width)
1467 {
1468   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 2);
1469
1470   /**
1471    * @code
1472    * for(uint32_t component = 0; component < width * 2; ++component)
1473    * {
1474    *   outputScanline[component] = static_cast<uint8_t>(AverageComponent(scanline1[component], scanline2[component]));
1475    * }
1476    * @endcode
1477    */
1478   AverageScanlinesWithEightComponents(scanline1, scanline2, outputScanline, width * 2);
1479 }
1480
1481 void AverageScanlines3(const uint8_t* const scanline1,
1482                        const uint8_t* const __restrict__ scanline2,
1483                        uint8_t* const outputScanline,
1484                        const uint32_t width)
1485 {
1486   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 3);
1487
1488   /**
1489    * @code
1490    * for(uint32_t component = 0; component < width * 3; ++component)
1491    * {
1492    *   outputScanline[component] = static_cast<uint8_t>(AverageComponent(scanline1[component], scanline2[component]));
1493    * }
1494    * @endcode
1495    */
1496   AverageScanlinesWithEightComponents(scanline1, scanline2, outputScanline, width * 3);
1497 }
1498
1499 void AverageScanlinesRGBA8888(const uint8_t* const scanline1,
1500                               const uint8_t* const __restrict__ scanline2,
1501                               uint8_t* const outputScanline,
1502                               const uint32_t width)
1503 {
1504   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 4);
1505   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline1) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1506   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline2) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1507   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(outputScanline) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1508
1509   const uint32_t* const alignedScanline1 = reinterpret_cast<const uint32_t*>(scanline1);
1510   const uint32_t* const alignedScanline2 = reinterpret_cast<const uint32_t*>(scanline2);
1511   uint32_t* const       alignedOutput    = reinterpret_cast<uint32_t*>(outputScanline);
1512
1513   for(uint32_t pixel = 0; pixel < width; ++pixel)
1514   {
1515     alignedOutput[pixel] = AveragePixelRGBA8888(alignedScanline1[pixel], alignedScanline2[pixel]);
1516   }
1517 }
1518
1519 void AverageScanlinesRGB565(const uint8_t* const scanline1,
1520                             const uint8_t* const __restrict__ scanline2,
1521                             uint8_t* const outputScanline,
1522                             const uint32_t width)
1523 {
1524   DebugAssertDualScanlineParameters(scanline1, scanline2, outputScanline, width * 2);
1525   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline1) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1526   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(scanline2) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1527   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(outputScanline) & 1u) == 0u) && "Pointer should be 2-byte aligned for performance on some platforms.");
1528
1529   const uint16_t* const alignedScanline1 = reinterpret_cast<const uint16_t*>(scanline1);
1530   const uint16_t* const alignedScanline2 = reinterpret_cast<const uint16_t*>(scanline2);
1531   uint16_t* const       alignedOutput    = reinterpret_cast<uint16_t*>(outputScanline);
1532
1533   for(uint32_t pixel = 0; pixel < width; ++pixel)
1534   {
1535     alignedOutput[pixel] = AveragePixelRGB565(alignedScanline1[pixel], alignedScanline2[pixel]);
1536   }
1537 }
1538
1539 /// Dispatch to pixel format appropriate box filter downscaling functions.
1540 void DownscaleInPlacePow2(uint8_t* const     pixels,
1541                           Pixel::Format      pixelFormat,
1542                           uint32_t           inputWidth,
1543                           uint32_t           inputHeight,
1544                           uint32_t           inputStride,
1545                           uint32_t           desiredWidth,
1546                           uint32_t           desiredHeight,
1547                           FittingMode::Type  fittingMode,
1548                           SamplingMode::Type samplingMode,
1549                           uint32_t&          outWidth,
1550                           uint32_t&          outHeight,
1551                           uint32_t&          outStride)
1552 {
1553   outWidth  = inputWidth;
1554   outHeight = inputHeight;
1555   outStride = inputStride;
1556   // Perform power of 2 iterated 4:1 box filtering if the requested filter mode requires it:
1557   if(samplingMode == SamplingMode::BOX || samplingMode == SamplingMode::BOX_THEN_NEAREST || samplingMode == SamplingMode::BOX_THEN_LINEAR)
1558   {
1559     // Check the pixel format is one that is supported:
1560     if(pixelFormat == Pixel::RGBA8888 || pixelFormat == Pixel::RGB888 || pixelFormat == Pixel::RGB565 || pixelFormat == Pixel::LA88 || pixelFormat == Pixel::L8 || pixelFormat == Pixel::A8)
1561     {
1562       const BoxDimensionTest dimensionTest = DimensionTestForScalingMode(fittingMode);
1563
1564       switch(pixelFormat)
1565       {
1566         case Pixel::RGBA8888:
1567         {
1568           Internal::Platform::DownscaleInPlacePow2RGBA8888(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1569           break;
1570         }
1571         case Pixel::RGB888:
1572         {
1573           Internal::Platform::DownscaleInPlacePow2RGB888(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1574           break;
1575         }
1576         case Pixel::RGB565:
1577         {
1578           Internal::Platform::DownscaleInPlacePow2RGB565(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1579           break;
1580         }
1581         case Pixel::LA88:
1582         {
1583           Internal::Platform::DownscaleInPlacePow2ComponentPair(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1584           break;
1585         }
1586         case Pixel::L8:
1587         case Pixel::A8:
1588         {
1589           Internal::Platform::DownscaleInPlacePow2SingleBytePerPixel(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1590           break;
1591         }
1592         default:
1593         {
1594           DALI_ASSERT_DEBUG(false && "Inner branch conditions don't match outer branch.");
1595         }
1596       }
1597     }
1598   }
1599   else
1600   {
1601     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Bitmap was not shrunk: unsupported pixel format: %u.\n", uint32_t(pixelFormat));
1602   }
1603 }
1604
1605 void DownscaleInPlacePow2RGB888(uint8_t*         pixels,
1606                                 uint32_t         inputWidth,
1607                                 uint32_t         inputHeight,
1608                                 uint32_t         inputStride,
1609                                 uint32_t         desiredWidth,
1610                                 uint32_t         desiredHeight,
1611                                 BoxDimensionTest dimensionTest,
1612                                 uint32_t&        outWidth,
1613                                 uint32_t&        outHeight,
1614                                 uint32_t&        outStride)
1615 {
1616   DownscaleInPlacePow2Generic<3, HalveScanlineInPlaceRGB888, AverageScanlines3>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1617 }
1618
1619 void DownscaleInPlacePow2RGBA8888(uint8_t*         pixels,
1620                                   uint32_t         inputWidth,
1621                                   uint32_t         inputHeight,
1622                                   uint32_t         inputStride,
1623                                   uint32_t         desiredWidth,
1624                                   uint32_t         desiredHeight,
1625                                   BoxDimensionTest dimensionTest,
1626                                   uint32_t&        outWidth,
1627                                   uint32_t&        outHeight,
1628                                   uint32_t&        outStride)
1629 {
1630   DALI_ASSERT_DEBUG(((reinterpret_cast<ptrdiff_t>(pixels) & 3u) == 0u) && "Pointer should be 4-byte aligned for performance on some platforms.");
1631   DownscaleInPlacePow2Generic<4, HalveScanlineInPlaceRGBA8888, AverageScanlinesRGBA8888>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1632 }
1633
1634 void DownscaleInPlacePow2RGB565(uint8_t*         pixels,
1635                                 uint32_t         inputWidth,
1636                                 uint32_t         inputHeight,
1637                                 uint32_t         inputStride,
1638                                 uint32_t         desiredWidth,
1639                                 uint32_t         desiredHeight,
1640                                 BoxDimensionTest dimensionTest,
1641                                 uint32_t&        outWidth,
1642                                 uint32_t&        outHeight,
1643                                 uint32_t&        outStride)
1644 {
1645   DownscaleInPlacePow2Generic<2, HalveScanlineInPlaceRGB565, AverageScanlinesRGB565>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1646 }
1647
1648 /**
1649  * @copydoc DownscaleInPlacePow2RGB888
1650  *
1651  * For 2-byte formats such as lum8alpha8, but not packed 16 bit formats like RGB565.
1652  */
1653 void DownscaleInPlacePow2ComponentPair(uint8_t*         pixels,
1654                                        uint32_t         inputWidth,
1655                                        uint32_t         inputHeight,
1656                                        uint32_t         inputStride,
1657                                        uint32_t         desiredWidth,
1658                                        uint32_t         desiredHeight,
1659                                        BoxDimensionTest dimensionTest,
1660                                        uint32_t&        outWidth,
1661                                        uint32_t&        outHeight,
1662                                        uint32_t&        outStride)
1663 {
1664   DownscaleInPlacePow2Generic<2, HalveScanlineInPlace2Bytes, AverageScanlines2>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1665 }
1666
1667 void DownscaleInPlacePow2SingleBytePerPixel(uint8_t*         pixels,
1668                                             uint32_t         inputWidth,
1669                                             uint32_t         inputHeight,
1670                                             uint32_t         inputStride,
1671                                             uint32_t         desiredWidth,
1672                                             uint32_t         desiredHeight,
1673                                             BoxDimensionTest dimensionTest,
1674                                             uint32_t&        outWidth,
1675                                             uint32_t&        outHeight,
1676                                             uint32_t&        outStride)
1677 {
1678   DownscaleInPlacePow2Generic<1, HalveScanlineInPlace1Byte, AverageScanlines1>(pixels, inputWidth, inputHeight, inputStride, desiredWidth, desiredHeight, dimensionTest, outWidth, outHeight, outStride);
1679 }
1680
1681 // Point sampling group below
1682
1683 namespace
1684 {
1685 /**
1686  * @brief Point sample an image to a new resolution (like GL_NEAREST).
1687  *
1688  * Template is used purely as a type-safe code generator in this one
1689  * compilation unit. Generated code is inlined into type-specific wrapper
1690  * functions below which are exported to rest of module.
1691  */
1692 template<typename PIXEL>
1693 inline void PointSampleAddressablePixels(const uint8_t* inPixels,
1694                                          uint32_t       inputWidth,
1695                                          uint32_t       inputHeight,
1696                                          uint32_t       inputStride,
1697                                          uint8_t*       outPixels,
1698                                          uint32_t       desiredWidth,
1699                                          uint32_t       desiredHeight)
1700 {
1701   DALI_ASSERT_DEBUG(((desiredWidth <= inputWidth && desiredHeight <= inputHeight) ||
1702                      outPixels >= inPixels + inputStride * inputHeight * sizeof(PIXEL) || outPixels <= inPixels - desiredWidth * desiredHeight * sizeof(PIXEL)) &&
1703                     "The input and output buffers must not overlap for an upscaling.");
1704   DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(inPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");
1705   DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(outPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");
1706
1707   if(inputWidth < 1u || inputHeight < 1u || desiredWidth < 1u || desiredHeight < 1u)
1708   {
1709     return;
1710   }
1711   const PIXEL* const inAligned  = reinterpret_cast<const PIXEL*>(inPixels);
1712   PIXEL* const       outAligned = reinterpret_cast<PIXEL*>(outPixels);
1713   const uint32_t     deltaX     = (inputWidth << 16u) / desiredWidth;
1714   const uint32_t     deltaY     = (inputHeight << 16u) / desiredHeight;
1715
1716   uint32_t inY = 0;
1717   for(uint32_t outY = 0; outY < desiredHeight; ++outY)
1718   {
1719     // Round fixed point y coordinate to nearest integer:
1720     const uint32_t     integerY    = (inY + (1u << 15u)) >> 16u;
1721     const PIXEL* const inScanline  = &inAligned[inputStride * integerY];
1722     PIXEL* const       outScanline = &outAligned[desiredWidth * outY];
1723
1724     DALI_ASSERT_DEBUG(integerY < inputHeight);
1725     DALI_ASSERT_DEBUG(reinterpret_cast<const uint8_t*>(inScanline) < (inPixels + inputStride * inputHeight * sizeof(PIXEL)));
1726     DALI_ASSERT_DEBUG(reinterpret_cast<uint8_t*>(outScanline) < (outPixels + desiredWidth * desiredHeight * sizeof(PIXEL)));
1727
1728     uint32_t inX = 0;
1729     for(uint32_t outX = 0; outX < desiredWidth; ++outX)
1730     {
1731       // Round the fixed-point x coordinate to an integer:
1732       const uint32_t     integerX       = (inX + (1u << 15u)) >> 16u;
1733       const PIXEL* const inPixelAddress = &inScanline[integerX];
1734       const PIXEL        pixel          = *inPixelAddress;
1735       outScanline[outX]                 = pixel;
1736       inX += deltaX;
1737     }
1738     inY += deltaY;
1739   }
1740 }
1741
1742 } // namespace
1743
1744 // RGBA8888
1745 void PointSample4BPP(const uint8_t* inPixels,
1746                      uint32_t       inputWidth,
1747                      uint32_t       inputHeight,
1748                      uint32_t       inputStride,
1749                      uint8_t*       outPixels,
1750                      uint32_t       desiredWidth,
1751                      uint32_t       desiredHeight)
1752 {
1753   PointSampleAddressablePixels<uint32_t>(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1754 }
1755
1756 // RGB565, LA88
1757 void PointSample2BPP(const uint8_t* inPixels,
1758                      uint32_t       inputWidth,
1759                      uint32_t       inputHeight,
1760                      uint32_t       inputStride,
1761                      uint8_t*       outPixels,
1762                      uint32_t       desiredWidth,
1763                      uint32_t       desiredHeight)
1764 {
1765   PointSampleAddressablePixels<uint16_t>(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1766 }
1767
1768 // L8, A8
1769 void PointSample1BPP(const uint8_t* inPixels,
1770                      uint32_t       inputWidth,
1771                      uint32_t       inputHeight,
1772                      uint32_t       inputStride,
1773                      uint8_t*       outPixels,
1774                      uint32_t       desiredWidth,
1775                      uint32_t       desiredHeight)
1776 {
1777   PointSampleAddressablePixels<uint8_t>(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1778 }
1779
/**
 * Point sample a 3-byte-per-pixel (RGB888) image to a new resolution.
 *
 * RGB888 is a special case as its pixels are not aligned addressable units,
 * so each pixel is copied as three separate bytes.
 *
 * Input coordinates are tracked in 16.16 fixed point, rounded to the nearest
 * integer and then clamped to the last valid row / column. The clamp matters
 * when enlarging: rounding can otherwise select index == inputWidth (or
 * inputHeight) for the trailing output pixels once the image is enlarged by a
 * factor of two or more (e.g. 1 -> 2), which reads past the scanline / buffer.
 *
 * Nothing is written if any of the four dimensions is zero.
 *
 * @note The 16.16 deltas are computed in 32 bits, so input dimensions must be
 *       below 65536 for the shift not to overflow.
 *
 * @param[in]  inPixels      Input pixel buffer.
 * @param[in]  inputWidth    Width of the input image in pixels.
 * @param[in]  inputHeight   Height of the input image in pixels.
 * @param[in]  inputStride   Distance between the starts of consecutive input scanlines, in pixels.
 * @param[out] outPixels     Output pixel buffer, written as tightly packed scanlines of desiredWidth pixels.
 * @param[in]  desiredWidth  Width of the output image in pixels.
 * @param[in]  desiredHeight Height of the output image in pixels.
 */
void PointSample3BPP(const uint8_t* inPixels,
                     uint32_t       inputWidth,
                     uint32_t       inputHeight,
                     uint32_t       inputStride,
                     uint8_t*       outPixels,
                     uint32_t       desiredWidth,
                     uint32_t       desiredHeight)
{
  if(inputWidth < 1u || inputHeight < 1u || desiredWidth < 1u || desiredHeight < 1u)
  {
    return;
  }
  const uint32_t BYTES_PER_PIXEL = 3;

  // Generate fixed-point 16.16 deltas in input image coordinates:
  const uint32_t deltaX = (inputWidth << 16u) / desiredWidth;
  const uint32_t deltaY = (inputHeight << 16u) / desiredHeight;

  // Last addressable input column / row, used to clamp the rounded coordinates:
  const uint32_t lastInputX = inputWidth - 1u;
  const uint32_t lastInputY = inputHeight - 1u;

  // Step through output image in whole integer pixel steps while tracking the
  // corresponding locations in the input image using 16.16 fixed-point
  // coordinates:
  uint32_t inY = 0; //< 16.16 fixed-point input image y-coord.
  for(uint32_t outY = 0; outY < desiredHeight; ++outY)
  {
    // Round to the nearest input row, staying inside the image:
    const uint32_t       roundedY    = (inY + (1u << 15u)) >> 16u;
    const uint32_t       integerY    = roundedY < lastInputY ? roundedY : lastInputY;
    const uint8_t* const inScanline  = &inPixels[inputStride * integerY * BYTES_PER_PIXEL];
    uint8_t* const       outScanline = &outPixels[desiredWidth * outY * BYTES_PER_PIXEL];
    uint32_t             inX         = 0; //< 16.16 fixed-point input image x-coord.

    for(uint32_t outX = 0; outX < desiredWidth * BYTES_PER_PIXEL; outX += BYTES_PER_PIXEL)
    {
      // Round the fixed-point input coordinate to the address of the input pixel to sample,
      // staying inside the scanline:
      const uint32_t       roundedX       = (inX + (1u << 15u)) >> 16u;
      const uint32_t       integerX       = roundedX < lastInputX ? roundedX : lastInputX;
      const uint8_t* const inPixelAddress = &inScanline[integerX * BYTES_PER_PIXEL];

      // Issue loads for all pixel color components up-front:
      const uint32_t c0 = inPixelAddress[0];
      const uint32_t c1 = inPixelAddress[1];
      const uint32_t c2 = inPixelAddress[2];
      ///@ToDo: Optimise - Benchmark one 32bit load that will be unaligned 2/3 of the time + 3 rotate and masks, versus these three aligned byte loads, versus using an RGB packed, aligned(1) struct and letting compiler pick a strategy.

      // Output the pixel components:
      outScanline[outX]     = static_cast<uint8_t>(c0);
      outScanline[outX + 1] = static_cast<uint8_t>(c1);
      outScanline[outX + 2] = static_cast<uint8_t>(c2);

      // Increment the fixed-point input coordinate:
      inX += deltaX;
    }

    inY += deltaY;
  }
}
1836
1837 // Dispatch to a format-appropriate point sampling function:
1838 void PointSample(const uint8_t* inPixels,
1839                  uint32_t       inputWidth,
1840                  uint32_t       inputHeight,
1841                  uint32_t       inputStride,
1842                  Pixel::Format  pixelFormat,
1843                  uint8_t*       outPixels,
1844                  uint32_t       desiredWidth,
1845                  uint32_t       desiredHeight)
1846 {
1847   // Check the pixel format is one that is supported:
1848   if(pixelFormat == Pixel::RGBA8888 || pixelFormat == Pixel::RGB888 || pixelFormat == Pixel::RGB565 || pixelFormat == Pixel::LA88 || pixelFormat == Pixel::L8 || pixelFormat == Pixel::A8)
1849   {
1850     switch(pixelFormat)
1851     {
1852       case Pixel::RGB888:
1853       {
1854         PointSample3BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1855         break;
1856       }
1857       case Pixel::RGBA8888:
1858       {
1859         PointSample4BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1860         break;
1861       }
1862       case Pixel::RGB565:
1863       case Pixel::LA88:
1864       {
1865         PointSample2BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1866         break;
1867       }
1868       case Pixel::L8:
1869       case Pixel::A8:
1870       {
1871         PointSample1BPP(inPixels, inputWidth, inputHeight, inputStride, outPixels, desiredWidth, desiredHeight);
1872         break;
1873       }
1874       default:
1875       {
1876         DALI_ASSERT_DEBUG(0 == "Inner branch conditions don't match outer branch.");
1877       }
1878     }
1879   }
1880   else
1881   {
1882     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Bitmap was not point sampled: unsupported pixel format: %u.\n", uint32_t(pixelFormat));
1883   }
1884 }
1885
1886 // Linear sampling group below
1887
1888 namespace
1889 {
1890 /** @brief Blend 4 pixels together using horizontal and vertical weights. */
1891 inline uint8_t BilinearFilter1BPPByte(uint8_t tl, uint8_t tr, uint8_t bl, uint8_t br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical)
1892 {
1893   return static_cast<uint8_t>(BilinearFilter1Component(tl, tr, bl, br, fractBlendHorizontal, fractBlendVertical));
1894 }
1895
1896 /** @copydoc BilinearFilter1BPPByte */
1897 inline Pixel2Bytes BilinearFilter2Bytes(Pixel2Bytes tl, Pixel2Bytes tr, Pixel2Bytes bl, Pixel2Bytes br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical)
1898 {
1899   Pixel2Bytes pixel;
1900   pixel.l = static_cast<uint8_t>(BilinearFilter1Component(tl.l, tr.l, bl.l, br.l, fractBlendHorizontal, fractBlendVertical));
1901   pixel.a = static_cast<uint8_t>(BilinearFilter1Component(tl.a, tr.a, bl.a, br.a, fractBlendHorizontal, fractBlendVertical));
1902   return pixel;
1903 }
1904
1905 /** @copydoc BilinearFilter1BPPByte */
1906 inline Pixel3Bytes BilinearFilterRGB888(Pixel3Bytes tl, Pixel3Bytes tr, Pixel3Bytes bl, Pixel3Bytes br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical)
1907 {
1908   Pixel3Bytes pixel;
1909   pixel.r = static_cast<uint8_t>(BilinearFilter1Component(tl.r, tr.r, bl.r, br.r, fractBlendHorizontal, fractBlendVertical));
1910   pixel.g = static_cast<uint8_t>(BilinearFilter1Component(tl.g, tr.g, bl.g, br.g, fractBlendHorizontal, fractBlendVertical));
1911   pixel.b = static_cast<uint8_t>(BilinearFilter1Component(tl.b, tr.b, bl.b, br.b, fractBlendHorizontal, fractBlendVertical));
1912   return pixel;
1913 }
1914
1915 /** @copydoc BilinearFilter1BPPByte */
1916 inline PixelRGB565 BilinearFilterRGB565(PixelRGB565 tl, PixelRGB565 tr, PixelRGB565 bl, PixelRGB565 br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical)
1917 {
1918   const PixelRGB565 pixel = static_cast<PixelRGB565>((BilinearFilter1Component(tl >> 11u, tr >> 11u, bl >> 11u, br >> 11u, fractBlendHorizontal, fractBlendVertical) << 11u) +
1919                                                      (BilinearFilter1Component((tl >> 5u) & 63u, (tr >> 5u) & 63u, (bl >> 5u) & 63u, (br >> 5u) & 63u, fractBlendHorizontal, fractBlendVertical) << 5u) +
1920                                                      BilinearFilter1Component(tl & 31u, tr & 31u, bl & 31u, br & 31u, fractBlendHorizontal, fractBlendVertical));
1921   return pixel;
1922 }
1923
1924 /** @copydoc BilinearFilter1BPPByte */
1925 inline Pixel4Bytes BilinearFilter4Bytes(Pixel4Bytes tl, Pixel4Bytes tr, Pixel4Bytes bl, Pixel4Bytes br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical)
1926 {
1927   Pixel4Bytes pixel;
1928   pixel.r = static_cast<uint8_t>(BilinearFilter1Component(tl.r, tr.r, bl.r, br.r, fractBlendHorizontal, fractBlendVertical));
1929   pixel.g = static_cast<uint8_t>(BilinearFilter1Component(tl.g, tr.g, bl.g, br.g, fractBlendHorizontal, fractBlendVertical));
1930   pixel.b = static_cast<uint8_t>(BilinearFilter1Component(tl.b, tr.b, bl.b, br.b, fractBlendHorizontal, fractBlendVertical));
1931   pixel.a = static_cast<uint8_t>(BilinearFilter1Component(tl.a, tr.a, bl.a, br.a, fractBlendHorizontal, fractBlendVertical));
1932   return pixel;
1933 }
1934
1935 /**
1936  * @brief Generic version of bilinear sampling image resize function.
1937  * @note Limited to one compilation unit and exposed through type-specific
1938  * wrapper functions below.
1939  */
1940 template<
1941   typename PIXEL,
1942   PIXEL (*BilinearFilter)(PIXEL tl, PIXEL tr, PIXEL bl, PIXEL br, uint32_t fractBlendHorizontal, uint32_t fractBlendVertical),
1943   bool DEBUG_ASSERT_ALIGNMENT>
1944 inline void LinearSampleGeneric(const uint8_t* __restrict__ inPixels,
1945                                 ImageDimensions inputDimensions,
1946                                 uint32_t        inputStride,
1947                                 uint8_t* __restrict__ outPixels,
1948                                 ImageDimensions desiredDimensions)
1949 {
1950   const uint32_t inputWidth    = inputDimensions.GetWidth();
1951   const uint32_t inputHeight   = inputDimensions.GetHeight();
1952   const uint32_t desiredWidth  = desiredDimensions.GetWidth();
1953   const uint32_t desiredHeight = desiredDimensions.GetHeight();
1954
1955   DALI_ASSERT_DEBUG(((outPixels >= inPixels + inputStride * inputHeight * sizeof(PIXEL)) ||
1956                      (inPixels >= outPixels + desiredWidth * desiredHeight * sizeof(PIXEL))) &&
1957                     "Input and output buffers cannot overlap.");
1958   if(DEBUG_ASSERT_ALIGNMENT)
1959   {
1960     DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(inPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");
1961     DALI_ASSERT_DEBUG(reinterpret_cast<uint64_t>(outPixels) % sizeof(PIXEL) == 0 && "Pixel pointers need to be aligned to the size of the pixels (E.g., 4 bytes for RGBA, 2 bytes for RGB565, ...).");
1962   }
1963
1964   if(inputWidth < 1u || inputHeight < 1u || desiredWidth < 1u || desiredHeight < 1u)
1965   {
1966     return;
1967   }
1968   const PIXEL* const inAligned  = reinterpret_cast<const PIXEL*>(inPixels);
1969   PIXEL* const       outAligned = reinterpret_cast<PIXEL*>(outPixels);
1970   const uint32_t     deltaX     = (inputWidth << 16u) / desiredWidth;
1971   const uint32_t     deltaY     = (inputHeight << 16u) / desiredHeight;
1972
1973   uint32_t inY = 0;
1974   for(uint32_t outY = 0; outY < desiredHeight; ++outY)
1975   {
1976     PIXEL* const outScanline = &outAligned[desiredWidth * outY];
1977
1978     // Find the two scanlines to blend and the weight to blend with:
1979     const uint32_t integerY1    = inY >> 16u;
1980     const uint32_t integerY2    = integerY1 + 1 >= inputHeight ? integerY1 : integerY1 + 1;
1981     const uint32_t inputYWeight = inY & 65535u;
1982
1983     DALI_ASSERT_DEBUG(integerY1 < inputHeight);
1984     DALI_ASSERT_DEBUG(integerY2 < inputHeight);
1985
1986     const PIXEL* const inScanline1 = &inAligned[inputStride * integerY1];
1987     const PIXEL* const inScanline2 = &inAligned[inputStride * integerY2];
1988
1989     uint32_t inX = 0;
1990     for(uint32_t outX = 0; outX < desiredWidth; ++outX)
1991     {
1992       // Work out the two pixel scanline offsets for this cluster of four samples:
1993       const uint32_t integerX1 = inX >> 16u;
1994       const uint32_t integerX2 = integerX1 + 1 >= inputWidth ? integerX1 : integerX1 + 1;
1995
1996       // Execute the loads:
1997       const PIXEL pixel1 = inScanline1[integerX1];
1998       const PIXEL pixel2 = inScanline2[integerX1];
1999       const PIXEL pixel3 = inScanline1[integerX2];
2000       const PIXEL pixel4 = inScanline2[integerX2];
2001       ///@ToDo Optimise - for 1 and 2  and 4 byte types to execute a single 2, 4, or 8 byte load per pair (caveat clamping) and let half of them be unaligned.
2002
2003       // Weighted bilinear filter:
2004       const uint32_t inputXWeight = inX & 65535u;
2005       outScanline[outX]           = BilinearFilter(pixel1, pixel3, pixel2, pixel4, inputXWeight, inputYWeight);
2006
2007       inX += deltaX;
2008     }
2009     inY += deltaY;
2010   }
2011 }
2012
2013 } // namespace
2014
2015 // Format-specific linear scaling instantiations:
2016
2017 void LinearSample1BPP(const uint8_t* __restrict__ inPixels,
2018                       ImageDimensions inputDimensions,
2019                       uint32_t        inputStride,
2020                       uint8_t* __restrict__ outPixels,
2021                       ImageDimensions desiredDimensions)
2022 {
2023   LinearSampleGeneric<uint8_t, BilinearFilter1BPPByte, false>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2024 }
2025
2026 void LinearSample2BPP(const uint8_t* __restrict__ inPixels,
2027                       ImageDimensions inputDimensions,
2028                       uint32_t        inputStride,
2029                       uint8_t* __restrict__ outPixels,
2030                       ImageDimensions desiredDimensions)
2031 {
2032   LinearSampleGeneric<Pixel2Bytes, BilinearFilter2Bytes, true>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2033 }
2034
2035 void LinearSampleRGB565(const uint8_t* __restrict__ inPixels,
2036                         ImageDimensions inputDimensions,
2037                         uint32_t        inputStride,
2038                         uint8_t* __restrict__ outPixels,
2039                         ImageDimensions desiredDimensions)
2040 {
2041   LinearSampleGeneric<PixelRGB565, BilinearFilterRGB565, true>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2042 }
2043
2044 void LinearSample3BPP(const uint8_t* __restrict__ inPixels,
2045                       ImageDimensions inputDimensions,
2046                       uint32_t        inputStride,
2047                       uint8_t* __restrict__ outPixels,
2048                       ImageDimensions desiredDimensions)
2049 {
2050   LinearSampleGeneric<Pixel3Bytes, BilinearFilterRGB888, false>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2051 }
2052
2053 void LinearSample4BPP(const uint8_t* __restrict__ inPixels,
2054                       ImageDimensions inputDimensions,
2055                       uint32_t        inputStride,
2056                       uint8_t* __restrict__ outPixels,
2057                       ImageDimensions desiredDimensions)
2058 {
2059   LinearSampleGeneric<Pixel4Bytes, BilinearFilter4Bytes, true>(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions);
2060 }
2061
2062 // Dispatch to a format-appropriate linear sampling function:
2063 void LinearSample(const uint8_t* __restrict__ inPixels,
2064                   ImageDimensions inDimensions,
2065                   uint32_t        inStride,
2066                   Pixel::Format   pixelFormat,
2067                   uint8_t* __restrict__ outPixels,
2068                   ImageDimensions outDimensions)
2069 {
2070   // Check the pixel format is one that is supported:
2071   if(pixelFormat == Pixel::RGB888 || pixelFormat == Pixel::RGBA8888 || pixelFormat == Pixel::L8 || pixelFormat == Pixel::A8 || pixelFormat == Pixel::LA88 || pixelFormat == Pixel::RGB565)
2072   {
2073     switch(pixelFormat)
2074     {
2075       case Pixel::RGB888:
2076       {
2077         LinearSample3BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2078         break;
2079       }
2080       case Pixel::RGBA8888:
2081       {
2082         LinearSample4BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2083         break;
2084       }
2085       case Pixel::L8:
2086       case Pixel::A8:
2087       {
2088         LinearSample1BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2089         break;
2090       }
2091       case Pixel::LA88:
2092       {
2093         LinearSample2BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2094         break;
2095       }
2096       case Pixel::RGB565:
2097       {
2098         LinearSampleRGB565(inPixels, inDimensions, inStride, outPixels, outDimensions);
2099         break;
2100       }
2101       default:
2102       {
2103         DALI_ASSERT_DEBUG(0 == "Inner branch conditions don't match outer branch.");
2104       }
2105     }
2106   }
2107   else
2108   {
2109     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Bitmap was not linear sampled: unsupported pixel format: %u.\n", uint32_t(pixelFormat));
2110   }
2111 }
2112
2113 void Resample(const uint8_t* __restrict__ inPixels,
2114               ImageDimensions inputDimensions,
2115               uint32_t        inputStride,
2116               uint8_t* __restrict__ outPixels,
2117               ImageDimensions   desiredDimensions,
2118               Resampler::Filter filterType,
2119               int               numChannels,
2120               bool              hasAlpha)
2121 {
2122   // Got from the test.cpp of the ImageResampler lib.
2123   const float ONE_DIV_255               = 1.0f / 255.0f;
2124   const int   MAX_UNSIGNED_CHAR         = std::numeric_limits<uint8_t>::max();
2125   const int   LINEAR_TO_SRGB_TABLE_SIZE = 4096;
2126   const int   ALPHA_CHANNEL             = hasAlpha ? (numChannels - 1) : 0;
2127
2128   static bool    loadColorSpaces = true;
2129   static float   srgbToLinear[MAX_UNSIGNED_CHAR + 1];
2130   static uint8_t linearToSrgb[LINEAR_TO_SRGB_TABLE_SIZE];
2131
2132   if(loadColorSpaces) // Only create the color space conversions on the first execution
2133   {
2134     loadColorSpaces = false;
2135
2136     for(int i = 0; i <= MAX_UNSIGNED_CHAR; ++i)
2137     {
2138       srgbToLinear[i] = pow(static_cast<float>(i) * ONE_DIV_255, DEFAULT_SOURCE_GAMMA);
2139     }
2140
2141     const float invLinearToSrgbTableSize = 1.0f / static_cast<float>(LINEAR_TO_SRGB_TABLE_SIZE);
2142     const float invSourceGamma           = 1.0f / DEFAULT_SOURCE_GAMMA;
2143
2144     for(int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i)
2145     {
2146       int k = static_cast<int>(255.0f * pow(static_cast<float>(i) * invLinearToSrgbTableSize, invSourceGamma) + 0.5f);
2147       if(k < 0)
2148       {
2149         k = 0;
2150       }
2151       else if(k > MAX_UNSIGNED_CHAR)
2152       {
2153         k = MAX_UNSIGNED_CHAR;
2154       }
2155       linearToSrgb[i] = static_cast<uint8_t>(k);
2156     }
2157   }
2158
2159   std::vector<Resampler*>    resamplers(numChannels);
2160   std::vector<Vector<float>> samples(numChannels);
2161
2162   const int srcWidth  = inputDimensions.GetWidth();
2163   const int srcHeight = inputDimensions.GetHeight();
2164   const int dstWidth  = desiredDimensions.GetWidth();
2165   const int dstHeight = desiredDimensions.GetHeight();
2166
2167   // Now create a Resampler instance for each component to process. The first instance will create new contributor tables, which are shared by the resamplers
2168   // used for the other components (a memory and slight cache efficiency optimization).
2169   resamplers[0] = new Resampler(srcWidth,
2170                                 srcHeight,
2171                                 dstWidth,
2172                                 dstHeight,
2173                                 Resampler::BOUNDARY_CLAMP,
2174                                 0.0f,          // sample_low,
2175                                 1.0f,          // sample_high. Clamp output samples to specified range, or disable clamping if sample_low >= sample_high.
2176                                 filterType,    // The type of filter.
2177                                 NULL,          // Pclist_x,
2178                                 NULL,          // Pclist_y. Optional pointers to contributor lists from another instance of a Resampler.
2179                                 FILTER_SCALE,  // src_x_ofs,
2180                                 FILTER_SCALE); // src_y_ofs. Offset input image by specified amount (fractional values okay).
2181   samples[0].ResizeUninitialized(srcWidth);
2182   for(int i = 1; i < numChannels; ++i)
2183   {
2184     resamplers[i] = new Resampler(srcWidth,
2185                                   srcHeight,
2186                                   dstWidth,
2187                                   dstHeight,
2188                                   Resampler::BOUNDARY_CLAMP,
2189                                   0.0f,
2190                                   1.0f,
2191                                   filterType,
2192                                   resamplers[0]->get_clist_x(),
2193                                   resamplers[0]->get_clist_y(),
2194                                   FILTER_SCALE,
2195                                   FILTER_SCALE);
2196     samples[i].ResizeUninitialized(srcWidth);
2197   }
2198
2199   const int srcPitch = inputStride * numChannels;
2200   const int dstPitch = dstWidth * numChannels;
2201   int       dstY     = 0;
2202
2203   for(int srcY = 0; srcY < srcHeight; ++srcY)
2204   {
2205     const uint8_t* pSrc = &inPixels[srcY * srcPitch];
2206
2207     for(int x = 0; x < srcWidth; ++x)
2208     {
2209       for(int c = 0; c < numChannels; ++c)
2210       {
2211         if(c == ALPHA_CHANNEL && hasAlpha)
2212         {
2213           samples[c][x] = *pSrc++ * ONE_DIV_255;
2214         }
2215         else
2216         {
2217           samples[c][x] = srgbToLinear[*pSrc++];
2218         }
2219       }
2220     }
2221
2222     for(int c = 0; c < numChannels; ++c)
2223     {
2224       if(!resamplers[c]->put_line(&samples[c][0]))
2225       {
2226         DALI_ASSERT_DEBUG(!"Out of memory");
2227       }
2228     }
2229
2230     for(;;)
2231     {
2232       int compIndex;
2233       for(compIndex = 0; compIndex < numChannels; ++compIndex)
2234       {
2235         const float* pOutputSamples = resamplers[compIndex]->get_line();
2236         if(!pOutputSamples)
2237         {
2238           break;
2239         }
2240
2241         const bool isAlphaChannel = (compIndex == ALPHA_CHANNEL && hasAlpha);
2242         DALI_ASSERT_DEBUG(dstY < dstHeight);
2243         uint8_t* pDst = &outPixels[dstY * dstPitch + compIndex];
2244
2245         for(int x = 0; x < dstWidth; ++x)
2246         {
2247           if(isAlphaChannel)
2248           {
2249             int c = static_cast<int>(255.0f * pOutputSamples[x] + 0.5f);
2250             if(c < 0)
2251             {
2252               c = 0;
2253             }
2254             else if(c > MAX_UNSIGNED_CHAR)
2255             {
2256               c = MAX_UNSIGNED_CHAR;
2257             }
2258             *pDst = static_cast<uint8_t>(c);
2259           }
2260           else
2261           {
2262             int j = static_cast<int>(LINEAR_TO_SRGB_TABLE_SIZE * pOutputSamples[x] + 0.5f);
2263             if(j < 0)
2264             {
2265               j = 0;
2266             }
2267             else if(j >= LINEAR_TO_SRGB_TABLE_SIZE)
2268             {
2269               j = LINEAR_TO_SRGB_TABLE_SIZE - 1;
2270             }
2271             *pDst = linearToSrgb[j];
2272           }
2273
2274           pDst += numChannels;
2275         }
2276       }
2277       if(compIndex < numChannels)
2278       {
2279         break;
2280       }
2281
2282       ++dstY;
2283     }
2284   }
2285
2286   // Delete the resamplers.
2287   for(int i = 0; i < numChannels; ++i)
2288   {
2289     delete resamplers[i];
2290   }
2291 }
2292
2293 void LanczosSample4BPP(const uint8_t* __restrict__ inPixels,
2294                        ImageDimensions inputDimensions,
2295                        uint32_t        inputStride,
2296                        uint8_t* __restrict__ outPixels,
2297                        ImageDimensions desiredDimensions)
2298 {
2299   Resample(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions, Resampler::LANCZOS4, 4, true);
2300 }
2301
2302 void LanczosSample1BPP(const uint8_t* __restrict__ inPixels,
2303                        ImageDimensions inputDimensions,
2304                        uint32_t        inputStride,
2305                        uint8_t* __restrict__ outPixels,
2306                        ImageDimensions desiredDimensions)
2307 {
2308   // For L8 images
2309   Resample(inPixels, inputDimensions, inputStride, outPixels, desiredDimensions, Resampler::LANCZOS4, 1, false);
2310 }
2311
2312 // Dispatch to a format-appropriate third-party resampling function:
2313 void LanczosSample(const uint8_t* __restrict__ inPixels,
2314                    ImageDimensions inDimensions,
2315                    uint32_t        inStride,
2316                    Pixel::Format   pixelFormat,
2317                    uint8_t* __restrict__ outPixels,
2318                    ImageDimensions outDimensions)
2319 {
2320   // Check the pixel format is one that is supported:
2321   if(pixelFormat == Pixel::RGBA8888 || pixelFormat == Pixel::BGRA8888 || pixelFormat == Pixel::L8 || pixelFormat == Pixel::A8)
2322   {
2323     switch(pixelFormat)
2324     {
2325       case Pixel::RGBA8888:
2326       case Pixel::BGRA8888:
2327       {
2328         LanczosSample4BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2329         break;
2330       }
2331       case Pixel::L8:
2332       case Pixel::A8:
2333       {
2334         LanczosSample1BPP(inPixels, inDimensions, inStride, outPixels, outDimensions);
2335         break;
2336       }
2337       default:
2338       {
2339         DALI_ASSERT_DEBUG(0 == "Inner branch conditions don't match outer branch.");
2340       }
2341     }
2342   }
2343   else
2344   {
2345     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Bitmap was not lanczos sampled: unsupported pixel format: %u.\n", static_cast<uint32_t>(pixelFormat));
2346   }
2347 }
2348
2349 void RotateByShear(const uint8_t* const pixelsIn,
2350                    uint32_t             widthIn,
2351                    uint32_t             heightIn,
2352                    uint32_t             strideIn,
2353                    uint32_t             pixelSize,
2354                    float                radians,
2355                    uint8_t*&            pixelsOut,
2356                    uint32_t&            widthOut,
2357                    uint32_t&            heightOut)
2358 {
2359   // @note Code got from https://www.codeproject.com/Articles/202/High-quality-image-rotation-rotate-by-shear by Eran Yariv.
2360
2361   // Do first the fast rotations to transform the angle into a (-45..45] range.
2362
2363   bool fastRotationPerformed = false;
2364   if((radians > Math::PI_4) && (radians <= RAD_135))
2365   {
2366     // Angle in (45.0 .. 135.0]
2367     // Rotate image by 90 degrees into temporary image,
2368     // so it requires only an extra rotation angle
2369     // of -45.0 .. +45.0 to complete rotation.
2370     fastRotationPerformed = Rotate90(pixelsIn,
2371                                      widthIn,
2372                                      heightIn,
2373                                      strideIn,
2374                                      pixelSize,
2375                                      pixelsOut,
2376                                      widthOut,
2377                                      heightOut);
2378
2379     if(!fastRotationPerformed)
2380     {
2381       DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "fast rotation failed\n");
2382       // The fast rotation failed.
2383       return;
2384     }
2385
2386     radians -= Math::PI_2;
2387   }
2388   else if((radians > RAD_135) && (radians <= RAD_225))
2389   {
2390     // Angle in (135.0 .. 225.0]
2391     // Rotate image by 180 degrees into temporary image,
2392     // so it requires only an extra rotation angle
2393     // of -45.0 .. +45.0 to complete rotation.
2394
2395     fastRotationPerformed = Rotate180(pixelsIn,
2396                                       widthIn,
2397                                       heightIn,
2398                                       strideIn,
2399                                       pixelSize,
2400                                       pixelsOut);
2401
2402     if(!fastRotationPerformed)
2403     {
2404       DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "fast rotation failed\n");
2405       // The fast rotation failed.
2406       return;
2407     }
2408
2409     radians -= Math::PI;
2410     widthOut  = widthIn;
2411     heightOut = heightIn;
2412   }
2413   else if((radians > RAD_225) && (radians <= RAD_315))
2414   {
2415     // Angle in (225.0 .. 315.0]
2416     // Rotate image by 270 degrees into temporary image,
2417     // so it requires only an extra rotation angle
2418     // of -45.0 .. +45.0 to complete rotation.
2419
2420     fastRotationPerformed = Rotate270(pixelsIn,
2421                                       widthIn,
2422                                       heightIn,
2423                                       strideIn,
2424                                       pixelSize,
2425                                       pixelsOut,
2426                                       widthOut,
2427                                       heightOut);
2428
2429     if(!fastRotationPerformed)
2430     {
2431       DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "fast rotation failed\n");
2432       // The fast rotation failed.
2433       return;
2434     }
2435
2436     radians -= RAD_270;
2437   }
2438
2439   if(fabs(radians) < Dali::Math::MACHINE_EPSILON_10)
2440   {
2441     // Nothing else to do if the angle is zero.
2442     // The rotation angle was 90, 180 or 270.
2443
2444     // @note Allocated memory by 'Fast Rotations', if any, has to be freed by the called to this function.
2445     return;
2446   }
2447
2448   const uint8_t* const                      firstHorizontalSkewPixelsIn = fastRotationPerformed ? pixelsOut : pixelsIn;
2449   std::unique_ptr<uint8_t, void (*)(void*)> tmpPixelsInPtr((fastRotationPerformed ? pixelsOut : nullptr), free);
2450
2451   uint32_t stride = fastRotationPerformed ? widthOut : strideIn;
2452
2453   // Reset the input/output
2454   widthIn   = widthOut;
2455   heightIn  = heightOut;
2456   pixelsOut = nullptr;
2457
2458   const float angleSinus   = sin(radians);
2459   const float angleCosinus = cos(radians);
2460   const float angleTangent = tan(0.5f * radians);
2461
2462   ///////////////////////////////////////
2463   // Perform 1st shear (horizontal)
2464   ///////////////////////////////////////
2465
2466   // Calculate first shear (horizontal) destination image dimensions
2467
2468   widthOut  = widthIn + static_cast<uint32_t>(fabs(angleTangent) * static_cast<float>(heightIn));
2469   heightOut = heightIn;
2470
2471   // Allocate the buffer for the 1st shear
2472   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2473
2474   if(nullptr == pixelsOut)
2475   {
2476     widthOut  = 0u;
2477     heightOut = 0u;
2478
2479     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2480
2481     // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'Fast rotations'.
2482     // Nothing else to do if the memory allocation fails.
2483     return;
2484   }
2485
2486   for(uint32_t y = 0u; y < heightOut; ++y)
2487   {
2488     const float shear = angleTangent * ((angleTangent >= 0.f) ? (0.5f + static_cast<float>(y)) : (0.5f + static_cast<float>(y) - static_cast<float>(heightOut)));
2489
2490     const int intShear = static_cast<int>(floor(shear));
2491     HorizontalSkew(firstHorizontalSkewPixelsIn, widthIn, stride, pixelSize, pixelsOut, widthOut, y, intShear, shear - static_cast<float>(intShear));
2492   }
2493
2494   // Reset the 'pixel in' pointer with the output of the 'First Horizontal Skew' and free the memory allocated by the 'Fast Rotations'.
2495   tmpPixelsInPtr.reset(pixelsOut);
2496   uint32_t tmpWidthIn  = widthOut;
2497   uint32_t tmpHeightIn = heightOut;
2498
2499   // Reset the input/output
2500   pixelsOut = nullptr;
2501
2502   ///////////////////////////////////////
2503   // Perform 2nd shear (vertical)
2504   ///////////////////////////////////////
2505
2506   // Calc 2nd shear (vertical) destination image dimensions
2507   heightOut = static_cast<uint32_t>(static_cast<float>(widthIn) * fabs(angleSinus) + static_cast<float>(heightIn) * angleCosinus);
2508
2509   // Allocate the buffer for the 2nd shear
2510   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2511
2512   if(nullptr == pixelsOut)
2513   {
2514     widthOut  = 0u;
2515     heightOut = 0u;
2516
2517     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2518     // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'First Horizontal Skew'.
2519     // Nothing else to do if the memory allocation fails.
2520     return;
2521   }
2522
2523   // Variable skew offset
2524   float offset = angleSinus * ((angleSinus > 0.f) ? static_cast<float>(widthIn - 1u) : -(static_cast<float>(widthIn) - static_cast<float>(widthOut)));
2525
2526   uint32_t column = 0u;
2527   for(column = 0u; column < widthOut; ++column, offset -= angleSinus)
2528   {
2529     const int32_t shear = static_cast<int32_t>(floor(offset));
2530     VerticalSkew(tmpPixelsInPtr.get(), tmpWidthIn, tmpHeightIn, tmpWidthIn, pixelSize, pixelsOut, widthOut, heightOut, column, shear, offset - static_cast<float>(shear));
2531   }
2532   // Reset the 'pixel in' pointer with the output of the 'Vertical Skew' and free the memory allocated by the 'First Horizontal Skew'.
2533   // Reset the input/output
2534   tmpPixelsInPtr.reset(pixelsOut);
2535   tmpWidthIn  = widthOut;
2536   tmpHeightIn = heightOut;
2537   pixelsOut   = nullptr;
2538
2539   ///////////////////////////////////////
2540   // Perform 3rd shear (horizontal)
2541   ///////////////////////////////////////
2542
2543   // Calc 3rd shear (horizontal) destination image dimensions
2544   widthOut = static_cast<uint32_t>(static_cast<float>(heightIn) * fabs(angleSinus) + static_cast<float>(widthIn) * angleCosinus) + 1u;
2545
2546   // Allocate the buffer for the 3rd shear
2547   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2548
2549   if(nullptr == pixelsOut)
2550   {
2551     widthOut  = 0u;
2552     heightOut = 0u;
2553
2554     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2555     // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'Vertical Skew'.
2556     // Nothing else to do if the memory allocation fails.
2557     return;
2558   }
2559
2560   offset = (angleSinus >= 0.f) ? -angleSinus * angleTangent * static_cast<float>(widthIn - 1u) : angleTangent * (static_cast<float>(widthIn - 1u) * -angleSinus + (1.f - static_cast<float>(heightOut)));
2561
2562   for(uint32_t y = 0u; y < heightOut; ++y, offset += angleTangent)
2563   {
2564     const int32_t shear = static_cast<int32_t>(floor(offset));
2565     HorizontalSkew(tmpPixelsInPtr.get(), tmpWidthIn, tmpWidthIn, pixelSize, pixelsOut, widthOut, y, shear, offset - static_cast<float>(shear));
2566   }
2567
2568   // The deleter of the tmpPixelsInPtr unique pointer is called freeing the memory allocated by the 'Vertical Skew'.
2569   // @note Allocated memory by the last 'Horizontal Skew' has to be freed by the caller to this function.
2570 }
2571
2572 void HorizontalShear(const uint8_t* const pixelsIn,
2573                      uint32_t             widthIn,
2574                      uint32_t             heightIn,
2575                      uint32_t             strideIn,
2576                      uint32_t             pixelSize,
2577                      float                radians,
2578                      uint8_t*&            pixelsOut,
2579                      uint32_t&            widthOut,
2580                      uint32_t&            heightOut)
2581 {
2582   // Calculate the destination image dimensions.
2583
2584   const float absRadians = fabs(radians);
2585
2586   if(absRadians > Math::PI_4)
2587   {
2588     // Can't shear more than 45 degrees.
2589     widthOut  = 0u;
2590     heightOut = 0u;
2591
2592     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "Can't shear more than 45 degrees (PI/4 radians). radians : %f\n", radians);
2593     return;
2594   }
2595
2596   widthOut  = widthIn + static_cast<uint32_t>(ceil(absRadians * static_cast<float>(heightIn)));
2597   heightOut = heightIn;
2598
2599   // Allocate the buffer for the shear.
2600   pixelsOut = static_cast<uint8_t*>(malloc(widthOut * heightOut * pixelSize));
2601
2602   if(nullptr == pixelsOut)
2603   {
2604     widthOut  = 0u;
2605     heightOut = 0u;
2606
2607     DALI_LOG_INFO(gImageOpsLogFilter, Dali::Integration::Log::Verbose, "malloc failed to allocate memory\n");
2608     return;
2609   }
2610
2611   for(uint32_t y = 0u; y < heightOut; ++y)
2612   {
2613     const float shear = radians * ((radians >= 0.f) ? (0.5f + static_cast<float>(y)) : (0.5f + static_cast<float>(y) - static_cast<float>(heightOut)));
2614
2615     const int32_t intShear = static_cast<int32_t>(floor(shear));
2616     HorizontalSkew(pixelsIn, widthIn, strideIn, pixelSize, pixelsOut, widthOut, y, intShear, shear - static_cast<float>(intShear));
2617   }
2618 }
2619
2620 } /* namespace Platform */
2621 } /* namespace Internal */
2622 } /* namespace Dali */