src/third_party/libyuv/source/planar_functions.cc

   1 /*
   2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS. All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include "libyuv/planar_functions.h"
  12
  13 #include <string.h>  // for memset()
  14
  15 #include "libyuv/cpu_id.h"
  16 #ifdef HAVE_JPEG
  17 #include "libyuv/mjpeg_decoder.h"
  18 #endif
  19 #include "libyuv/row.h"
  20
  21 #ifdef __cplusplus
  22 namespace libyuv {
  23 extern "C" {
  24 #endif
  25
  26 // Copy a plane of data
  27 LIBYUV_API
  28 void CopyPlane(const uint8* src_y, int src_stride_y,
  29                uint8* dst_y, int dst_stride_y,
  30                int width, int height) {
  31   int y;
  32   void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
  33   // Coalesce rows.
  34   if (src_stride_y == width &&
  35       dst_stride_y == width) {
  36     width *= height;
  37     height = 1;
  38     src_stride_y = dst_stride_y = 0;
  39   }
  40   // Nothing to do.
  41   if (src_y == dst_y && src_stride_y == dst_stride_y) {
  42     return;
  43   }
  44 #if defined(HAS_COPYROW_X86)
  45   if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
  46     CopyRow = CopyRow_X86;
  47   }
  48 #endif
  49 #if defined(HAS_COPYROW_SSE2)
  50   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
  51       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
  52       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
  53     CopyRow = CopyRow_SSE2;
  54   }
  55 #endif
  56 #if defined(HAS_COPYROW_ERMS)
  57   if (TestCpuFlag(kCpuHasERMS)) {
  58     CopyRow = CopyRow_ERMS;
  59   }
  60 #endif
  61 #if defined(HAS_COPYROW_NEON)
  62   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
  63     CopyRow = CopyRow_NEON;
  64   }
  65 #endif
  66 #if defined(HAS_COPYROW_MIPS)
  67   if (TestCpuFlag(kCpuHasMIPS)) {
  68     CopyRow = CopyRow_MIPS;
  69   }
  70 #endif
  71
  72   // Copy plane
  73   for (y = 0; y < height; ++y) {
  74     CopyRow(src_y, dst_y, width);
  75     src_y += src_stride_y;
  76     dst_y += dst_stride_y;
  77   }
  78 }
  79
  80 LIBYUV_API
  81 void CopyPlane_16(const uint16* src_y, int src_stride_y,
  82                   uint16* dst_y, int dst_stride_y,
  83                   int width, int height) {
  84   int y;
  85   void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
  86   // Coalesce rows.
  87   if (src_stride_y == width &&
  88       dst_stride_y == width) {
  89     width *= height;
  90     height = 1;
  91     src_stride_y = dst_stride_y = 0;
  92   }
  93 #if defined(HAS_COPYROW_16_X86)
  94   if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
  95     CopyRow = CopyRow_16_X86;
  96   }
  97 #endif
  98 #if defined(HAS_COPYROW_16_SSE2)
  99   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
 100       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
 101       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
 102     CopyRow = CopyRow_16_SSE2;
 103   }
 104 #endif
 105 #if defined(HAS_COPYROW_16_ERMS)
 106   if (TestCpuFlag(kCpuHasERMS)) {
 107     CopyRow = CopyRow_16_ERMS;
 108   }
 109 #endif
 110 #if defined(HAS_COPYROW_16_NEON)
 111   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
 112     CopyRow = CopyRow_16_NEON;
 113   }
 114 #endif
 115 #if defined(HAS_COPYROW_16_MIPS)
 116   if (TestCpuFlag(kCpuHasMIPS)) {
 117     CopyRow = CopyRow_16_MIPS;
 118   }
 119 #endif
 120
 121   // Copy plane
 122   for (y = 0; y < height; ++y) {
 123     CopyRow(src_y, dst_y, width);
 124     src_y += src_stride_y;
 125     dst_y += dst_stride_y;
 126   }
 127 }
 128
 129 // Copy I422.
 130 LIBYUV_API
 131 int I422Copy(const uint8* src_y, int src_stride_y,
 132              const uint8* src_u, int src_stride_u,
 133              const uint8* src_v, int src_stride_v,
 134              uint8* dst_y, int dst_stride_y,
 135              uint8* dst_u, int dst_stride_u,
 136              uint8* dst_v, int dst_stride_v,
 137              int width, int height) {
 138   int halfwidth = (width + 1) >> 1;
 139   if (!src_y || !src_u || !src_v ||
 140       !dst_y || !dst_u || !dst_v ||
 141       width <= 0 || height == 0) {
 142     return -1;
 143   }
 144   // Negative height means invert the image.
 145   if (height < 0) {
 146     height = -height;
 147     src_y = src_y + (height - 1) * src_stride_y;
 148     src_u = src_u + (height - 1) * src_stride_u;
 149     src_v = src_v + (height - 1) * src_stride_v;
 150     src_stride_y = -src_stride_y;
 151     src_stride_u = -src_stride_u;
 152     src_stride_v = -src_stride_v;
 153   }
 154   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
 155   CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
 156   CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
 157   return 0;
 158 }
 159
 160 // Copy I444.
 161 LIBYUV_API
 162 int I444Copy(const uint8* src_y, int src_stride_y,
 163              const uint8* src_u, int src_stride_u,
 164              const uint8* src_v, int src_stride_v,
 165              uint8* dst_y, int dst_stride_y,
 166              uint8* dst_u, int dst_stride_u,
 167              uint8* dst_v, int dst_stride_v,
 168              int width, int height) {
 169   if (!src_y || !src_u || !src_v ||
 170       !dst_y || !dst_u || !dst_v ||
 171       width <= 0 || height == 0) {
 172     return -1;
 173   }
 174   // Negative height means invert the image.
 175   if (height < 0) {
 176     height = -height;
 177     src_y = src_y + (height - 1) * src_stride_y;
 178     src_u = src_u + (height - 1) * src_stride_u;
 179     src_v = src_v + (height - 1) * src_stride_v;
 180     src_stride_y = -src_stride_y;
 181     src_stride_u = -src_stride_u;
 182     src_stride_v = -src_stride_v;
 183   }
 184
 185   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
 186   CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
 187   CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
 188   return 0;
 189 }
 190
 191 // Copy I400.
 192 LIBYUV_API
 193 int I400ToI400(const uint8* src_y, int src_stride_y,
 194                uint8* dst_y, int dst_stride_y,
 195                int width, int height) {
 196   if (!src_y || !dst_y || width <= 0 || height == 0) {
 197     return -1;
 198   }
 199   // Negative height means invert the image.
 200   if (height < 0) {
 201     height = -height;
 202     src_y = src_y + (height - 1) * src_stride_y;
 203     src_stride_y = -src_stride_y;
 204   }
 205   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
 206   return 0;
 207 }
 208
 209 // Convert I420 to I400.
 210 LIBYUV_API
 211 int I420ToI400(const uint8* src_y, int src_stride_y,
 212                const uint8* src_u, int src_stride_u,
 213                const uint8* src_v, int src_stride_v,
 214                uint8* dst_y, int dst_stride_y,
 215                int width, int height) {
 216   if (!src_y || !dst_y || width <= 0 || height == 0) {
 217     return -1;
 218   }
 219   // Negative height means invert the image.
 220   if (height < 0) {
 221     height = -height;
 222     src_y = src_y + (height - 1) * src_stride_y;
 223     src_stride_y = -src_stride_y;
 224   }
 225   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
 226   return 0;
 227 }
 228
 229 // Mirror a plane of data.
 230 void MirrorPlane(const uint8* src_y, int src_stride_y,
 231                  uint8* dst_y, int dst_stride_y,
 232                  int width, int height) {
 233   int y;
 234   void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
 235   // Negative height means invert the image.
 236   if (height < 0) {
 237     height = -height;
 238     src_y = src_y + (height - 1) * src_stride_y;
 239     src_stride_y = -src_stride_y;
 240   }
 241 #if defined(HAS_MIRRORROW_NEON)
 242   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
 243     MirrorRow = MirrorRow_NEON;
 244   }
 245 #endif
 246 #if defined(HAS_MIRRORROW_SSE2)
 247   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
 248     MirrorRow = MirrorRow_SSE2;
 249   }
 250 #endif
 251 #if defined(HAS_MIRRORROW_SSSE3)
 252   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
 253       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
 254       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
 255     MirrorRow = MirrorRow_SSSE3;
 256   }
 257 #endif
 258 #if defined(HAS_MIRRORROW_AVX2)
 259   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
 260     MirrorRow = MirrorRow_AVX2;
 261   }
 262 #endif
 263
 264   // Mirror plane
 265   for (y = 0; y < height; ++y) {
 266     MirrorRow(src_y, dst_y, width);
 267     src_y += src_stride_y;
 268     dst_y += dst_stride_y;
 269   }
 270 }
 271
 272 // Convert YUY2 to I422.
 273 LIBYUV_API
 274 int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
 275                uint8* dst_y, int dst_stride_y,
 276                uint8* dst_u, int dst_stride_u,
 277                uint8* dst_v, int dst_stride_v,
 278                int width, int height) {
 279   int y;
 280   void (*YUY2ToUV422Row)(const uint8* src_yuy2,
 281                          uint8* dst_u, uint8* dst_v, int pix) =
 282       YUY2ToUV422Row_C;
 283   void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
 284       YUY2ToYRow_C;
 285   // Negative height means invert the image.
 286   if (height < 0) {
 287     height = -height;
 288     src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
 289     src_stride_yuy2 = -src_stride_yuy2;
 290   }
 291   // Coalesce rows.
 292   if (src_stride_yuy2 == width * 2 &&
 293       dst_stride_y == width &&
 294       dst_stride_u * 2 == width &&
 295       dst_stride_v * 2 == width) {
 296     width *= height;
 297     height = 1;
 298     src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
 299   }
 300 #if defined(HAS_YUY2TOYROW_SSE2)
 301   if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
 302     YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
 303     YUY2ToYRow = YUY2ToYRow_Any_SSE2;
 304     if (IS_ALIGNED(width, 16)) {
 305       YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
 306       YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
 307       if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
 308         YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
 309         if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
 310           YUY2ToYRow = YUY2ToYRow_SSE2;
 311         }
 312       }
 313     }
 314   }
 315 #endif
 316 #if defined(HAS_YUY2TOYROW_AVX2)
 317   if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
 318     YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
 319     YUY2ToYRow = YUY2ToYRow_Any_AVX2;
 320     if (IS_ALIGNED(width, 32)) {
 321       YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
 322       YUY2ToYRow = YUY2ToYRow_AVX2;
 323     }
 324   }
 325 #endif
 326 #if defined(HAS_YUY2TOYROW_NEON)
 327   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
 328     YUY2ToYRow = YUY2ToYRow_Any_NEON;
 329     if (width >= 16) {
 330       YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
 331     }
 332     if (IS_ALIGNED(width, 16)) {
 333       YUY2ToYRow = YUY2ToYRow_NEON;
 334       YUY2ToUV422Row = YUY2ToUV422Row_NEON;
 335     }
 336   }
 337 #endif
 338
 339   for (y = 0; y < height; ++y) {
 340     YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
 341     YUY2ToYRow(src_yuy2, dst_y, width);
 342     src_yuy2 += src_stride_yuy2;
 343     dst_y += dst_stride_y;
 344     dst_u += dst_stride_u;
 345     dst_v += dst_stride_v;
 346   }
 347   return 0;
 348 }
 349
 350 // Convert UYVY to I422.
 351 LIBYUV_API
 352 int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
 353                uint8* dst_y, int dst_stride_y,
 354                uint8* dst_u, int dst_stride_u,
 355                uint8* dst_v, int dst_stride_v,
 356                int width, int height) {
 357   int y;
 358   void (*UYVYToUV422Row)(const uint8* src_uyvy,
 359                          uint8* dst_u, uint8* dst_v, int pix) =
 360       UYVYToUV422Row_C;
 361   void (*UYVYToYRow)(const uint8* src_uyvy,
 362                      uint8* dst_y, int pix) = UYVYToYRow_C;
 363   // Negative height means invert the image.
 364   if (height < 0) {
 365     height = -height;
 366     src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
 367     src_stride_uyvy = -src_stride_uyvy;
 368   }
 369   // Coalesce rows.
 370   if (src_stride_uyvy == width * 2 &&
 371       dst_stride_y == width &&
 372       dst_stride_u * 2 == width &&
 373       dst_stride_v * 2 == width) {
 374     width *= height;
 375     height = 1;
 376     src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
 377   }
 378 #if defined(HAS_UYVYTOYROW_SSE2)
 379   if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
 380     UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
 381     UYVYToYRow = UYVYToYRow_Any_SSE2;
 382     if (IS_ALIGNED(width, 16)) {
 383       UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
 384       UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
 385       if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
 386         UYVYToUV422Row = UYVYToUV422Row_SSE2;
 387         if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
 388           UYVYToYRow = UYVYToYRow_SSE2;
 389         }
 390       }
 391     }
 392   }
 393 #endif
 394 #if defined(HAS_UYVYTOYROW_AVX2)
 395   if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
 396     UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
 397     UYVYToYRow = UYVYToYRow_Any_AVX2;
 398     if (IS_ALIGNED(width, 32)) {
 399       UYVYToUV422Row = UYVYToUV422Row_AVX2;
 400       UYVYToYRow = UYVYToYRow_AVX2;
 401     }
 402   }
 403 #endif
 404 #if defined(HAS_UYVYTOYROW_NEON)
 405   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
 406     UYVYToYRow = UYVYToYRow_Any_NEON;
 407     if (width >= 16) {
 408       UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
 409     }
 410     if (IS_ALIGNED(width, 16)) {
 411       UYVYToYRow = UYVYToYRow_NEON;
 412       UYVYToUV422Row = UYVYToUV422Row_NEON;
 413     }
 414   }
 415 #endif
 416
 417   for (y = 0; y < height; ++y) {
 418     UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
 419     UYVYToYRow(src_uyvy, dst_y, width);
 420     src_uyvy += src_stride_uyvy;
 421     dst_y += dst_stride_y;
 422     dst_u += dst_stride_u;
 423     dst_v += dst_stride_v;
 424   }
 425   return 0;
 426 }
 427
 428 // Mirror I400 with optional flipping
 429 LIBYUV_API
 430 int I400Mirror(const uint8* src_y, int src_stride_y,
 431                uint8* dst_y, int dst_stride_y,
 432                int width, int height) {
 433   if (!src_y || !dst_y ||
 434       width <= 0 || height == 0) {
 435     return -1;
 436   }
 437   // Negative height means invert the image.
 438   if (height < 0) {
 439     height = -height;
 440     src_y = src_y + (height - 1) * src_stride_y;
 441     src_stride_y = -src_stride_y;
 442   }
 443
 444   MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
 445   return 0;
 446 }
 447
 448 // Mirror I420 with optional flipping
 449 LIBYUV_API
 450 int I420Mirror(const uint8* src_y, int src_stride_y,
 451                const uint8* src_u, int src_stride_u,
 452                const uint8* src_v, int src_stride_v,
 453                uint8* dst_y, int dst_stride_y,
 454                uint8* dst_u, int dst_stride_u,
 455                uint8* dst_v, int dst_stride_v,
 456                int width, int height) {
 457   int halfwidth = (width + 1) >> 1;
 458   int halfheight = (height + 1) >> 1;
 459   if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
 460       width <= 0 || height == 0) {
 461     return -1;
 462   }
 463   // Negative height means invert the image.
 464   if (height < 0) {
 465     height = -height;
 466     halfheight = (height + 1) >> 1;
 467     src_y = src_y + (height - 1) * src_stride_y;
 468     src_u = src_u + (halfheight - 1) * src_stride_u;
 469     src_v = src_v + (halfheight - 1) * src_stride_v;
 470     src_stride_y = -src_stride_y;
 471     src_stride_u = -src_stride_u;
 472     src_stride_v = -src_stride_v;
 473   }
 474
 475   if (dst_y) {
 476     MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
 477   }
 478   MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
 479   MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
 480   return 0;
 481 }
 482
 483 // ARGB mirror.
 484 LIBYUV_API
 485 int ARGBMirror(const uint8* src_argb, int src_stride_argb,
 486                uint8* dst_argb, int dst_stride_argb,
 487                int width, int height) {
 488   int y;
 489   void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
 490       ARGBMirrorRow_C;
 491   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
 492     return -1;
 493   }
 494   // Negative height means invert the image.
 495   if (height < 0) {
 496     height = -height;
 497     src_argb = src_argb + (height - 1) * src_stride_argb;
 498     src_stride_argb = -src_stride_argb;
 499   }
 500
 501 #if defined(HAS_ARGBMIRRORROW_SSSE3)
 502   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
 503       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
 504       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
 505     ARGBMirrorRow = ARGBMirrorRow_SSSE3;
 506   }
 507 #endif
 508 #if defined(HAS_ARGBMIRRORROW_AVX2)
 509   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
 510     ARGBMirrorRow = ARGBMirrorRow_AVX2;
 511   }
 512 #endif
 513 #if defined(HAS_ARGBMIRRORROW_NEON)
 514   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
 515     ARGBMirrorRow = ARGBMirrorRow_NEON;
 516   }
 517 #endif
 518
 519   // Mirror plane
 520   for (y = 0; y < height; ++y) {
 521     ARGBMirrorRow(src_argb, dst_argb, width);
 522     src_argb += src_stride_argb;
 523     dst_argb += dst_stride_argb;
 524   }
 525   return 0;
 526 }
 527
 528 // Get a blender that optimized for the CPU, alignment and pixel count.
 529 // As there are 6 blenders to choose from, the caller should try to use
 530 // the same blend function for all pixels if possible.
 531 LIBYUV_API
 532 ARGBBlendRow GetARGBBlend() {
 533   void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
 534                        uint8* dst_argb, int width) = ARGBBlendRow_C;
 535 #if defined(HAS_ARGBBLENDROW_SSSE3)
 536   if (TestCpuFlag(kCpuHasSSSE3)) {
 537     ARGBBlendRow = ARGBBlendRow_SSSE3;
 538     return ARGBBlendRow;
 539   }
 540 #endif
 541 #if defined(HAS_ARGBBLENDROW_SSE2)
 542   if (TestCpuFlag(kCpuHasSSE2)) {
 543     ARGBBlendRow = ARGBBlendRow_SSE2;
 544   }
 545 #endif
 546 #if defined(HAS_ARGBBLENDROW_NEON)
 547   if (TestCpuFlag(kCpuHasNEON)) {
 548     ARGBBlendRow = ARGBBlendRow_NEON;
 549   }
 550 #endif
 551   return ARGBBlendRow;
 552 }
 553
 554 // Alpha Blend 2 ARGB images and store to destination.
 555 LIBYUV_API
 556 int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
 557               const uint8* src_argb1, int src_stride_argb1,
 558               uint8* dst_argb, int dst_stride_argb,
 559               int width, int height) {
 560   int y;
 561   void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
 562                        uint8* dst_argb, int width) = GetARGBBlend();
 563   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
 564     return -1;
 565   }
 566   // Negative height means invert the image.
 567   if (height < 0) {
 568     height = -height;
 569     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
 570     dst_stride_argb = -dst_stride_argb;
 571   }
 572   // Coalesce rows.
 573   if (src_stride_argb0 == width * 4 &&
 574       src_stride_argb1 == width * 4 &&
 575       dst_stride_argb == width * 4) {
 576     width *= height;
 577     height = 1;
 578     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
 579   }
 580
 581   for (y = 0; y < height; ++y) {
 582     ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
 583     src_argb0 += src_stride_argb0;
 584     src_argb1 += src_stride_argb1;
 585     dst_argb += dst_stride_argb;
 586   }
 587   return 0;
 588 }
 589
 590 // Multiply 2 ARGB images and store to destination.
 591 LIBYUV_API
 592 int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
 593                  const uint8* src_argb1, int src_stride_argb1,
 594                  uint8* dst_argb, int dst_stride_argb,
 595                  int width, int height) {
 596   int y;
 597   void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
 598                           int width) = ARGBMultiplyRow_C;
 599   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
 600     return -1;
 601   }
 602   // Negative height means invert the image.
 603   if (height < 0) {
 604     height = -height;
 605     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
 606     dst_stride_argb = -dst_stride_argb;
 607   }
 608   // Coalesce rows.
 609   if (src_stride_argb0 == width * 4 &&
 610       src_stride_argb1 == width * 4 &&
 611       dst_stride_argb == width * 4) {
 612     width *= height;
 613     height = 1;
 614     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
 615   }
 616 #if defined(HAS_ARGBMULTIPLYROW_SSE2)
 617   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
 618     ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
 619     if (IS_ALIGNED(width, 4)) {
 620       ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
 621     }
 622   }
 623 #endif
 624 #if defined(HAS_ARGBMULTIPLYROW_AVX2)
 625   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
 626     ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
 627     if (IS_ALIGNED(width, 8)) {
 628       ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
 629     }
 630   }
 631 #endif
 632 #if defined(HAS_ARGBMULTIPLYROW_NEON)
 633   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
 634     ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
 635     if (IS_ALIGNED(width, 8)) {
 636       ARGBMultiplyRow = ARGBMultiplyRow_NEON;
 637     }
 638   }
 639 #endif
 640
 641   // Multiply plane
 642   for (y = 0; y < height; ++y) {
 643     ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
 644     src_argb0 += src_stride_argb0;
 645     src_argb1 += src_stride_argb1;
 646     dst_argb += dst_stride_argb;
 647   }
 648   return 0;
 649 }
 650
 651 // Add 2 ARGB images and store to destination.
 652 LIBYUV_API
 653 int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
 654             const uint8* src_argb1, int src_stride_argb1,
 655             uint8* dst_argb, int dst_stride_argb,
 656             int width, int height) {
 657   int y;
 658   void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
 659                      int width) = ARGBAddRow_C;
 660   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
 661     return -1;
 662   }
 663   // Negative height means invert the image.
 664   if (height < 0) {
 665     height = -height;
 666     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
 667     dst_stride_argb = -dst_stride_argb;
 668   }
 669   // Coalesce rows.
 670   if (src_stride_argb0 == width * 4 &&
 671       src_stride_argb1 == width * 4 &&
 672       dst_stride_argb == width * 4) {
 673     width *= height;
 674     height = 1;
 675     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
 676   }
 677 #if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER)
 678   if (TestCpuFlag(kCpuHasSSE2)) {
 679     ARGBAddRow = ARGBAddRow_SSE2;
 680   }
 681 #endif
 682 #if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER)
 683   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
 684     ARGBAddRow = ARGBAddRow_Any_SSE2;
 685     if (IS_ALIGNED(width, 4)) {
 686       ARGBAddRow = ARGBAddRow_SSE2;
 687     }
 688   }
 689 #endif
 690 #if defined(HAS_ARGBADDROW_AVX2)
 691   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
 692     ARGBAddRow = ARGBAddRow_Any_AVX2;
 693     if (IS_ALIGNED(width, 8)) {
 694       ARGBAddRow = ARGBAddRow_AVX2;
 695     }
 696   }
 697 #endif
 698 #if defined(HAS_ARGBADDROW_NEON)
 699   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
 700     ARGBAddRow = ARGBAddRow_Any_NEON;
 701     if (IS_ALIGNED(width, 8)) {
 702       ARGBAddRow = ARGBAddRow_NEON;
 703     }
 704   }
 705 #endif
 706
 707   // Add plane
 708   for (y = 0; y < height; ++y) {
 709     ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
 710     src_argb0 += src_stride_argb0;
 711     src_argb1 += src_stride_argb1;
 712     dst_argb += dst_stride_argb;
 713   }
 714   return 0;
 715 }
 716
 717 // Subtract 2 ARGB images and store to destination.
 718 LIBYUV_API
 719 int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
 720                  const uint8* src_argb1, int src_stride_argb1,
 721                  uint8* dst_argb, int dst_stride_argb,
 722                  int width, int height) {
 723   int y;
 724   void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
 725                           int width) = ARGBSubtractRow_C;
 726   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
 727     return -1;
 728   }
 729   // Negative height means invert the image.
 730   if (height < 0) {
 731     height = -height;
 732     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
 733     dst_stride_argb = -dst_stride_argb;
 734   }
 735   // Coalesce rows.
 736   if (src_stride_argb0 == width * 4 &&
 737       src_stride_argb1 == width * 4 &&
 738       dst_stride_argb == width * 4) {
 739     width *= height;
 740     height = 1;
 741     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
 742   }
 743 #if defined(HAS_ARGBSUBTRACTROW_SSE2)
 744   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
 745     ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
 746     if (IS_ALIGNED(width, 4)) {
 747       ARGBSubtractRow = ARGBSubtractRow_SSE2;
 748     }
 749   }
 750 #endif
 751 #if defined(HAS_ARGBSUBTRACTROW_AVX2)
 752   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
 753     ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
 754     if (IS_ALIGNED(width, 8)) {
 755       ARGBSubtractRow = ARGBSubtractRow_AVX2;
 756     }
 757   }
 758 #endif
 759 #if defined(HAS_ARGBSUBTRACTROW_NEON)
 760   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
 761     ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
 762     if (IS_ALIGNED(width, 8)) {
 763       ARGBSubtractRow = ARGBSubtractRow_NEON;
 764     }
 765   }
 766 #endif
 767
 768   // Subtract plane
 769   for (y = 0; y < height; ++y) {
 770     ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
 771     src_argb0 += src_stride_argb0;
 772     src_argb1 += src_stride_argb1;
 773     dst_argb += dst_stride_argb;
 774   }
 775   return 0;
 776 }
 777
 778 // Convert I422 to BGRA.
 779 LIBYUV_API
 780 int I422ToBGRA(const uint8* src_y, int src_stride_y,
 781                const uint8* src_u, int src_stride_u,
 782                const uint8* src_v, int src_stride_v,
 783                uint8* dst_bgra, int dst_stride_bgra,
 784                int width, int height) {
 785   int y;
 786   void (*I422ToBGRARow)(const uint8* y_buf,
 787                         const uint8* u_buf,
 788                         const uint8* v_buf,
 789                         uint8* rgb_buf,
 790                         int width) = I422ToBGRARow_C;
 791   if (!src_y || !src_u || !src_v ||
 792       !dst_bgra ||
 793       width <= 0 || height == 0) {
 794     return -1;
 795   }
 796   // Negative height means invert the image.
 797   if (height < 0) {
 798     height = -height;
 799     dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
 800     dst_stride_bgra = -dst_stride_bgra;
 801   }
 802   // Coalesce rows.
 803   if (src_stride_y == width &&
 804       src_stride_u * 2 == width &&
 805       src_stride_v * 2 == width &&
 806       dst_stride_bgra == width * 4) {
 807     width *= height;
 808     height = 1;
 809     src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
 810   }
 811 #if defined(HAS_I422TOBGRAROW_NEON)
 812   if (TestCpuFlag(kCpuHasNEON)) {
 813     I422ToBGRARow = I422ToBGRARow_Any_NEON;
 814     if (IS_ALIGNED(width, 16)) {
 815       I422ToBGRARow = I422ToBGRARow_NEON;
 816     }
 817   }
 818 #elif defined(HAS_I422TOBGRAROW_SSSE3)
 819   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
 820     I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
 821     if (IS_ALIGNED(width, 8)) {
 822       I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
 823       if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
 824         I422ToBGRARow = I422ToBGRARow_SSSE3;
 825       }
 826     }
 827   }
 828 #elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
 829   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
 830       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
 831       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
 832       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
 833       IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
 834     I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
 835   }
 836 #endif
 837
 838   for (y = 0; y < height; ++y) {
 839     I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
 840     dst_bgra += dst_stride_bgra;
 841     src_y += src_stride_y;
 842     src_u += src_stride_u;
 843     src_v += src_stride_v;
 844   }
 845   return 0;
 846 }
 847
 848 // Convert I422 to ABGR.
 849 LIBYUV_API
 850 int I422ToABGR(const uint8* src_y, int src_stride_y,
 851                const uint8* src_u, int src_stride_u,
 852                const uint8* src_v, int src_stride_v,
 853                uint8* dst_abgr, int dst_stride_abgr,
 854                int width, int height) {
 855   int y;
 856   void (*I422ToABGRRow)(const uint8* y_buf,
 857                         const uint8* u_buf,
 858                         const uint8* v_buf,
 859                         uint8* rgb_buf,
 860                         int width) = I422ToABGRRow_C;
 861   if (!src_y || !src_u || !src_v ||
 862       !dst_abgr ||
 863       width <= 0 || height == 0) {
 864     return -1;
 865   }
 866   // Negative height means invert the image.
 867   if (height < 0) {
 868     height = -height;
 869     dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
 870     dst_stride_abgr = -dst_stride_abgr;
 871   }
 872   // Coalesce rows.
 873   if (src_stride_y == width &&
 874       src_stride_u * 2 == width &&
 875       src_stride_v * 2 == width &&
 876       dst_stride_abgr == width * 4) {
 877     width *= height;
 878     height = 1;
 879     src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
 880   }
 881 #if defined(HAS_I422TOABGRROW_NEON)
 882   if (TestCpuFlag(kCpuHasNEON)) {
 883     I422ToABGRRow = I422ToABGRRow_Any_NEON;
 884     if (IS_ALIGNED(width, 16)) {
 885       I422ToABGRRow = I422ToABGRRow_NEON;
 886     }
 887   }
 888 #elif defined(HAS_I422TOABGRROW_SSSE3)
 889   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
 890     I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
 891     if (IS_ALIGNED(width, 8)) {
 892       I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
 893       if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
 894         I422ToABGRRow = I422ToABGRRow_SSSE3;
 895       }
 896     }
 897   }
 898 #endif
 899
 900   for (y = 0; y < height; ++y) {
 901     I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
 902     dst_abgr += dst_stride_abgr;
 903     src_y += src_stride_y;
 904     src_u += src_stride_u;
 905     src_v += src_stride_v;
 906   }
 907   return 0;
 908 }
 909
 910 // Convert I422 to RGBA.
 911 LIBYUV_API
 912 int I422ToRGBA(const uint8* src_y, int src_stride_y,
 913                const uint8* src_u, int src_stride_u,
 914                const uint8* src_v, int src_stride_v,
 915                uint8* dst_rgba, int dst_stride_rgba,
 916                int width, int height) {
 917   int y;
 918   void (*I422ToRGBARow)(const uint8* y_buf,
 919                         const uint8* u_buf,
 920                         const uint8* v_buf,
 921                         uint8* rgb_buf,
 922                         int width) = I422ToRGBARow_C;
 923   if (!src_y || !src_u || !src_v ||
 924       !dst_rgba ||
 925       width <= 0 || height == 0) {
 926     return -1;
 927   }
 928   // Negative height means invert the image.
 929   if (height < 0) {
 930     height = -height;
 931     dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
 932     dst_stride_rgba = -dst_stride_rgba;
 933   }
 934   // Coalesce rows.
 935   if (src_stride_y == width &&
 936       src_stride_u * 2 == width &&
 937       src_stride_v * 2 == width &&
 938       dst_stride_rgba == width * 4) {
 939     width *= height;
 940     height = 1;
 941     src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
 942   }
 943 #if defined(HAS_I422TORGBAROW_NEON)
 944   if (TestCpuFlag(kCpuHasNEON)) {
 945     I422ToRGBARow = I422ToRGBARow_Any_NEON;
 946     if (IS_ALIGNED(width, 16)) {
 947       I422ToRGBARow = I422ToRGBARow_NEON;
 948     }
 949   }
 950 #elif defined(HAS_I422TORGBAROW_SSSE3)
 951   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
 952     I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
 953     if (IS_ALIGNED(width, 8)) {
 954       I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
 955       if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
 956         I422ToRGBARow = I422ToRGBARow_SSSE3;
 957       }
 958     }
 959   }
 960 #endif
 961
 962   for (y = 0; y < height; ++y) {
 963     I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
 964     dst_rgba += dst_stride_rgba;
 965     src_y += src_stride_y;
 966     src_u += src_stride_u;
 967     src_v += src_stride_v;
 968   }
 969   return 0;
 970 }
 971
 972 // Convert NV12 to RGB565.
 973 LIBYUV_API
 974 int NV12ToRGB565(const uint8* src_y, int src_stride_y,
 975                  const uint8* src_uv, int src_stride_uv,
 976                  uint8* dst_rgb565, int dst_stride_rgb565,
 977                  int width, int height) {
 978   int y;
 979   void (*NV12ToRGB565Row)(const uint8* y_buf,
 980                           const uint8* uv_buf,
 981                           uint8* rgb_buf,
 982                           int width) = NV12ToRGB565Row_C;
 983   if (!src_y || !src_uv || !dst_rgb565 ||
 984       width <= 0 || height == 0) {
 985     return -1;
 986   }
 987   // Negative height means invert the image.
 988   if (height < 0) {
 989     height = -height;
 990     dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
 991     dst_stride_rgb565 = -dst_stride_rgb565;
 992   }
 993 #if defined(HAS_NV12TORGB565ROW_SSSE3)
 994   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
 995     NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
 996     if (IS_ALIGNED(width, 8)) {
 997       NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
 998     }
 999   }
1000 #elif defined(HAS_NV12TORGB565ROW_NEON)
1001   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
1002     NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
1003     if (IS_ALIGNED(width, 8)) {
1004       NV12ToRGB565Row = NV12ToRGB565Row_NEON;
1005     }
1006   }
1007 #endif
1008
1009   for (y = 0; y < height; ++y) {
1010     NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
1011     dst_rgb565 += dst_stride_rgb565;
1012     src_y += src_stride_y;
1013     if (y & 1) {
1014       src_uv += src_stride_uv;
1015     }
1016   }
1017   return 0;
1018 }
1019
1020 // Convert NV21 to RGB565.
1021 LIBYUV_API
1022 int NV21ToRGB565(const uint8* src_y, int src_stride_y,
1023                  const uint8* src_vu, int src_stride_vu,
1024                  uint8* dst_rgb565, int dst_stride_rgb565,
1025                  int width, int height) {
1026   int y;
1027   void (*NV21ToRGB565Row)(const uint8* y_buf,
1028                           const uint8* src_vu,
1029                           uint8* rgb_buf,
1030                           int width) = NV21ToRGB565Row_C;
1031   if (!src_y || !src_vu || !dst_rgb565 ||
1032       width <= 0 || height == 0) {
1033     return -1;
1034   }
1035   // Negative height means invert the image.
1036   if (height < 0) {
1037     height = -height;
1038     dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
1039     dst_stride_rgb565 = -dst_stride_rgb565;
1040   }
1041 #if defined(HAS_NV21TORGB565ROW_SSSE3)
1042   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
1043     NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
1044     if (IS_ALIGNED(width, 8)) {
1045       NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
1046     }
1047   }
1048 #elif defined(HAS_NV21TORGB565ROW_NEON)
1049   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
1050     NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
1051     if (IS_ALIGNED(width, 8)) {
1052       NV21ToRGB565Row = NV21ToRGB565Row_NEON;
1053     }
1054   }
1055 #endif
1056
1057   for (y = 0; y < height; ++y) {
1058     NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
1059     dst_rgb565 += dst_stride_rgb565;
1060     src_y += src_stride_y;
1061     if (y & 1) {
1062       src_vu += src_stride_vu;
1063     }
1064   }
1065   return 0;
1066 }
1067
1068 LIBYUV_API
1069 void SetPlane(uint8* dst_y, int dst_stride_y,
1070               int width, int height,
1071               uint32 value) {
1072   int y;
1073   uint32 v32 = value | (value << 8) | (value << 16) | (value << 24);
1074   void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
1075   // Coalesce rows.
1076   if (dst_stride_y == width) {
1077     width *= height;
1078     height = 1;
1079     dst_stride_y = 0;
1080   }
1081 #if defined(HAS_SETROW_NEON)
1082   if (TestCpuFlag(kCpuHasNEON) &&
1083       IS_ALIGNED(width, 16) &&
1084       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
1085     SetRow = SetRow_NEON;
1086   }
1087 #endif
1088 #if defined(HAS_SETROW_X86)
1089   if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
1090     SetRow = SetRow_X86;
1091   }
1092 #endif
1093
1094   // Set plane
1095   for (y = 0; y < height; ++y) {
1096     SetRow(dst_y, v32, width);
1097     dst_y += dst_stride_y;
1098   }
1099 }
1100
1101 // Draw a rectangle into I420
1102 LIBYUV_API
1103 int I420Rect(uint8* dst_y, int dst_stride_y,
1104              uint8* dst_u, int dst_stride_u,
1105              uint8* dst_v, int dst_stride_v,
1106              int x, int y,
1107              int width, int height,
1108              int value_y, int value_u, int value_v) {
1109   int halfwidth = (width + 1) >> 1;
1110   int halfheight = (height + 1) >> 1;
1111   uint8* start_y = dst_y + y * dst_stride_y + x;
1112   uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
1113   uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
1114   if (!dst_y || !dst_u || !dst_v ||
1115       width <= 0 || height <= 0 ||
1116       x < 0 || y < 0 ||
1117       value_y < 0 || value_y > 255 ||
1118       value_u < 0 || value_u > 255 ||
1119       value_v < 0 || value_v > 255) {
1120     return -1;
1121   }
1122
1123   SetPlane(start_y, dst_stride_y, width, height, value_y);
1124   SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
1125   SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
1126   return 0;
1127 }
1128
1129 // Draw a rectangle into ARGB
1130 LIBYUV_API
1131 int ARGBRect(uint8* dst_argb, int dst_stride_argb,
1132              int dst_x, int dst_y,
1133              int width, int height,
1134              uint32 value) {
1135   if (!dst_argb ||
1136       width <= 0 || height <= 0 ||
1137       dst_x < 0 || dst_y < 0) {
1138     return -1;
1139   }
1140   dst_argb += dst_y * dst_stride_argb + dst_x * 4;
1141   // Coalesce rows.
1142   if (dst_stride_argb == width * 4) {
1143     width *= height;
1144     height = 1;
1145     dst_stride_argb = 0;
1146   }
1147 #if defined(HAS_SETROW_NEON)
1148   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
1149       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1150     ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height);
1151     return 0;
1152   }
1153 #endif
1154 #if defined(HAS_SETROW_X86)
1155   if (TestCpuFlag(kCpuHasX86)) {
1156     ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height);
1157     return 0;
1158   }
1159 #endif
1160   ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height);
1161   return 0;
1162 }
1163
// Convert unattenuated ARGB to preattenuated ARGB.
// An unattenuated ARGB alpha blend uses the formula
// p = a * f + (1 - a) * b
// where
//   p is output pixel
//   f is foreground pixel
//   b is background pixel
//   a is alpha value from foreground pixel
// A preattenuated ARGB alpha blend uses the formula
// p = f + (1 - a) * b
// where
//   f is foreground pixel premultiplied by alpha
1176
1177 LIBYUV_API
1178 int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
1179                   uint8* dst_argb, int dst_stride_argb,
1180                   int width, int height) {
1181   int y;
1182   void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
1183                            int width) = ARGBAttenuateRow_C;
1184   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1185     return -1;
1186   }
1187   if (height < 0) {
1188     height = -height;
1189     src_argb = src_argb + (height - 1) * src_stride_argb;
1190     src_stride_argb = -src_stride_argb;
1191   }
1192   // Coalesce rows.
1193   if (src_stride_argb == width * 4 &&
1194       dst_stride_argb == width * 4) {
1195     width *= height;
1196     height = 1;
1197     src_stride_argb = dst_stride_argb = 0;
1198   }
1199 #if defined(HAS_ARGBATTENUATEROW_SSE2)
1200   if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
1201       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
1202       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1203     ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2;
1204     if (IS_ALIGNED(width, 4)) {
1205       ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
1206     }
1207   }
1208 #endif
1209 #if defined(HAS_ARGBATTENUATEROW_SSSE3)
1210   if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
1211     ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
1212     if (IS_ALIGNED(width, 4)) {
1213       ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
1214     }
1215   }
1216 #endif
1217 #if defined(HAS_ARGBATTENUATEROW_AVX2)
1218   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
1219     ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
1220     if (IS_ALIGNED(width, 8)) {
1221       ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
1222     }
1223   }
1224 #endif
1225 #if defined(HAS_ARGBATTENUATEROW_NEON)
1226   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
1227     ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
1228     if (IS_ALIGNED(width, 8)) {
1229       ARGBAttenuateRow = ARGBAttenuateRow_NEON;
1230     }
1231   }
1232 #endif
1233
1234   for (y = 0; y < height; ++y) {
1235     ARGBAttenuateRow(src_argb, dst_argb, width);
1236     src_argb += src_stride_argb;
1237     dst_argb += dst_stride_argb;
1238   }
1239   return 0;
1240 }
1241
1242 // Convert preattentuated ARGB to unattenuated ARGB.
1243 LIBYUV_API
1244 int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
1245                     uint8* dst_argb, int dst_stride_argb,
1246                     int width, int height) {
1247   int y;
1248   void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
1249                              int width) = ARGBUnattenuateRow_C;
1250   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1251     return -1;
1252   }
1253   if (height < 0) {
1254     height = -height;
1255     src_argb = src_argb + (height - 1) * src_stride_argb;
1256     src_stride_argb = -src_stride_argb;
1257   }
1258   // Coalesce rows.
1259   if (src_stride_argb == width * 4 &&
1260       dst_stride_argb == width * 4) {
1261     width *= height;
1262     height = 1;
1263     src_stride_argb = dst_stride_argb = 0;
1264   }
1265 #if defined(HAS_ARGBUNATTENUATEROW_SSE2)
1266   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
1267     ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
1268     if (IS_ALIGNED(width, 4)) {
1269       ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
1270     }
1271   }
1272 #endif
1273 #if defined(HAS_ARGBUNATTENUATEROW_AVX2)
1274   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
1275     ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
1276     if (IS_ALIGNED(width, 8)) {
1277       ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
1278     }
1279   }
1280 #endif
1281 // TODO(fbarchard): Neon version.
1282
1283   for (y = 0; y < height; ++y) {
1284     ARGBUnattenuateRow(src_argb, dst_argb, width);
1285     src_argb += src_stride_argb;
1286     dst_argb += dst_stride_argb;
1287   }
1288   return 0;
1289 }
1290
1291 // Convert ARGB to Grayed ARGB.
1292 LIBYUV_API
1293 int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
1294                uint8* dst_argb, int dst_stride_argb,
1295                int width, int height) {
1296   int y;
1297   void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
1298                       int width) = ARGBGrayRow_C;
1299   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1300     return -1;
1301   }
1302   if (height < 0) {
1303     height = -height;
1304     src_argb = src_argb + (height - 1) * src_stride_argb;
1305     src_stride_argb = -src_stride_argb;
1306   }
1307   // Coalesce rows.
1308   if (src_stride_argb == width * 4 &&
1309       dst_stride_argb == width * 4) {
1310     width *= height;
1311     height = 1;
1312     src_stride_argb = dst_stride_argb = 0;
1313   }
1314 #if defined(HAS_ARGBGRAYROW_SSSE3)
1315   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
1316       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
1317       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1318     ARGBGrayRow = ARGBGrayRow_SSSE3;
1319   }
1320 #elif defined(HAS_ARGBGRAYROW_NEON)
1321   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1322     ARGBGrayRow = ARGBGrayRow_NEON;
1323   }
1324 #endif
1325
1326   for (y = 0; y < height; ++y) {
1327     ARGBGrayRow(src_argb, dst_argb, width);
1328     src_argb += src_stride_argb;
1329     dst_argb += dst_stride_argb;
1330   }
1331   return 0;
1332 }
1333
1334 // Make a rectangle of ARGB gray scale.
1335 LIBYUV_API
1336 int ARGBGray(uint8* dst_argb, int dst_stride_argb,
1337              int dst_x, int dst_y,
1338              int width, int height) {
1339   int y;
1340   void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
1341                       int width) = ARGBGrayRow_C;
1342   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1343   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
1344     return -1;
1345   }
1346   // Coalesce rows.
1347   if (dst_stride_argb == width * 4) {
1348     width *= height;
1349     height = 1;
1350     dst_stride_argb = 0;
1351   }
1352 #if defined(HAS_ARGBGRAYROW_SSSE3)
1353   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
1354       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1355     ARGBGrayRow = ARGBGrayRow_SSSE3;
1356   }
1357 #elif defined(HAS_ARGBGRAYROW_NEON)
1358   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1359     ARGBGrayRow = ARGBGrayRow_NEON;
1360   }
1361 #endif
1362   for (y = 0; y < height; ++y) {
1363     ARGBGrayRow(dst, dst, width);
1364     dst += dst_stride_argb;
1365   }
1366   return 0;
1367 }
1368
1369 // Make a rectangle of ARGB Sepia tone.
1370 LIBYUV_API
1371 int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
1372               int dst_x, int dst_y, int width, int height) {
1373   int y;
1374   void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
1375   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1376   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
1377     return -1;
1378   }
1379   // Coalesce rows.
1380   if (dst_stride_argb == width * 4) {
1381     width *= height;
1382     height = 1;
1383     dst_stride_argb = 0;
1384   }
1385 #if defined(HAS_ARGBSEPIAROW_SSSE3)
1386   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
1387       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1388     ARGBSepiaRow = ARGBSepiaRow_SSSE3;
1389   }
1390 #elif defined(HAS_ARGBSEPIAROW_NEON)
1391   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1392     ARGBSepiaRow = ARGBSepiaRow_NEON;
1393   }
1394 #endif
1395   for (y = 0; y < height; ++y) {
1396     ARGBSepiaRow(dst, width);
1397     dst += dst_stride_argb;
1398   }
1399   return 0;
1400 }
1401
1402 // Apply a 4x4 matrix to each ARGB pixel.
1403 // Note: Normally for shading, but can be used to swizzle or invert.
1404 LIBYUV_API
1405 int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
1406                     uint8* dst_argb, int dst_stride_argb,
1407                     const int8* matrix_argb,
1408                     int width, int height) {
1409   int y;
1410   void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
1411       const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
1412   if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
1413     return -1;
1414   }
1415   if (height < 0) {
1416     height = -height;
1417     src_argb = src_argb + (height - 1) * src_stride_argb;
1418     src_stride_argb = -src_stride_argb;
1419   }
1420   // Coalesce rows.
1421   if (src_stride_argb == width * 4 &&
1422       dst_stride_argb == width * 4) {
1423     width *= height;
1424     height = 1;
1425     src_stride_argb = dst_stride_argb = 0;
1426   }
1427 #if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
1428   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
1429       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1430     ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
1431   }
1432 #elif defined(HAS_ARGBCOLORMATRIXROW_NEON)
1433   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1434     ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
1435   }
1436 #endif
1437   for (y = 0; y < height; ++y) {
1438     ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
1439     src_argb += src_stride_argb;
1440     dst_argb += dst_stride_argb;
1441   }
1442   return 0;
1443 }
1444
1445 // Apply a 4x3 matrix to each ARGB pixel.
1446 // Deprecated.
1447 LIBYUV_API
1448 int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
1449                    const int8* matrix_rgb,
1450                    int dst_x, int dst_y, int width, int height) {
1451   SIMD_ALIGNED(int8 matrix_argb[16]);
1452   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1453   if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
1454       dst_x < 0 || dst_y < 0) {
1455     return -1;
1456   }
1457
1458   // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
1459   matrix_argb[0] = matrix_rgb[0] / 2;
1460   matrix_argb[1] = matrix_rgb[1] / 2;
1461   matrix_argb[2] = matrix_rgb[2] / 2;
1462   matrix_argb[3] = matrix_rgb[3] / 2;
1463   matrix_argb[4] = matrix_rgb[4] / 2;
1464   matrix_argb[5] = matrix_rgb[5] / 2;
1465   matrix_argb[6] = matrix_rgb[6] / 2;
1466   matrix_argb[7] = matrix_rgb[7] / 2;
1467   matrix_argb[8] = matrix_rgb[8] / 2;
1468   matrix_argb[9] = matrix_rgb[9] / 2;
1469   matrix_argb[10] = matrix_rgb[10] / 2;
1470   matrix_argb[11] = matrix_rgb[11] / 2;
1471   matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
1472   matrix_argb[15] = 64;  // 1.0
1473
1474   return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb,
1475                          dst, dst_stride_argb,
1476                          &matrix_argb[0], width, height);
1477 }
1478
1479 // Apply a color table each ARGB pixel.
1480 // Table contains 256 ARGB values.
1481 LIBYUV_API
1482 int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
1483                    const uint8* table_argb,
1484                    int dst_x, int dst_y, int width, int height) {
1485   int y;
1486   void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
1487                             int width) = ARGBColorTableRow_C;
1488   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1489   if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
1490       dst_x < 0 || dst_y < 0) {
1491     return -1;
1492   }
1493   // Coalesce rows.
1494   if (dst_stride_argb == width * 4) {
1495     width *= height;
1496     height = 1;
1497     dst_stride_argb = 0;
1498   }
1499 #if defined(HAS_ARGBCOLORTABLEROW_X86)
1500   if (TestCpuFlag(kCpuHasX86)) {
1501     ARGBColorTableRow = ARGBColorTableRow_X86;
1502   }
1503 #endif
1504   for (y = 0; y < height; ++y) {
1505     ARGBColorTableRow(dst, table_argb, width);
1506     dst += dst_stride_argb;
1507   }
1508   return 0;
1509 }
1510
1511 // Apply a color table each ARGB pixel but preserve destination alpha.
1512 // Table contains 256 ARGB values.
1513 LIBYUV_API
1514 int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
1515                   const uint8* table_argb,
1516                   int dst_x, int dst_y, int width, int height) {
1517   int y;
1518   void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
1519                            int width) = RGBColorTableRow_C;
1520   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1521   if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
1522       dst_x < 0 || dst_y < 0) {
1523     return -1;
1524   }
1525   // Coalesce rows.
1526   if (dst_stride_argb == width * 4) {
1527     width *= height;
1528     height = 1;
1529     dst_stride_argb = 0;
1530   }
1531 #if defined(HAS_RGBCOLORTABLEROW_X86)
1532   if (TestCpuFlag(kCpuHasX86)) {
1533     RGBColorTableRow = RGBColorTableRow_X86;
1534   }
1535 #endif
1536   for (y = 0; y < height; ++y) {
1537     RGBColorTableRow(dst, table_argb, width);
1538     dst += dst_stride_argb;
1539   }
1540   return 0;
1541 }
1542
1543 // ARGBQuantize is used to posterize art.
1544 // e.g. rgb / qvalue * qvalue + qvalue / 2
1545 // But the low levels implement efficiently with 3 parameters, and could be
1546 // used for other high level operations.
1547 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
1548 // where scale is 1 / interval_size as a fixed point value.
1549 // The divide is replaces with a multiply by reciprocal fixed point multiply.
1550 // Caveat - although SSE2 saturates, the C function does not and should be used
1551 // with care if doing anything but quantization.
1552 LIBYUV_API
1553 int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
1554                  int scale, int interval_size, int interval_offset,
1555                  int dst_x, int dst_y, int width, int height) {
1556   int y;
1557   void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
1558                           int interval_offset, int width) = ARGBQuantizeRow_C;
1559   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1560   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
1561       interval_size < 1 || interval_size > 255) {
1562     return -1;
1563   }
1564   // Coalesce rows.
1565   if (dst_stride_argb == width * 4) {
1566     width *= height;
1567     height = 1;
1568     dst_stride_argb = 0;
1569   }
1570 #if defined(HAS_ARGBQUANTIZEROW_SSE2)
1571   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
1572       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1573     ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
1574   }
1575 #elif defined(HAS_ARGBQUANTIZEROW_NEON)
1576   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1577     ARGBQuantizeRow = ARGBQuantizeRow_NEON;
1578   }
1579 #endif
1580   for (y = 0; y < height; ++y) {
1581     ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
1582     dst += dst_stride_argb;
1583   }
1584   return 0;
1585 }
1586
1587 // Computes table of cumulative sum for image where the value is the sum
1588 // of all values above and to the left of the entry. Used by ARGBBlur.
1589 LIBYUV_API
1590 int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
1591                              int32* dst_cumsum, int dst_stride32_cumsum,
1592                              int width, int height) {
1593   int y;
1594   void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
1595       const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
1596   int32* previous_cumsum = dst_cumsum;
1597   if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
1598     return -1;
1599   }
1600 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
1601   if (TestCpuFlag(kCpuHasSSE2)) {
1602     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
1603   }
1604 #endif
1605   memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
1606   for (y = 0; y < height; ++y) {
1607     ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
1608     previous_cumsum = dst_cumsum;
1609     dst_cumsum += dst_stride32_cumsum;
1610     src_argb += src_stride_argb;
1611   }
1612   return 0;
1613 }
1614
1615 // Blur ARGB image.
1616 // Caller should allocate CumulativeSum table of width * height * 16 bytes
1617 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
1618 // as the buffer is treated as circular.
1619 LIBYUV_API
1620 int ARGBBlur(const uint8* src_argb, int src_stride_argb,
1621              uint8* dst_argb, int dst_stride_argb,
1622              int32* dst_cumsum, int dst_stride32_cumsum,
1623              int width, int height, int radius) {
1624   int y;
1625   void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum,
1626       const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
1627   void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
1628       int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
1629   int32* cumsum_bot_row;
1630   int32* max_cumsum_bot_row;
1631   int32* cumsum_top_row;
1632
1633   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1634     return -1;
1635   }
1636   if (height < 0) {
1637     height = -height;
1638     src_argb = src_argb + (height - 1) * src_stride_argb;
1639     src_stride_argb = -src_stride_argb;
1640   }
1641   if (radius > height) {
1642     radius = height;
1643   }
1644   if (radius > (width / 2 - 1)) {
1645     radius = width / 2 - 1;
1646   }
1647   if (radius <= 0) {
1648     return -1;
1649   }
1650 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
1651   if (TestCpuFlag(kCpuHasSSE2)) {
1652     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
1653     CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
1654   }
1655 #endif
1656   // Compute enough CumulativeSum for first row to be blurred. After this
1657   // one row of CumulativeSum is updated at a time.
1658   ARGBComputeCumulativeSum(src_argb, src_stride_argb,
1659                            dst_cumsum, dst_stride32_cumsum,
1660                            width, radius);
1661
1662   src_argb = src_argb + radius * src_stride_argb;
1663   cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
1664
1665   max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
1666   cumsum_top_row = &dst_cumsum[0];
1667
1668   for (y = 0; y < height; ++y) {
1669     int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
1670     int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
1671     int area = radius * (bot_y - top_y);
1672     int boxwidth = radius * 4;
1673     int x;
1674     int n;
1675
1676     // Increment cumsum_top_row pointer with circular buffer wrap around.
1677     if (top_y) {
1678       cumsum_top_row += dst_stride32_cumsum;
1679       if (cumsum_top_row >= max_cumsum_bot_row) {
1680         cumsum_top_row = dst_cumsum;
1681       }
1682     }
1683     // Increment cumsum_bot_row pointer with circular buffer wrap around and
1684     // then fill in a row of CumulativeSum.
1685     if ((y + radius) < height) {
1686       const int32* prev_cumsum_bot_row = cumsum_bot_row;
1687       cumsum_bot_row += dst_stride32_cumsum;
1688       if (cumsum_bot_row >= max_cumsum_bot_row) {
1689         cumsum_bot_row = dst_cumsum;
1690       }
1691       ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
1692                               width);
1693       src_argb += src_stride_argb;
1694     }
1695
1696     // Left clipped.
1697     for (x = 0; x < radius + 1; ++x) {
1698       CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
1699                                 boxwidth, area, &dst_argb[x * 4], 1);
1700       area += (bot_y - top_y);
1701       boxwidth += 4;
1702     }
1703
1704     // Middle unclipped.
1705     n = (width - 1) - radius - x + 1;
1706     CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
1707                               boxwidth, area, &dst_argb[x * 4], n);
1708
1709     // Right clipped.
1710     for (x += n; x <= width - 1; ++x) {
1711       area -= (bot_y - top_y);
1712       boxwidth -= 4;
1713       CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
1714                                 cumsum_bot_row + (x - radius - 1) * 4,
1715                                 boxwidth, area, &dst_argb[x * 4], 1);
1716     }
1717     dst_argb += dst_stride_argb;
1718   }
1719   return 0;
1720 }
1721
1722 // Multiply ARGB image by a specified ARGB value.
1723 LIBYUV_API
1724 int ARGBShade(const uint8* src_argb, int src_stride_argb,
1725               uint8* dst_argb, int dst_stride_argb,
1726               int width, int height, uint32 value) {
1727   int y;
1728   void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
1729                        int width, uint32 value) = ARGBShadeRow_C;
1730   if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
1731     return -1;
1732   }
1733   if (height < 0) {
1734     height = -height;
1735     src_argb = src_argb + (height - 1) * src_stride_argb;
1736     src_stride_argb = -src_stride_argb;
1737   }
1738   // Coalesce rows.
1739   if (src_stride_argb == width * 4 &&
1740       dst_stride_argb == width * 4) {
1741     width *= height;
1742     height = 1;
1743     src_stride_argb = dst_stride_argb = 0;
1744   }
1745 #if defined(HAS_ARGBSHADEROW_SSE2)
1746   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
1747       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
1748       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1749     ARGBShadeRow = ARGBShadeRow_SSE2;
1750   }
1751 #elif defined(HAS_ARGBSHADEROW_NEON)
1752   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1753     ARGBShadeRow = ARGBShadeRow_NEON;
1754   }
1755 #endif
1756
1757   for (y = 0; y < height; ++y) {
1758     ARGBShadeRow(src_argb, dst_argb, width, value);
1759     src_argb += src_stride_argb;
1760     dst_argb += dst_stride_argb;
1761   }
1762   return 0;
1763 }
1764
1765 // Interpolate 2 ARGB images by specified amount (0 to 255).
1766 LIBYUV_API
1767 int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
1768                     const uint8* src_argb1, int src_stride_argb1,
1769                     uint8* dst_argb, int dst_stride_argb,
1770                     int width, int height, int interpolation) {
1771   int y;
1772   void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
1773                          ptrdiff_t src_stride, int dst_width,
1774                          int source_y_fraction) = InterpolateRow_C;
1775   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
1776     return -1;
1777   }
1778   // Negative height means invert the image.
1779   if (height < 0) {
1780     height = -height;
1781     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
1782     dst_stride_argb = -dst_stride_argb;
1783   }
1784   // Coalesce rows.
1785   if (src_stride_argb0 == width * 4 &&
1786       src_stride_argb1 == width * 4 &&
1787       dst_stride_argb == width * 4) {
1788     width *= height;
1789     height = 1;
1790     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
1791   }
1792 #if defined(HAS_INTERPOLATEROW_SSE2)
1793   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
1794     InterpolateRow = InterpolateRow_Any_SSE2;
1795     if (IS_ALIGNED(width, 4)) {
1796       InterpolateRow = InterpolateRow_Unaligned_SSE2;
1797       if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
1798           IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
1799           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1800         InterpolateRow = InterpolateRow_SSE2;
1801       }
1802     }
1803   }
1804 #endif
1805 #if defined(HAS_INTERPOLATEROW_SSSE3)
1806   if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
1807     InterpolateRow = InterpolateRow_Any_SSSE3;
1808     if (IS_ALIGNED(width, 4)) {
1809       InterpolateRow = InterpolateRow_Unaligned_SSSE3;
1810       if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
1811           IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
1812           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1813         InterpolateRow = InterpolateRow_SSSE3;
1814       }
1815     }
1816   }
1817 #endif
1818 #if defined(HAS_INTERPOLATEROW_AVX2)
1819   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
1820     InterpolateRow = InterpolateRow_Any_AVX2;
1821     if (IS_ALIGNED(width, 8)) {
1822       InterpolateRow = InterpolateRow_AVX2;
1823     }
1824   }
1825 #endif
1826 #if defined(HAS_INTERPOLATEROW_NEON)
1827   if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
1828     InterpolateRow = InterpolateRow_Any_NEON;
1829     if (IS_ALIGNED(width, 4)) {
1830       InterpolateRow = InterpolateRow_NEON;
1831     }
1832   }
1833 #endif
1834 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
1835   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 &&
1836       IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) &&
1837       IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) &&
1838       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
1839     ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2;
1840   }
1841 #endif
1842
1843   for (y = 0; y < height; ++y) {
1844     InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
1845                    width * 4, interpolation);
1846     src_argb0 += src_stride_argb0;
1847     src_argb1 += src_stride_argb1;
1848     dst_argb += dst_stride_argb;
1849   }
1850   return 0;
1851 }
1852
1853 // Shuffle ARGB channel order.  e.g. BGRA to ARGB.
1854 LIBYUV_API
1855 int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
1856                 uint8* dst_argb, int dst_stride_argb,
1857                 const uint8* shuffler, int width, int height) {
1858   int y;
1859   void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
1860                          const uint8* shuffler, int pix) = ARGBShuffleRow_C;
1861   if (!src_bgra || !dst_argb ||
1862       width <= 0 || height == 0) {
1863     return -1;
1864   }
1865   // Negative height means invert the image.
1866   if (height < 0) {
1867     height = -height;
1868     src_bgra = src_bgra + (height - 1) * src_stride_bgra;
1869     src_stride_bgra = -src_stride_bgra;
1870   }
1871   // Coalesce rows.
1872   if (src_stride_bgra == width * 4 &&
1873       dst_stride_argb == width * 4) {
1874     width *= height;
1875     height = 1;
1876     src_stride_bgra = dst_stride_argb = 0;
1877   }
1878 #if defined(HAS_ARGBSHUFFLEROW_SSE2)
1879   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
1880     ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
1881     if (IS_ALIGNED(width, 4)) {
1882       ARGBShuffleRow = ARGBShuffleRow_SSE2;
1883     }
1884   }
1885 #endif
1886 #if defined(HAS_ARGBSHUFFLEROW_SSSE3)
1887   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
1888     ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
1889     if (IS_ALIGNED(width, 8)) {
1890       ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3;
1891       if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) &&
1892           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1893         ARGBShuffleRow = ARGBShuffleRow_SSSE3;
1894       }
1895     }
1896   }
1897 #endif
1898 #if defined(HAS_ARGBSHUFFLEROW_AVX2)
1899   if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
1900     ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
1901     if (IS_ALIGNED(width, 16)) {
1902       ARGBShuffleRow = ARGBShuffleRow_AVX2;
1903     }
1904   }
1905 #endif
1906 #if defined(HAS_ARGBSHUFFLEROW_NEON)
1907   if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
1908     ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
1909     if (IS_ALIGNED(width, 4)) {
1910       ARGBShuffleRow = ARGBShuffleRow_NEON;
1911     }
1912   }
1913 #endif
1914
1915   for (y = 0; y < height; ++y) {
1916     ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
1917     src_bgra += src_stride_bgra;
1918     dst_argb += dst_stride_argb;
1919   }
1920   return 0;
1921 }
1922
1923 // Sobel ARGB effect.
1924 static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
1925                         uint8* dst_argb, int dst_stride_argb,
1926                         int width, int height,
1927                         void (*SobelRow)(const uint8* src_sobelx,
1928                                          const uint8* src_sobely,
1929                                          uint8* dst, int width)) {
1930   int y;
1931   void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
1932                          uint32 selector, int pix) = ARGBToBayerGGRow_C;
1933   void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
1934                     uint8* dst_sobely, int width) = SobelYRow_C;
1935   void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
1936                     const uint8* src_y2, uint8* dst_sobely, int width) =
1937       SobelXRow_C;
1938   const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
1939   if (!src_argb  || !dst_argb || width <= 0 || height == 0) {
1940     return -1;
1941   }
1942   // Negative height means invert the image.
1943   if (height < 0) {
1944     height = -height;
1945     src_argb  = src_argb  + (height - 1) * src_stride_argb;
1946     src_stride_argb = -src_stride_argb;
1947   }
1948   // ARGBToBayer used to select G channel from ARGB.
1949 #if defined(HAS_ARGBTOBAYERGGROW_SSE2)
1950   if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
1951       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
1952     ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2;
1953     if (IS_ALIGNED(width, 8)) {
1954       ARGBToBayerRow = ARGBToBayerGGRow_SSE2;
1955     }
1956   }
1957 #endif
1958 #if defined(HAS_ARGBTOBAYERROW_SSSE3)
1959   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
1960       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
1961     ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
1962     if (IS_ALIGNED(width, 8)) {
1963       ARGBToBayerRow = ARGBToBayerRow_SSSE3;
1964     }
1965   }
1966 #endif
1967 #if defined(HAS_ARGBTOBAYERGGROW_NEON)
1968   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
1969     ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON;
1970     if (IS_ALIGNED(width, 8)) {
1971       ARGBToBayerRow = ARGBToBayerGGRow_NEON;
1972     }
1973   }
1974 #endif
1975 #if defined(HAS_SOBELYROW_SSE2)
1976   if (TestCpuFlag(kCpuHasSSE2)) {
1977     SobelYRow = SobelYRow_SSE2;
1978   }
1979 #endif
1980 #if defined(HAS_SOBELYROW_NEON)
1981   if (TestCpuFlag(kCpuHasNEON)) {
1982     SobelYRow = SobelYRow_NEON;
1983   }
1984 #endif
1985 #if defined(HAS_SOBELXROW_SSE2)
1986   if (TestCpuFlag(kCpuHasSSE2)) {
1987     SobelXRow = SobelXRow_SSE2;
1988   }
1989 #endif
1990 #if defined(HAS_SOBELXROW_NEON)
1991   if (TestCpuFlag(kCpuHasNEON)) {
1992     SobelXRow = SobelXRow_NEON;
1993   }
1994 #endif
1995   {
1996     // 3 rows with edges before/after.
1997     const int kRowSize = (width + kEdge + 15) & ~15;
1998     align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
1999     uint8* row_sobelx = rows;
2000     uint8* row_sobely = rows + kRowSize;
2001     uint8* row_y = rows + kRowSize * 2;
2002
2003     // Convert first row.
2004     uint8* row_y0 = row_y + kEdge;
2005     uint8* row_y1 = row_y0 + kRowSize;
2006     uint8* row_y2 = row_y1 + kRowSize;
2007     ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
2008     row_y0[-1] = row_y0[0];
2009     memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
2010     ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
2011     row_y1[-1] = row_y1[0];
2012     memset(row_y1 + width, row_y1[width - 1], 16);
2013     memset(row_y2 + width, 0, 16);
2014
2015     for (y = 0; y < height; ++y) {
2016       // Convert next row of ARGB to Y.
2017       if (y < (height - 1)) {
2018         src_argb += src_stride_argb;
2019       }
2020       ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
2021       row_y2[-1] = row_y2[0];
2022       row_y2[width] = row_y2[width - 1];
2023
2024       SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
2025       SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
2026       SobelRow(row_sobelx, row_sobely, dst_argb, width);
2027
2028       // Cycle thru circular queue of 3 row_y buffers.
2029       {
2030         uint8* row_yt = row_y0;
2031         row_y0 = row_y1;
2032         row_y1 = row_y2;
2033         row_y2 = row_yt;
2034       }
2035
2036       dst_argb += dst_stride_argb;
2037     }
2038     free_aligned_buffer_64(rows);
2039   }
2040   return 0;
2041 }
2042
2043 // Sobel ARGB effect.
2044 LIBYUV_API
2045 int ARGBSobel(const uint8* src_argb, int src_stride_argb,
2046               uint8* dst_argb, int dst_stride_argb,
2047               int width, int height) {
2048   void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
2049                    uint8* dst_argb, int width) = SobelRow_C;
2050 #if defined(HAS_SOBELROW_SSE2)
2051   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
2052       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
2053     SobelRow = SobelRow_SSE2;
2054   }
2055 #endif
2056 #if defined(HAS_SOBELROW_NEON)
2057   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2058     SobelRow = SobelRow_NEON;
2059   }
2060 #endif
2061   return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
2062                       width, height, SobelRow);
2063 }
2064
2065 // Sobel ARGB effect with planar output.
2066 LIBYUV_API
2067 int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
2068                      uint8* dst_y, int dst_stride_y,
2069                      int width, int height) {
2070   void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
2071                           uint8* dst_, int width) = SobelToPlaneRow_C;
2072 #if defined(HAS_SOBELTOPLANEROW_SSE2)
2073   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
2074       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
2075     SobelToPlaneRow = SobelToPlaneRow_SSE2;
2076   }
2077 #endif
2078 #if defined(HAS_SOBELTOPLANEROW_NEON)
2079   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
2080     SobelToPlaneRow = SobelToPlaneRow_NEON;
2081   }
2082 #endif
2083   return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
2084                       width, height, SobelToPlaneRow);
2085 }
2086
2087 // SobelXY ARGB effect.
2088 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B.  G = Sobel.
2089 LIBYUV_API
2090 int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
2091                 uint8* dst_argb, int dst_stride_argb,
2092                 int width, int height) {
2093   void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
2094                      uint8* dst_argb, int width) = SobelXYRow_C;
2095 #if defined(HAS_SOBELXYROW_SSE2)
2096   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
2097       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
2098     SobelXYRow = SobelXYRow_SSE2;
2099   }
2100 #endif
2101 #if defined(HAS_SOBELXYROW_NEON)
2102   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2103     SobelXYRow = SobelXYRow_NEON;
2104   }
2105 #endif
2106   return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
2107                       width, height, SobelXYRow);
2108 }
2109
2110 // Apply a 4x4 polynomial to each ARGB pixel.
2111 LIBYUV_API
2112 int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
2113                    uint8* dst_argb, int dst_stride_argb,
2114                    const float* poly,
2115                    int width, int height) {
2116   int y;
2117   void (*ARGBPolynomialRow)(const uint8* src_argb,
2118                             uint8* dst_argb, const float* poly,
2119                             int width) = ARGBPolynomialRow_C;
2120   if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
2121     return -1;
2122   }
2123   // Negative height means invert the image.
2124   if (height < 0) {
2125     height = -height;
2126     src_argb  = src_argb  + (height - 1) * src_stride_argb;
2127     src_stride_argb = -src_stride_argb;
2128   }
2129   // Coalesce rows.
2130   if (src_stride_argb == width * 4 &&
2131       dst_stride_argb == width * 4) {
2132     width *= height;
2133     height = 1;
2134     src_stride_argb = dst_stride_argb = 0;
2135   }
2136 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
2137   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
2138     ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
2139   }
2140 #endif
2141 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
2142   if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
2143       IS_ALIGNED(width, 2)) {
2144     ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
2145   }
2146 #endif
2147
2148   for (y = 0; y < height; ++y) {
2149     ARGBPolynomialRow(src_argb, dst_argb, poly, width);
2150     src_argb += src_stride_argb;
2151     dst_argb += dst_stride_argb;
2152   }
2153   return 0;
2154 }
2155
2156 // Apply a lumacolortable to each ARGB pixel.
2157 LIBYUV_API
2158 int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
2159                        uint8* dst_argb, int dst_stride_argb,
2160                        const uint8* luma,
2161                        int width, int height) {
2162   int y;
2163   void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
2164       int width, const uint8* luma, const uint32 lumacoeff) =
2165       ARGBLumaColorTableRow_C;
2166   if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
2167     return -1;
2168   }
2169   // Negative height means invert the image.
2170   if (height < 0) {
2171     height = -height;
2172     src_argb  = src_argb  + (height - 1) * src_stride_argb;
2173     src_stride_argb = -src_stride_argb;
2174   }
2175   // Coalesce rows.
2176   if (src_stride_argb == width * 4 &&
2177       dst_stride_argb == width * 4) {
2178     width *= height;
2179     height = 1;
2180     src_stride_argb = dst_stride_argb = 0;
2181   }
2182 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
2183   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
2184     ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
2185   }
2186 #endif
2187
2188   for (y = 0; y < height; ++y) {
2189     ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
2190     src_argb += src_stride_argb;
2191     dst_argb += dst_stride_argb;
2192   }
2193   return 0;
2194 }
2195
2196 // Copy Alpha from one ARGB image to another.
2197 LIBYUV_API
2198 int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
2199                   uint8* dst_argb, int dst_stride_argb,
2200                   int width, int height) {
2201   int y;
2202   void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
2203       ARGBCopyAlphaRow_C;
2204   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
2205     return -1;
2206   }
2207   // Negative height means invert the image.
2208   if (height < 0) {
2209     height = -height;
2210     src_argb = src_argb + (height - 1) * src_stride_argb;
2211     src_stride_argb = -src_stride_argb;
2212   }
2213   // Coalesce rows.
2214   if (src_stride_argb == width * 4 &&
2215       dst_stride_argb == width * 4) {
2216     width *= height;
2217     height = 1;
2218     src_stride_argb = dst_stride_argb = 0;
2219   }
2220 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
2221   if (TestCpuFlag(kCpuHasSSE2) &&
2222       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
2223       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
2224       IS_ALIGNED(width, 8)) {
2225     ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
2226   }
2227 #endif
2228 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
2229   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
2230     ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
2231   }
2232 #endif
2233
2234   for (y = 0; y < height; ++y) {
2235     ARGBCopyAlphaRow(src_argb, dst_argb, width);
2236     src_argb += src_stride_argb;
2237     dst_argb += dst_stride_argb;
2238   }
2239   return 0;
2240 }
2241
2242 // Copy a planar Y channel to the alpha channel of a destination ARGB image.
2243 LIBYUV_API
2244 int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
2245                      uint8* dst_argb, int dst_stride_argb,
2246                      int width, int height) {
2247   int y;
2248   void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
2249       ARGBCopyYToAlphaRow_C;
2250   if (!src_y || !dst_argb || width <= 0 || height == 0) {
2251     return -1;
2252   }
2253   // Negative height means invert the image.
2254   if (height < 0) {
2255     height = -height;
2256     src_y = src_y + (height - 1) * src_stride_y;
2257     src_stride_y = -src_stride_y;
2258   }
2259   // Coalesce rows.
2260   if (src_stride_y == width &&
2261       dst_stride_argb == width * 4) {
2262     width *= height;
2263     height = 1;
2264     src_stride_y = dst_stride_argb = 0;
2265   }
2266 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
2267   if (TestCpuFlag(kCpuHasSSE2) &&
2268       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
2269       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
2270       IS_ALIGNED(width, 8)) {
2271     ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
2272   }
2273 #endif
2274 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
2275   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
2276     ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
2277   }
2278 #endif
2279
2280   for (y = 0; y < height; ++y) {
2281     ARGBCopyYToAlphaRow(src_y, dst_argb, width);
2282     src_y += src_stride_y;
2283     dst_argb += dst_stride_argb;
2284   }
2285   return 0;
2286 }
2287
2288 #ifdef __cplusplus
2289 }  // extern "C"
2290 }  // namespace libyuv
2291 #endif