src/third_party/libyuv/source/planar_functions.cc

   1 /*
   2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS. All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include "libyuv/planar_functions.h"
  12
  13 #include <string.h>  // for memset()
  14
  15 #include "libyuv/cpu_id.h"
  16 #ifdef HAVE_JPEG
  17 #include "libyuv/mjpeg_decoder.h"
  18 #endif
  19 #include "libyuv/row.h"
  20
  21 #ifdef __cplusplus
  22 namespace libyuv {
  23 extern "C" {
  24 #endif
  25
  26 // Copy a plane of data
  27 LIBYUV_API
  28 void CopyPlane(const uint8* src_y, int src_stride_y,
  29                uint8* dst_y, int dst_stride_y,
  30                int width, int height) {
  31   int y;
  32   void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
  33   // Coalesce rows.
  34   if (src_stride_y == width &&
  35       dst_stride_y == width) {
  36     width *= height;
  37     height = 1;
  38     src_stride_y = dst_stride_y = 0;
  39   }
  40   // Nothing to do.
  41   if (src_y == dst_y && src_stride_y == dst_stride_y) {
  42     return;
  43   }
  44 #if defined(HAS_COPYROW_X86)
  45   if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
  46     CopyRow = CopyRow_X86;
  47   }
  48 #endif
  49 #if defined(HAS_COPYROW_SSE2)
  50   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
  51     CopyRow = CopyRow_SSE2;
  52   }
  53 #endif
  54 #if defined(HAS_COPYROW_AVX)
  55   if (TestCpuFlag(kCpuHasAVX) && IS_ALIGNED(width, 64)) {
  56     CopyRow = CopyRow_AVX;
  57   }
  58 #endif
  59 #if defined(HAS_COPYROW_ERMS)
  60   if (TestCpuFlag(kCpuHasERMS)) {
  61     CopyRow = CopyRow_ERMS;
  62   }
  63 #endif
  64 #if defined(HAS_COPYROW_NEON)
  65   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
  66     CopyRow = CopyRow_NEON;
  67   }
  68 #endif
  69 #if defined(HAS_COPYROW_MIPS)
  70   if (TestCpuFlag(kCpuHasMIPS)) {
  71     CopyRow = CopyRow_MIPS;
  72   }
  73 #endif
  74
  75   // Copy plane
  76   for (y = 0; y < height; ++y) {
  77     CopyRow(src_y, dst_y, width);
  78     src_y += src_stride_y;
  79     dst_y += dst_stride_y;
  80   }
  81 }
  82
  83 LIBYUV_API
  84 void CopyPlane_16(const uint16* src_y, int src_stride_y,
  85                   uint16* dst_y, int dst_stride_y,
  86                   int width, int height) {
  87   int y;
  88   void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
  89   // Coalesce rows.
  90   if (src_stride_y == width &&
  91       dst_stride_y == width) {
  92     width *= height;
  93     height = 1;
  94     src_stride_y = dst_stride_y = 0;
  95   }
  96 #if defined(HAS_COPYROW_16_X86)
  97   if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
  98     CopyRow = CopyRow_16_X86;
  99   }
 100 #endif
 101 #if defined(HAS_COPYROW_16_SSE2)
 102   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
 103     CopyRow = CopyRow_16_SSE2;
 104   }
 105 #endif
 106 #if defined(HAS_COPYROW_16_ERMS)
 107   if (TestCpuFlag(kCpuHasERMS)) {
 108     CopyRow = CopyRow_16_ERMS;
 109   }
 110 #endif
 111 #if defined(HAS_COPYROW_16_NEON)
 112   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
 113     CopyRow = CopyRow_16_NEON;
 114   }
 115 #endif
 116 #if defined(HAS_COPYROW_16_MIPS)
 117   if (TestCpuFlag(kCpuHasMIPS)) {
 118     CopyRow = CopyRow_16_MIPS;
 119   }
 120 #endif
 121
 122   // Copy plane
 123   for (y = 0; y < height; ++y) {
 124     CopyRow(src_y, dst_y, width);
 125     src_y += src_stride_y;
 126     dst_y += dst_stride_y;
 127   }
 128 }
 129
 130 // Copy I422.
 131 LIBYUV_API
 132 int I422Copy(const uint8* src_y, int src_stride_y,
 133              const uint8* src_u, int src_stride_u,
 134              const uint8* src_v, int src_stride_v,
 135              uint8* dst_y, int dst_stride_y,
 136              uint8* dst_u, int dst_stride_u,
 137              uint8* dst_v, int dst_stride_v,
 138              int width, int height) {
 139   int halfwidth = (width + 1) >> 1;
 140   if (!src_y || !src_u || !src_v ||
 141       !dst_y || !dst_u || !dst_v ||
 142       width <= 0 || height == 0) {
 143     return -1;
 144   }
 145   // Negative height means invert the image.
 146   if (height < 0) {
 147     height = -height;
 148     src_y = src_y + (height - 1) * src_stride_y;
 149     src_u = src_u + (height - 1) * src_stride_u;
 150     src_v = src_v + (height - 1) * src_stride_v;
 151     src_stride_y = -src_stride_y;
 152     src_stride_u = -src_stride_u;
 153     src_stride_v = -src_stride_v;
 154   }
 155   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
 156   CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
 157   CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
 158   return 0;
 159 }
 160
 161 // Copy I444.
 162 LIBYUV_API
 163 int I444Copy(const uint8* src_y, int src_stride_y,
 164              const uint8* src_u, int src_stride_u,
 165              const uint8* src_v, int src_stride_v,
 166              uint8* dst_y, int dst_stride_y,
 167              uint8* dst_u, int dst_stride_u,
 168              uint8* dst_v, int dst_stride_v,
 169              int width, int height) {
 170   if (!src_y || !src_u || !src_v ||
 171       !dst_y || !dst_u || !dst_v ||
 172       width <= 0 || height == 0) {
 173     return -1;
 174   }
 175   // Negative height means invert the image.
 176   if (height < 0) {
 177     height = -height;
 178     src_y = src_y + (height - 1) * src_stride_y;
 179     src_u = src_u + (height - 1) * src_stride_u;
 180     src_v = src_v + (height - 1) * src_stride_v;
 181     src_stride_y = -src_stride_y;
 182     src_stride_u = -src_stride_u;
 183     src_stride_v = -src_stride_v;
 184   }
 185
 186   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
 187   CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
 188   CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
 189   return 0;
 190 }
 191
 192 // Copy I400.
 193 LIBYUV_API
 194 int I400ToI400(const uint8* src_y, int src_stride_y,
 195                uint8* dst_y, int dst_stride_y,
 196                int width, int height) {
 197   if (!src_y || !dst_y || width <= 0 || height == 0) {
 198     return -1;
 199   }
 200   // Negative height means invert the image.
 201   if (height < 0) {
 202     height = -height;
 203     src_y = src_y + (height - 1) * src_stride_y;
 204     src_stride_y = -src_stride_y;
 205   }
 206   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
 207   return 0;
 208 }
 209
 210 // Convert I420 to I400.
 211 LIBYUV_API
 212 int I420ToI400(const uint8* src_y, int src_stride_y,
 213                const uint8* src_u, int src_stride_u,
 214                const uint8* src_v, int src_stride_v,
 215                uint8* dst_y, int dst_stride_y,
 216                int width, int height) {
 217   if (!src_y || !dst_y || width <= 0 || height == 0) {
 218     return -1;
 219   }
 220   // Negative height means invert the image.
 221   if (height < 0) {
 222     height = -height;
 223     src_y = src_y + (height - 1) * src_stride_y;
 224     src_stride_y = -src_stride_y;
 225   }
 226   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
 227   return 0;
 228 }
 229
 230 // Mirror a plane of data.
 231 void MirrorPlane(const uint8* src_y, int src_stride_y,
 232                  uint8* dst_y, int dst_stride_y,
 233                  int width, int height) {
 234   int y;
 235   void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
 236   // Negative height means invert the image.
 237   if (height < 0) {
 238     height = -height;
 239     src_y = src_y + (height - 1) * src_stride_y;
 240     src_stride_y = -src_stride_y;
 241   }
 242 #if defined(HAS_MIRRORROW_NEON)
 243   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
 244     MirrorRow = MirrorRow_NEON;
 245   }
 246 #endif
 247 #if defined(HAS_MIRRORROW_SSE2)
 248   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
 249     MirrorRow = MirrorRow_SSE2;
 250   }
 251 #endif
 252 #if defined(HAS_MIRRORROW_SSSE3)
 253   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
 254     MirrorRow = MirrorRow_SSSE3;
 255   }
 256 #endif
 257 #if defined(HAS_MIRRORROW_AVX2)
 258   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
 259     MirrorRow = MirrorRow_AVX2;
 260   }
 261 #endif
 262
 263   // Mirror plane
 264   for (y = 0; y < height; ++y) {
 265     MirrorRow(src_y, dst_y, width);
 266     src_y += src_stride_y;
 267     dst_y += dst_stride_y;
 268   }
 269 }
 270
 271 // Convert YUY2 to I422.
 272 LIBYUV_API
 273 int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
 274                uint8* dst_y, int dst_stride_y,
 275                uint8* dst_u, int dst_stride_u,
 276                uint8* dst_v, int dst_stride_v,
 277                int width, int height) {
 278   int y;
 279   void (*YUY2ToUV422Row)(const uint8* src_yuy2,
 280                          uint8* dst_u, uint8* dst_v, int pix) =
 281       YUY2ToUV422Row_C;
 282   void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
 283       YUY2ToYRow_C;
 284   // Negative height means invert the image.
 285   if (height < 0) {
 286     height = -height;
 287     src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
 288     src_stride_yuy2 = -src_stride_yuy2;
 289   }
 290   // Coalesce rows.
 291   if (src_stride_yuy2 == width * 2 &&
 292       dst_stride_y == width &&
 293       dst_stride_u * 2 == width &&
 294       dst_stride_v * 2 == width) {
 295     width *= height;
 296     height = 1;
 297     src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
 298   }
 299 #if defined(HAS_YUY2TOYROW_SSE2)
 300   if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
 301     YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
 302     YUY2ToYRow = YUY2ToYRow_Any_SSE2;
 303     if (IS_ALIGNED(width, 16)) {
 304       YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
 305       YUY2ToYRow = YUY2ToYRow_SSE2;
 306     }
 307   }
 308 #endif
 309 #if defined(HAS_YUY2TOYROW_AVX2)
 310   if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
 311     YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
 312     YUY2ToYRow = YUY2ToYRow_Any_AVX2;
 313     if (IS_ALIGNED(width, 32)) {
 314       YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
 315       YUY2ToYRow = YUY2ToYRow_AVX2;
 316     }
 317   }
 318 #endif
 319 #if defined(HAS_YUY2TOYROW_NEON)
 320   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
 321     YUY2ToYRow = YUY2ToYRow_Any_NEON;
 322     if (width >= 16) {
 323       YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
 324     }
 325     if (IS_ALIGNED(width, 16)) {
 326       YUY2ToYRow = YUY2ToYRow_NEON;
 327       YUY2ToUV422Row = YUY2ToUV422Row_NEON;
 328     }
 329   }
 330 #endif
 331
 332   for (y = 0; y < height; ++y) {
 333     YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
 334     YUY2ToYRow(src_yuy2, dst_y, width);
 335     src_yuy2 += src_stride_yuy2;
 336     dst_y += dst_stride_y;
 337     dst_u += dst_stride_u;
 338     dst_v += dst_stride_v;
 339   }
 340   return 0;
 341 }
 342
 343 // Convert UYVY to I422.
 344 LIBYUV_API
 345 int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
 346                uint8* dst_y, int dst_stride_y,
 347                uint8* dst_u, int dst_stride_u,
 348                uint8* dst_v, int dst_stride_v,
 349                int width, int height) {
 350   int y;
 351   void (*UYVYToUV422Row)(const uint8* src_uyvy,
 352                          uint8* dst_u, uint8* dst_v, int pix) =
 353       UYVYToUV422Row_C;
 354   void (*UYVYToYRow)(const uint8* src_uyvy,
 355                      uint8* dst_y, int pix) = UYVYToYRow_C;
 356   // Negative height means invert the image.
 357   if (height < 0) {
 358     height = -height;
 359     src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
 360     src_stride_uyvy = -src_stride_uyvy;
 361   }
 362   // Coalesce rows.
 363   if (src_stride_uyvy == width * 2 &&
 364       dst_stride_y == width &&
 365       dst_stride_u * 2 == width &&
 366       dst_stride_v * 2 == width) {
 367     width *= height;
 368     height = 1;
 369     src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
 370   }
 371 #if defined(HAS_UYVYTOYROW_SSE2)
 372   if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
 373     UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
 374     UYVYToYRow = UYVYToYRow_Any_SSE2;
 375     if (IS_ALIGNED(width, 16)) {
 376       UYVYToUV422Row = UYVYToUV422Row_SSE2;
 377       UYVYToYRow = UYVYToYRow_SSE2;
 378     }
 379   }
 380 #endif
 381 #if defined(HAS_UYVYTOYROW_AVX2)
 382   if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
 383     UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
 384     UYVYToYRow = UYVYToYRow_Any_AVX2;
 385     if (IS_ALIGNED(width, 32)) {
 386       UYVYToUV422Row = UYVYToUV422Row_AVX2;
 387       UYVYToYRow = UYVYToYRow_AVX2;
 388     }
 389   }
 390 #endif
 391 #if defined(HAS_UYVYTOYROW_NEON)
 392   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
 393     UYVYToYRow = UYVYToYRow_Any_NEON;
 394     if (width >= 16) {
 395       UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
 396     }
 397     if (IS_ALIGNED(width, 16)) {
 398       UYVYToYRow = UYVYToYRow_NEON;
 399       UYVYToUV422Row = UYVYToUV422Row_NEON;
 400     }
 401   }
 402 #endif
 403
 404   for (y = 0; y < height; ++y) {
 405     UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
 406     UYVYToYRow(src_uyvy, dst_y, width);
 407     src_uyvy += src_stride_uyvy;
 408     dst_y += dst_stride_y;
 409     dst_u += dst_stride_u;
 410     dst_v += dst_stride_v;
 411   }
 412   return 0;
 413 }
 414
 415 // Mirror I400 with optional flipping
 416 LIBYUV_API
 417 int I400Mirror(const uint8* src_y, int src_stride_y,
 418                uint8* dst_y, int dst_stride_y,
 419                int width, int height) {
 420   if (!src_y || !dst_y ||
 421       width <= 0 || height == 0) {
 422     return -1;
 423   }
 424   // Negative height means invert the image.
 425   if (height < 0) {
 426     height = -height;
 427     src_y = src_y + (height - 1) * src_stride_y;
 428     src_stride_y = -src_stride_y;
 429   }
 430
 431   MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
 432   return 0;
 433 }
 434
 435 // Mirror I420 with optional flipping
 436 LIBYUV_API
 437 int I420Mirror(const uint8* src_y, int src_stride_y,
 438                const uint8* src_u, int src_stride_u,
 439                const uint8* src_v, int src_stride_v,
 440                uint8* dst_y, int dst_stride_y,
 441                uint8* dst_u, int dst_stride_u,
 442                uint8* dst_v, int dst_stride_v,
 443                int width, int height) {
 444   int halfwidth = (width + 1) >> 1;
 445   int halfheight = (height + 1) >> 1;
 446   if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
 447       width <= 0 || height == 0) {
 448     return -1;
 449   }
 450   // Negative height means invert the image.
 451   if (height < 0) {
 452     height = -height;
 453     halfheight = (height + 1) >> 1;
 454     src_y = src_y + (height - 1) * src_stride_y;
 455     src_u = src_u + (halfheight - 1) * src_stride_u;
 456     src_v = src_v + (halfheight - 1) * src_stride_v;
 457     src_stride_y = -src_stride_y;
 458     src_stride_u = -src_stride_u;
 459     src_stride_v = -src_stride_v;
 460   }
 461
 462   if (dst_y) {
 463     MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
 464   }
 465   MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
 466   MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
 467   return 0;
 468 }
 469
 470 // ARGB mirror.
 471 LIBYUV_API
 472 int ARGBMirror(const uint8* src_argb, int src_stride_argb,
 473                uint8* dst_argb, int dst_stride_argb,
 474                int width, int height) {
 475   int y;
 476   void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
 477       ARGBMirrorRow_C;
 478   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
 479     return -1;
 480   }
 481   // Negative height means invert the image.
 482   if (height < 0) {
 483     height = -height;
 484     src_argb = src_argb + (height - 1) * src_stride_argb;
 485     src_stride_argb = -src_stride_argb;
 486   }
 487
 488 #if defined(HAS_ARGBMIRRORROW_SSSE3)
 489   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
 490     ARGBMirrorRow = ARGBMirrorRow_SSSE3;
 491   }
 492 #endif
 493 #if defined(HAS_ARGBMIRRORROW_AVX2)
 494   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
 495     ARGBMirrorRow = ARGBMirrorRow_AVX2;
 496   }
 497 #endif
 498 #if defined(HAS_ARGBMIRRORROW_NEON)
 499   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
 500     ARGBMirrorRow = ARGBMirrorRow_NEON;
 501   }
 502 #endif
 503
 504   // Mirror plane
 505   for (y = 0; y < height; ++y) {
 506     ARGBMirrorRow(src_argb, dst_argb, width);
 507     src_argb += src_stride_argb;
 508     dst_argb += dst_stride_argb;
 509   }
 510   return 0;
 511 }
 512
 513 // Get a blender that optimized for the CPU, alignment and pixel count.
 514 // As there are 6 blenders to choose from, the caller should try to use
 515 // the same blend function for all pixels if possible.
 516 LIBYUV_API
 517 ARGBBlendRow GetARGBBlend() {
 518   void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
 519                        uint8* dst_argb, int width) = ARGBBlendRow_C;
 520 #if defined(HAS_ARGBBLENDROW_SSSE3)
 521   if (TestCpuFlag(kCpuHasSSSE3)) {
 522     ARGBBlendRow = ARGBBlendRow_SSSE3;
 523     return ARGBBlendRow;
 524   }
 525 #endif
 526 #if defined(HAS_ARGBBLENDROW_SSE2)
 527   if (TestCpuFlag(kCpuHasSSE2)) {
 528     ARGBBlendRow = ARGBBlendRow_SSE2;
 529   }
 530 #endif
 531 #if defined(HAS_ARGBBLENDROW_NEON)
 532   if (TestCpuFlag(kCpuHasNEON)) {
 533     ARGBBlendRow = ARGBBlendRow_NEON;
 534   }
 535 #endif
 536   return ARGBBlendRow;
 537 }
 538
 539 // Alpha Blend 2 ARGB images and store to destination.
 540 LIBYUV_API
 541 int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
 542               const uint8* src_argb1, int src_stride_argb1,
 543               uint8* dst_argb, int dst_stride_argb,
 544               int width, int height) {
 545   int y;
 546   void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
 547                        uint8* dst_argb, int width) = GetARGBBlend();
 548   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
 549     return -1;
 550   }
 551   // Negative height means invert the image.
 552   if (height < 0) {
 553     height = -height;
 554     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
 555     dst_stride_argb = -dst_stride_argb;
 556   }
 557   // Coalesce rows.
 558   if (src_stride_argb0 == width * 4 &&
 559       src_stride_argb1 == width * 4 &&
 560       dst_stride_argb == width * 4) {
 561     width *= height;
 562     height = 1;
 563     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
 564   }
 565
 566   for (y = 0; y < height; ++y) {
 567     ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
 568     src_argb0 += src_stride_argb0;
 569     src_argb1 += src_stride_argb1;
 570     dst_argb += dst_stride_argb;
 571   }
 572   return 0;
 573 }
 574
 575 // Multiply 2 ARGB images and store to destination.
 576 LIBYUV_API
 577 int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
 578                  const uint8* src_argb1, int src_stride_argb1,
 579                  uint8* dst_argb, int dst_stride_argb,
 580                  int width, int height) {
 581   int y;
 582   void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
 583                           int width) = ARGBMultiplyRow_C;
 584   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
 585     return -1;
 586   }
 587   // Negative height means invert the image.
 588   if (height < 0) {
 589     height = -height;
 590     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
 591     dst_stride_argb = -dst_stride_argb;
 592   }
 593   // Coalesce rows.
 594   if (src_stride_argb0 == width * 4 &&
 595       src_stride_argb1 == width * 4 &&
 596       dst_stride_argb == width * 4) {
 597     width *= height;
 598     height = 1;
 599     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
 600   }
 601 #if defined(HAS_ARGBMULTIPLYROW_SSE2)
 602   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
 603     ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
 604     if (IS_ALIGNED(width, 4)) {
 605       ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
 606     }
 607   }
 608 #endif
 609 #if defined(HAS_ARGBMULTIPLYROW_AVX2)
 610   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
 611     ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
 612     if (IS_ALIGNED(width, 8)) {
 613       ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
 614     }
 615   }
 616 #endif
 617 #if defined(HAS_ARGBMULTIPLYROW_NEON)
 618   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
 619     ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
 620     if (IS_ALIGNED(width, 8)) {
 621       ARGBMultiplyRow = ARGBMultiplyRow_NEON;
 622     }
 623   }
 624 #endif
 625
 626   // Multiply plane
 627   for (y = 0; y < height; ++y) {
 628     ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
 629     src_argb0 += src_stride_argb0;
 630     src_argb1 += src_stride_argb1;
 631     dst_argb += dst_stride_argb;
 632   }
 633   return 0;
 634 }
 635
 636 // Add 2 ARGB images and store to destination.
 637 LIBYUV_API
 638 int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
 639             const uint8* src_argb1, int src_stride_argb1,
 640             uint8* dst_argb, int dst_stride_argb,
 641             int width, int height) {
 642   int y;
 643   void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
 644                      int width) = ARGBAddRow_C;
 645   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
 646     return -1;
 647   }
 648   // Negative height means invert the image.
 649   if (height < 0) {
 650     height = -height;
 651     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
 652     dst_stride_argb = -dst_stride_argb;
 653   }
 654   // Coalesce rows.
 655   if (src_stride_argb0 == width * 4 &&
 656       src_stride_argb1 == width * 4 &&
 657       dst_stride_argb == width * 4) {
 658     width *= height;
 659     height = 1;
 660     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
 661   }
 662 #if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER)
 663   if (TestCpuFlag(kCpuHasSSE2)) {
 664     ARGBAddRow = ARGBAddRow_SSE2;
 665   }
 666 #endif
 667 #if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER)
 668   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
 669     ARGBAddRow = ARGBAddRow_Any_SSE2;
 670     if (IS_ALIGNED(width, 4)) {
 671       ARGBAddRow = ARGBAddRow_SSE2;
 672     }
 673   }
 674 #endif
 675 #if defined(HAS_ARGBADDROW_AVX2)
 676   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
 677     ARGBAddRow = ARGBAddRow_Any_AVX2;
 678     if (IS_ALIGNED(width, 8)) {
 679       ARGBAddRow = ARGBAddRow_AVX2;
 680     }
 681   }
 682 #endif
 683 #if defined(HAS_ARGBADDROW_NEON)
 684   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
 685     ARGBAddRow = ARGBAddRow_Any_NEON;
 686     if (IS_ALIGNED(width, 8)) {
 687       ARGBAddRow = ARGBAddRow_NEON;
 688     }
 689   }
 690 #endif
 691
 692   // Add plane
 693   for (y = 0; y < height; ++y) {
 694     ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
 695     src_argb0 += src_stride_argb0;
 696     src_argb1 += src_stride_argb1;
 697     dst_argb += dst_stride_argb;
 698   }
 699   return 0;
 700 }
 701
 702 // Subtract 2 ARGB images and store to destination.
 703 LIBYUV_API
 704 int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
 705                  const uint8* src_argb1, int src_stride_argb1,
 706                  uint8* dst_argb, int dst_stride_argb,
 707                  int width, int height) {
 708   int y;
 709   void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
 710                           int width) = ARGBSubtractRow_C;
 711   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
 712     return -1;
 713   }
 714   // Negative height means invert the image.
 715   if (height < 0) {
 716     height = -height;
 717     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
 718     dst_stride_argb = -dst_stride_argb;
 719   }
 720   // Coalesce rows.
 721   if (src_stride_argb0 == width * 4 &&
 722       src_stride_argb1 == width * 4 &&
 723       dst_stride_argb == width * 4) {
 724     width *= height;
 725     height = 1;
 726     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
 727   }
 728 #if defined(HAS_ARGBSUBTRACTROW_SSE2)
 729   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
 730     ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
 731     if (IS_ALIGNED(width, 4)) {
 732       ARGBSubtractRow = ARGBSubtractRow_SSE2;
 733     }
 734   }
 735 #endif
 736 #if defined(HAS_ARGBSUBTRACTROW_AVX2)
 737   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
 738     ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
 739     if (IS_ALIGNED(width, 8)) {
 740       ARGBSubtractRow = ARGBSubtractRow_AVX2;
 741     }
 742   }
 743 #endif
 744 #if defined(HAS_ARGBSUBTRACTROW_NEON)
 745   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
 746     ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
 747     if (IS_ALIGNED(width, 8)) {
 748       ARGBSubtractRow = ARGBSubtractRow_NEON;
 749     }
 750   }
 751 #endif
 752
 753   // Subtract plane
 754   for (y = 0; y < height; ++y) {
 755     ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
 756     src_argb0 += src_stride_argb0;
 757     src_argb1 += src_stride_argb1;
 758     dst_argb += dst_stride_argb;
 759   }
 760   return 0;
 761 }
 762
 763 // Convert I422 to BGRA.
 764 LIBYUV_API
 765 int I422ToBGRA(const uint8* src_y, int src_stride_y,
 766                const uint8* src_u, int src_stride_u,
 767                const uint8* src_v, int src_stride_v,
 768                uint8* dst_bgra, int dst_stride_bgra,
 769                int width, int height) {
 770   int y;
 771   void (*I422ToBGRARow)(const uint8* y_buf,
 772                         const uint8* u_buf,
 773                         const uint8* v_buf,
 774                         uint8* rgb_buf,
 775                         int width) = I422ToBGRARow_C;
 776   if (!src_y || !src_u || !src_v ||
 777       !dst_bgra ||
 778       width <= 0 || height == 0) {
 779     return -1;
 780   }
 781   // Negative height means invert the image.
 782   if (height < 0) {
 783     height = -height;
 784     dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
 785     dst_stride_bgra = -dst_stride_bgra;
 786   }
 787   // Coalesce rows.
 788   if (src_stride_y == width &&
 789       src_stride_u * 2 == width &&
 790       src_stride_v * 2 == width &&
 791       dst_stride_bgra == width * 4) {
 792     width *= height;
 793     height = 1;
 794     src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
 795   }
 796 #if defined(HAS_I422TOBGRAROW_SSSE3)
 797   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
 798     I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
 799     if (IS_ALIGNED(width, 8)) {
 800       I422ToBGRARow = I422ToBGRARow_SSSE3;
 801     }
 802   }
 803 #endif
 804 #if defined(HAS_I422TOBGRAROW_AVX2)
 805   if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
 806     I422ToBGRARow = I422ToBGRARow_Any_AVX2;
 807     if (IS_ALIGNED(width, 16)) {
 808       I422ToBGRARow = I422ToBGRARow_AVX2;
 809     }
 810   }
 811 #endif
 812 #if defined(HAS_I422TOBGRAROW_NEON)
 813   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
 814     I422ToBGRARow = I422ToBGRARow_Any_NEON;
 815     if (IS_ALIGNED(width, 8)) {
 816       I422ToBGRARow = I422ToBGRARow_NEON;
 817     }
 818   }
 819 #endif
 820 #if defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
 821   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
 822       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
 823       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
 824       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
 825       IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
 826     I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
 827   }
 828 #endif
 829
 830   for (y = 0; y < height; ++y) {
 831     I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
 832     dst_bgra += dst_stride_bgra;
 833     src_y += src_stride_y;
 834     src_u += src_stride_u;
 835     src_v += src_stride_v;
 836   }
 837   return 0;
 838 }
 839
 840 // Convert I422 to ABGR.
 841 LIBYUV_API
 842 int I422ToABGR(const uint8* src_y, int src_stride_y,
 843                const uint8* src_u, int src_stride_u,
 844                const uint8* src_v, int src_stride_v,
 845                uint8* dst_abgr, int dst_stride_abgr,
 846                int width, int height) {
 847   int y;
 848   void (*I422ToABGRRow)(const uint8* y_buf,
 849                         const uint8* u_buf,
 850                         const uint8* v_buf,
 851                         uint8* rgb_buf,
 852                         int width) = I422ToABGRRow_C;
 853   if (!src_y || !src_u || !src_v ||
 854       !dst_abgr ||
 855       width <= 0 || height == 0) {
 856     return -1;
 857   }
 858   // Negative height means invert the image.
 859   if (height < 0) {
 860     height = -height;
 861     dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
 862     dst_stride_abgr = -dst_stride_abgr;
 863   }
 864   // Coalesce rows.
 865   if (src_stride_y == width &&
 866       src_stride_u * 2 == width &&
 867       src_stride_v * 2 == width &&
 868       dst_stride_abgr == width * 4) {
 869     width *= height;
 870     height = 1;
 871     src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
 872   }
 873 #if defined(HAS_I422TOABGRROW_NEON)
 874   if (TestCpuFlag(kCpuHasNEON)) {
 875     I422ToABGRRow = I422ToABGRRow_Any_NEON;
 876     if (IS_ALIGNED(width, 16)) {
 877       I422ToABGRRow = I422ToABGRRow_NEON;
 878     }
 879   }
 880 #endif
 881 #if defined(HAS_I422TOABGRROW_SSSE3)
 882   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
 883     I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
 884     if (IS_ALIGNED(width, 8)) {
 885       I422ToABGRRow = I422ToABGRRow_SSSE3;
 886     }
 887   }
 888 #endif
 889
 890   for (y = 0; y < height; ++y) {
 891     I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
 892     dst_abgr += dst_stride_abgr;
 893     src_y += src_stride_y;
 894     src_u += src_stride_u;
 895     src_v += src_stride_v;
 896   }
 897   return 0;
 898 }
 899
 900 // Convert I422 to RGBA.
 901 LIBYUV_API
 902 int I422ToRGBA(const uint8* src_y, int src_stride_y,
 903                const uint8* src_u, int src_stride_u,
 904                const uint8* src_v, int src_stride_v,
 905                uint8* dst_rgba, int dst_stride_rgba,
 906                int width, int height) {
 907   int y;
 908   void (*I422ToRGBARow)(const uint8* y_buf,
 909                         const uint8* u_buf,
 910                         const uint8* v_buf,
 911                         uint8* rgb_buf,
 912                         int width) = I422ToRGBARow_C;
 913   if (!src_y || !src_u || !src_v ||
 914       !dst_rgba ||
 915       width <= 0 || height == 0) {
 916     return -1;
 917   }
 918   // Negative height means invert the image.
 919   if (height < 0) {
 920     height = -height;
 921     dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
 922     dst_stride_rgba = -dst_stride_rgba;
 923   }
 924   // Coalesce rows.
 925   if (src_stride_y == width &&
 926       src_stride_u * 2 == width &&
 927       src_stride_v * 2 == width &&
 928       dst_stride_rgba == width * 4) {
 929     width *= height;
 930     height = 1;
 931     src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
 932   }
 933 #if defined(HAS_I422TORGBAROW_NEON)
 934   if (TestCpuFlag(kCpuHasNEON)) {
 935     I422ToRGBARow = I422ToRGBARow_Any_NEON;
 936     if (IS_ALIGNED(width, 16)) {
 937       I422ToRGBARow = I422ToRGBARow_NEON;
 938     }
 939   }
 940 #endif
 941 #if defined(HAS_I422TORGBAROW_SSSE3)
 942   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
 943     I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
 944     if (IS_ALIGNED(width, 8)) {
 945       I422ToRGBARow = I422ToRGBARow_SSSE3;
 946     }
 947   }
 948 #endif
 949
 950   for (y = 0; y < height; ++y) {
 951     I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
 952     dst_rgba += dst_stride_rgba;
 953     src_y += src_stride_y;
 954     src_u += src_stride_u;
 955     src_v += src_stride_v;
 956   }
 957   return 0;
 958 }
 959
 960 // Convert NV12 to RGB565.
 961 LIBYUV_API
 962 int NV12ToRGB565(const uint8* src_y, int src_stride_y,
 963                  const uint8* src_uv, int src_stride_uv,
 964                  uint8* dst_rgb565, int dst_stride_rgb565,
 965                  int width, int height) {
 966   int y;
 967   void (*NV12ToRGB565Row)(const uint8* y_buf,
 968                           const uint8* uv_buf,
 969                           uint8* rgb_buf,
 970                           int width) = NV12ToRGB565Row_C;
 971   if (!src_y || !src_uv || !dst_rgb565 ||
 972       width <= 0 || height == 0) {
 973     return -1;
 974   }
 975   // Negative height means invert the image.
 976   if (height < 0) {
 977     height = -height;
 978     dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
 979     dst_stride_rgb565 = -dst_stride_rgb565;
 980   }
 981 #if defined(HAS_NV12TORGB565ROW_SSSE3)
 982   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
 983     NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
 984     if (IS_ALIGNED(width, 8)) {
 985       NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
 986     }
 987   }
 988 #endif
 989 #if defined(HAS_NV12TORGB565ROW_NEON)
 990   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
 991     NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
 992     if (IS_ALIGNED(width, 8)) {
 993       NV12ToRGB565Row = NV12ToRGB565Row_NEON;
 994     }
 995   }
 996 #endif
 997
 998   for (y = 0; y < height; ++y) {
 999     NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
1000     dst_rgb565 += dst_stride_rgb565;
1001     src_y += src_stride_y;
1002     if (y & 1) {
1003       src_uv += src_stride_uv;
1004     }
1005   }
1006   return 0;
1007 }
1008
1009 // Convert NV21 to RGB565.
1010 LIBYUV_API
1011 int NV21ToRGB565(const uint8* src_y, int src_stride_y,
1012                  const uint8* src_vu, int src_stride_vu,
1013                  uint8* dst_rgb565, int dst_stride_rgb565,
1014                  int width, int height) {
1015   int y;
1016   void (*NV21ToRGB565Row)(const uint8* y_buf,
1017                           const uint8* src_vu,
1018                           uint8* rgb_buf,
1019                           int width) = NV21ToRGB565Row_C;
1020   if (!src_y || !src_vu || !dst_rgb565 ||
1021       width <= 0 || height == 0) {
1022     return -1;
1023   }
1024   // Negative height means invert the image.
1025   if (height < 0) {
1026     height = -height;
1027     dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
1028     dst_stride_rgb565 = -dst_stride_rgb565;
1029   }
1030 #if defined(HAS_NV21TORGB565ROW_SSSE3)
1031   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
1032     NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
1033     if (IS_ALIGNED(width, 8)) {
1034       NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
1035     }
1036   }
1037 #endif
1038 #if defined(HAS_NV21TORGB565ROW_NEON)
1039   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
1040     NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
1041     if (IS_ALIGNED(width, 8)) {
1042       NV21ToRGB565Row = NV21ToRGB565Row_NEON;
1043     }
1044   }
1045 #endif
1046
1047   for (y = 0; y < height; ++y) {
1048     NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
1049     dst_rgb565 += dst_stride_rgb565;
1050     src_y += src_stride_y;
1051     if (y & 1) {
1052       src_vu += src_stride_vu;
1053     }
1054   }
1055   return 0;
1056 }
1057
1058 LIBYUV_API
1059 void SetPlane(uint8* dst_y, int dst_stride_y,
1060               int width, int height,
1061               uint32 value) {
1062   int y;
1063   uint32 v32 = value | (value << 8) | (value << 16) | (value << 24);
1064   void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
1065   // Coalesce rows.
1066   if (dst_stride_y == width) {
1067     width *= height;
1068     height = 1;
1069     dst_stride_y = 0;
1070   }
1071 #if defined(HAS_SETROW_NEON)
1072   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
1073     SetRow = SetRow_NEON;
1074   }
1075 #endif
1076 #if defined(HAS_SETROW_X86)
1077   if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
1078     SetRow = SetRow_X86;
1079   }
1080 #endif
1081
1082   // Set plane
1083   for (y = 0; y < height; ++y) {
1084     SetRow(dst_y, v32, width);
1085     dst_y += dst_stride_y;
1086   }
1087 }
1088
1089 // Draw a rectangle into I420
1090 LIBYUV_API
1091 int I420Rect(uint8* dst_y, int dst_stride_y,
1092              uint8* dst_u, int dst_stride_u,
1093              uint8* dst_v, int dst_stride_v,
1094              int x, int y,
1095              int width, int height,
1096              int value_y, int value_u, int value_v) {
1097   int halfwidth = (width + 1) >> 1;
1098   int halfheight = (height + 1) >> 1;
1099   uint8* start_y = dst_y + y * dst_stride_y + x;
1100   uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
1101   uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
1102   if (!dst_y || !dst_u || !dst_v ||
1103       width <= 0 || height <= 0 ||
1104       x < 0 || y < 0 ||
1105       value_y < 0 || value_y > 255 ||
1106       value_u < 0 || value_u > 255 ||
1107       value_v < 0 || value_v > 255) {
1108     return -1;
1109   }
1110
1111   SetPlane(start_y, dst_stride_y, width, height, value_y);
1112   SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
1113   SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
1114   return 0;
1115 }
1116
1117 // Draw a rectangle into ARGB
1118 LIBYUV_API
1119 int ARGBRect(uint8* dst_argb, int dst_stride_argb,
1120              int dst_x, int dst_y,
1121              int width, int height,
1122              uint32 value) {
1123   if (!dst_argb ||
1124       width <= 0 || height <= 0 ||
1125       dst_x < 0 || dst_y < 0) {
1126     return -1;
1127   }
1128   dst_argb += dst_y * dst_stride_argb + dst_x * 4;
1129   // Coalesce rows.
1130   if (dst_stride_argb == width * 4) {
1131     width *= height;
1132     height = 1;
1133     dst_stride_argb = 0;
1134   }
1135 #if defined(HAS_SETROW_NEON)
1136   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
1137     ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height);
1138     return 0;
1139   }
1140 #endif
1141 #if defined(HAS_SETROW_X86)
1142   if (TestCpuFlag(kCpuHasX86)) {
1143     ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height);
1144     return 0;
1145   }
1146 #endif
1147   ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height);
1148   return 0;
1149 }
1150
1151 // Convert unattentuated ARGB to preattenuated ARGB.
1152 // An unattenutated ARGB alpha blend uses the formula
1153 // p = a * f + (1 - a) * b
1154 // where
1155 //   p is output pixel
1156 //   f is foreground pixel
1157 //   b is background pixel
1158 //   a is alpha value from foreground pixel
1159 // An preattenutated ARGB alpha blend uses the formula
1160 // p = f + (1 - a) * b
1161 // where
1162 //   f is foreground pixel premultiplied by alpha
1163
1164 LIBYUV_API
1165 int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
1166                   uint8* dst_argb, int dst_stride_argb,
1167                   int width, int height) {
1168   int y;
1169   void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
1170                            int width) = ARGBAttenuateRow_C;
1171   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1172     return -1;
1173   }
1174   if (height < 0) {
1175     height = -height;
1176     src_argb = src_argb + (height - 1) * src_stride_argb;
1177     src_stride_argb = -src_stride_argb;
1178   }
1179   // Coalesce rows.
1180   if (src_stride_argb == width * 4 &&
1181       dst_stride_argb == width * 4) {
1182     width *= height;
1183     height = 1;
1184     src_stride_argb = dst_stride_argb = 0;
1185   }
1186 #if defined(HAS_ARGBATTENUATEROW_SSE2)
1187   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
1188     ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2;
1189     if (IS_ALIGNED(width, 4)) {
1190       ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
1191     }
1192   }
1193 #endif
1194 #if defined(HAS_ARGBATTENUATEROW_SSSE3)
1195   if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
1196     ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
1197     if (IS_ALIGNED(width, 4)) {
1198       ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
1199     }
1200   }
1201 #endif
1202 #if defined(HAS_ARGBATTENUATEROW_AVX2)
1203   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
1204     ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
1205     if (IS_ALIGNED(width, 8)) {
1206       ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
1207     }
1208   }
1209 #endif
1210 #if defined(HAS_ARGBATTENUATEROW_NEON)
1211   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
1212     ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
1213     if (IS_ALIGNED(width, 8)) {
1214       ARGBAttenuateRow = ARGBAttenuateRow_NEON;
1215     }
1216   }
1217 #endif
1218
1219   for (y = 0; y < height; ++y) {
1220     ARGBAttenuateRow(src_argb, dst_argb, width);
1221     src_argb += src_stride_argb;
1222     dst_argb += dst_stride_argb;
1223   }
1224   return 0;
1225 }
1226
1227 // Convert preattentuated ARGB to unattenuated ARGB.
1228 LIBYUV_API
1229 int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
1230                     uint8* dst_argb, int dst_stride_argb,
1231                     int width, int height) {
1232   int y;
1233   void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
1234                              int width) = ARGBUnattenuateRow_C;
1235   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1236     return -1;
1237   }
1238   if (height < 0) {
1239     height = -height;
1240     src_argb = src_argb + (height - 1) * src_stride_argb;
1241     src_stride_argb = -src_stride_argb;
1242   }
1243   // Coalesce rows.
1244   if (src_stride_argb == width * 4 &&
1245       dst_stride_argb == width * 4) {
1246     width *= height;
1247     height = 1;
1248     src_stride_argb = dst_stride_argb = 0;
1249   }
1250 #if defined(HAS_ARGBUNATTENUATEROW_SSE2)
1251   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
1252     ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
1253     if (IS_ALIGNED(width, 4)) {
1254       ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
1255     }
1256   }
1257 #endif
1258 #if defined(HAS_ARGBUNATTENUATEROW_AVX2)
1259   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
1260     ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
1261     if (IS_ALIGNED(width, 8)) {
1262       ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
1263     }
1264   }
1265 #endif
1266 // TODO(fbarchard): Neon version.
1267
1268   for (y = 0; y < height; ++y) {
1269     ARGBUnattenuateRow(src_argb, dst_argb, width);
1270     src_argb += src_stride_argb;
1271     dst_argb += dst_stride_argb;
1272   }
1273   return 0;
1274 }
1275
1276 // Convert ARGB to Grayed ARGB.
1277 LIBYUV_API
1278 int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
1279                uint8* dst_argb, int dst_stride_argb,
1280                int width, int height) {
1281   int y;
1282   void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
1283                       int width) = ARGBGrayRow_C;
1284   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1285     return -1;
1286   }
1287   if (height < 0) {
1288     height = -height;
1289     src_argb = src_argb + (height - 1) * src_stride_argb;
1290     src_stride_argb = -src_stride_argb;
1291   }
1292   // Coalesce rows.
1293   if (src_stride_argb == width * 4 &&
1294       dst_stride_argb == width * 4) {
1295     width *= height;
1296     height = 1;
1297     src_stride_argb = dst_stride_argb = 0;
1298   }
1299 #if defined(HAS_ARGBGRAYROW_SSSE3)
1300   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
1301     ARGBGrayRow = ARGBGrayRow_SSSE3;
1302   }
1303 #endif
1304 #if defined(HAS_ARGBGRAYROW_NEON)
1305   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1306     ARGBGrayRow = ARGBGrayRow_NEON;
1307   }
1308 #endif
1309
1310   for (y = 0; y < height; ++y) {
1311     ARGBGrayRow(src_argb, dst_argb, width);
1312     src_argb += src_stride_argb;
1313     dst_argb += dst_stride_argb;
1314   }
1315   return 0;
1316 }
1317
1318 // Make a rectangle of ARGB gray scale.
1319 LIBYUV_API
1320 int ARGBGray(uint8* dst_argb, int dst_stride_argb,
1321              int dst_x, int dst_y,
1322              int width, int height) {
1323   int y;
1324   void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
1325                       int width) = ARGBGrayRow_C;
1326   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1327   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
1328     return -1;
1329   }
1330   // Coalesce rows.
1331   if (dst_stride_argb == width * 4) {
1332     width *= height;
1333     height = 1;
1334     dst_stride_argb = 0;
1335   }
1336 #if defined(HAS_ARGBGRAYROW_SSSE3)
1337   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
1338     ARGBGrayRow = ARGBGrayRow_SSSE3;
1339   }
1340 #endif
1341 #if defined(HAS_ARGBGRAYROW_NEON)
1342   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1343     ARGBGrayRow = ARGBGrayRow_NEON;
1344   }
1345 #endif
1346   for (y = 0; y < height; ++y) {
1347     ARGBGrayRow(dst, dst, width);
1348     dst += dst_stride_argb;
1349   }
1350   return 0;
1351 }
1352
1353 // Make a rectangle of ARGB Sepia tone.
1354 LIBYUV_API
1355 int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
1356               int dst_x, int dst_y, int width, int height) {
1357   int y;
1358   void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
1359   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1360   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
1361     return -1;
1362   }
1363   // Coalesce rows.
1364   if (dst_stride_argb == width * 4) {
1365     width *= height;
1366     height = 1;
1367     dst_stride_argb = 0;
1368   }
1369 #if defined(HAS_ARGBSEPIAROW_SSSE3)
1370   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
1371     ARGBSepiaRow = ARGBSepiaRow_SSSE3;
1372   }
1373 #endif
1374 #if defined(HAS_ARGBSEPIAROW_NEON)
1375   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1376     ARGBSepiaRow = ARGBSepiaRow_NEON;
1377   }
1378 #endif
1379   for (y = 0; y < height; ++y) {
1380     ARGBSepiaRow(dst, width);
1381     dst += dst_stride_argb;
1382   }
1383   return 0;
1384 }
1385
1386 // Apply a 4x4 matrix to each ARGB pixel.
1387 // Note: Normally for shading, but can be used to swizzle or invert.
1388 LIBYUV_API
1389 int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
1390                     uint8* dst_argb, int dst_stride_argb,
1391                     const int8* matrix_argb,
1392                     int width, int height) {
1393   int y;
1394   void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
1395       const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
1396   if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
1397     return -1;
1398   }
1399   if (height < 0) {
1400     height = -height;
1401     src_argb = src_argb + (height - 1) * src_stride_argb;
1402     src_stride_argb = -src_stride_argb;
1403   }
1404   // Coalesce rows.
1405   if (src_stride_argb == width * 4 &&
1406       dst_stride_argb == width * 4) {
1407     width *= height;
1408     height = 1;
1409     src_stride_argb = dst_stride_argb = 0;
1410   }
1411 #if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
1412   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
1413     ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
1414   }
1415 #endif
1416 #if defined(HAS_ARGBCOLORMATRIXROW_NEON)
1417   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1418     ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
1419   }
1420 #endif
1421   for (y = 0; y < height; ++y) {
1422     ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
1423     src_argb += src_stride_argb;
1424     dst_argb += dst_stride_argb;
1425   }
1426   return 0;
1427 }
1428
1429 // Apply a 4x3 matrix to each ARGB pixel.
1430 // Deprecated.
1431 LIBYUV_API
1432 int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
1433                    const int8* matrix_rgb,
1434                    int dst_x, int dst_y, int width, int height) {
1435   SIMD_ALIGNED(int8 matrix_argb[16]);
1436   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1437   if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
1438       dst_x < 0 || dst_y < 0) {
1439     return -1;
1440   }
1441
1442   // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
1443   matrix_argb[0] = matrix_rgb[0] / 2;
1444   matrix_argb[1] = matrix_rgb[1] / 2;
1445   matrix_argb[2] = matrix_rgb[2] / 2;
1446   matrix_argb[3] = matrix_rgb[3] / 2;
1447   matrix_argb[4] = matrix_rgb[4] / 2;
1448   matrix_argb[5] = matrix_rgb[5] / 2;
1449   matrix_argb[6] = matrix_rgb[6] / 2;
1450   matrix_argb[7] = matrix_rgb[7] / 2;
1451   matrix_argb[8] = matrix_rgb[8] / 2;
1452   matrix_argb[9] = matrix_rgb[9] / 2;
1453   matrix_argb[10] = matrix_rgb[10] / 2;
1454   matrix_argb[11] = matrix_rgb[11] / 2;
1455   matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
1456   matrix_argb[15] = 64;  // 1.0
1457
1458   return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb,
1459                          dst, dst_stride_argb,
1460                          &matrix_argb[0], width, height);
1461 }
1462
1463 // Apply a color table each ARGB pixel.
1464 // Table contains 256 ARGB values.
1465 LIBYUV_API
1466 int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
1467                    const uint8* table_argb,
1468                    int dst_x, int dst_y, int width, int height) {
1469   int y;
1470   void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
1471                             int width) = ARGBColorTableRow_C;
1472   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1473   if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
1474       dst_x < 0 || dst_y < 0) {
1475     return -1;
1476   }
1477   // Coalesce rows.
1478   if (dst_stride_argb == width * 4) {
1479     width *= height;
1480     height = 1;
1481     dst_stride_argb = 0;
1482   }
1483 #if defined(HAS_ARGBCOLORTABLEROW_X86)
1484   if (TestCpuFlag(kCpuHasX86)) {
1485     ARGBColorTableRow = ARGBColorTableRow_X86;
1486   }
1487 #endif
1488   for (y = 0; y < height; ++y) {
1489     ARGBColorTableRow(dst, table_argb, width);
1490     dst += dst_stride_argb;
1491   }
1492   return 0;
1493 }
1494
1495 // Apply a color table each ARGB pixel but preserve destination alpha.
1496 // Table contains 256 ARGB values.
1497 LIBYUV_API
1498 int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
1499                   const uint8* table_argb,
1500                   int dst_x, int dst_y, int width, int height) {
1501   int y;
1502   void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
1503                            int width) = RGBColorTableRow_C;
1504   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1505   if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
1506       dst_x < 0 || dst_y < 0) {
1507     return -1;
1508   }
1509   // Coalesce rows.
1510   if (dst_stride_argb == width * 4) {
1511     width *= height;
1512     height = 1;
1513     dst_stride_argb = 0;
1514   }
1515 #if defined(HAS_RGBCOLORTABLEROW_X86)
1516   if (TestCpuFlag(kCpuHasX86)) {
1517     RGBColorTableRow = RGBColorTableRow_X86;
1518   }
1519 #endif
1520   for (y = 0; y < height; ++y) {
1521     RGBColorTableRow(dst, table_argb, width);
1522     dst += dst_stride_argb;
1523   }
1524   return 0;
1525 }
1526
1527 // ARGBQuantize is used to posterize art.
1528 // e.g. rgb / qvalue * qvalue + qvalue / 2
1529 // But the low levels implement efficiently with 3 parameters, and could be
1530 // used for other high level operations.
1531 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
1532 // where scale is 1 / interval_size as a fixed point value.
1533 // The divide is replaces with a multiply by reciprocal fixed point multiply.
1534 // Caveat - although SSE2 saturates, the C function does not and should be used
1535 // with care if doing anything but quantization.
1536 LIBYUV_API
1537 int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
1538                  int scale, int interval_size, int interval_offset,
1539                  int dst_x, int dst_y, int width, int height) {
1540   int y;
1541   void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
1542                           int interval_offset, int width) = ARGBQuantizeRow_C;
1543   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1544   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
1545       interval_size < 1 || interval_size > 255) {
1546     return -1;
1547   }
1548   // Coalesce rows.
1549   if (dst_stride_argb == width * 4) {
1550     width *= height;
1551     height = 1;
1552     dst_stride_argb = 0;
1553   }
1554 #if defined(HAS_ARGBQUANTIZEROW_SSE2)
1555   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
1556     ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
1557   }
1558 #endif
1559 #if defined(HAS_ARGBQUANTIZEROW_NEON)
1560   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1561     ARGBQuantizeRow = ARGBQuantizeRow_NEON;
1562   }
1563 #endif
1564   for (y = 0; y < height; ++y) {
1565     ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
1566     dst += dst_stride_argb;
1567   }
1568   return 0;
1569 }
1570
1571 // Computes table of cumulative sum for image where the value is the sum
1572 // of all values above and to the left of the entry. Used by ARGBBlur.
1573 LIBYUV_API
1574 int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
1575                              int32* dst_cumsum, int dst_stride32_cumsum,
1576                              int width, int height) {
1577   int y;
1578   void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
1579       const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
1580   int32* previous_cumsum = dst_cumsum;
1581   if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
1582     return -1;
1583   }
1584 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
1585   if (TestCpuFlag(kCpuHasSSE2)) {
1586     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
1587   }
1588 #endif
1589   memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
1590   for (y = 0; y < height; ++y) {
1591     ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
1592     previous_cumsum = dst_cumsum;
1593     dst_cumsum += dst_stride32_cumsum;
1594     src_argb += src_stride_argb;
1595   }
1596   return 0;
1597 }
1598
1599 // Blur ARGB image.
1600 // Caller should allocate CumulativeSum table of width * height * 16 bytes
1601 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
1602 // as the buffer is treated as circular.
1603 LIBYUV_API
1604 int ARGBBlur(const uint8* src_argb, int src_stride_argb,
1605              uint8* dst_argb, int dst_stride_argb,
1606              int32* dst_cumsum, int dst_stride32_cumsum,
1607              int width, int height, int radius) {
1608   int y;
1609   void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum,
1610       const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
1611   void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
1612       int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
1613   int32* cumsum_bot_row;
1614   int32* max_cumsum_bot_row;
1615   int32* cumsum_top_row;
1616
1617   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1618     return -1;
1619   }
1620   if (height < 0) {
1621     height = -height;
1622     src_argb = src_argb + (height - 1) * src_stride_argb;
1623     src_stride_argb = -src_stride_argb;
1624   }
1625   if (radius > height) {
1626     radius = height;
1627   }
1628   if (radius > (width / 2 - 1)) {
1629     radius = width / 2 - 1;
1630   }
1631   if (radius <= 0) {
1632     return -1;
1633   }
1634 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
1635   if (TestCpuFlag(kCpuHasSSE2)) {
1636     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
1637     CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
1638   }
1639 #endif
1640   // Compute enough CumulativeSum for first row to be blurred. After this
1641   // one row of CumulativeSum is updated at a time.
1642   ARGBComputeCumulativeSum(src_argb, src_stride_argb,
1643                            dst_cumsum, dst_stride32_cumsum,
1644                            width, radius);
1645
1646   src_argb = src_argb + radius * src_stride_argb;
1647   cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
1648
1649   max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
1650   cumsum_top_row = &dst_cumsum[0];
1651
1652   for (y = 0; y < height; ++y) {
1653     int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
1654     int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
1655     int area = radius * (bot_y - top_y);
1656     int boxwidth = radius * 4;
1657     int x;
1658     int n;
1659
1660     // Increment cumsum_top_row pointer with circular buffer wrap around.
1661     if (top_y) {
1662       cumsum_top_row += dst_stride32_cumsum;
1663       if (cumsum_top_row >= max_cumsum_bot_row) {
1664         cumsum_top_row = dst_cumsum;
1665       }
1666     }
1667     // Increment cumsum_bot_row pointer with circular buffer wrap around and
1668     // then fill in a row of CumulativeSum.
1669     if ((y + radius) < height) {
1670       const int32* prev_cumsum_bot_row = cumsum_bot_row;
1671       cumsum_bot_row += dst_stride32_cumsum;
1672       if (cumsum_bot_row >= max_cumsum_bot_row) {
1673         cumsum_bot_row = dst_cumsum;
1674       }
1675       ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
1676                               width);
1677       src_argb += src_stride_argb;
1678     }
1679
1680     // Left clipped.
1681     for (x = 0; x < radius + 1; ++x) {
1682       CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
1683                                 boxwidth, area, &dst_argb[x * 4], 1);
1684       area += (bot_y - top_y);
1685       boxwidth += 4;
1686     }
1687
1688     // Middle unclipped.
1689     n = (width - 1) - radius - x + 1;
1690     CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
1691                               boxwidth, area, &dst_argb[x * 4], n);
1692
1693     // Right clipped.
1694     for (x += n; x <= width - 1; ++x) {
1695       area -= (bot_y - top_y);
1696       boxwidth -= 4;
1697       CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
1698                                 cumsum_bot_row + (x - radius - 1) * 4,
1699                                 boxwidth, area, &dst_argb[x * 4], 1);
1700     }
1701     dst_argb += dst_stride_argb;
1702   }
1703   return 0;
1704 }
1705
1706 // Multiply ARGB image by a specified ARGB value.
1707 LIBYUV_API
1708 int ARGBShade(const uint8* src_argb, int src_stride_argb,
1709               uint8* dst_argb, int dst_stride_argb,
1710               int width, int height, uint32 value) {
1711   int y;
1712   void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
1713                        int width, uint32 value) = ARGBShadeRow_C;
1714   if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
1715     return -1;
1716   }
1717   if (height < 0) {
1718     height = -height;
1719     src_argb = src_argb + (height - 1) * src_stride_argb;
1720     src_stride_argb = -src_stride_argb;
1721   }
1722   // Coalesce rows.
1723   if (src_stride_argb == width * 4 &&
1724       dst_stride_argb == width * 4) {
1725     width *= height;
1726     height = 1;
1727     src_stride_argb = dst_stride_argb = 0;
1728   }
1729 #if defined(HAS_ARGBSHADEROW_SSE2)
1730   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
1731     ARGBShadeRow = ARGBShadeRow_SSE2;
1732   }
1733 #endif
1734 #if defined(HAS_ARGBSHADEROW_NEON)
1735   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1736     ARGBShadeRow = ARGBShadeRow_NEON;
1737   }
1738 #endif
1739
1740   for (y = 0; y < height; ++y) {
1741     ARGBShadeRow(src_argb, dst_argb, width, value);
1742     src_argb += src_stride_argb;
1743     dst_argb += dst_stride_argb;
1744   }
1745   return 0;
1746 }
1747
1748 // Interpolate 2 ARGB images by specified amount (0 to 255).
1749 LIBYUV_API
1750 int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
1751                     const uint8* src_argb1, int src_stride_argb1,
1752                     uint8* dst_argb, int dst_stride_argb,
1753                     int width, int height, int interpolation) {
1754   int y;
1755   void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
1756                          ptrdiff_t src_stride, int dst_width,
1757                          int source_y_fraction) = InterpolateRow_C;
1758   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
1759     return -1;
1760   }
1761   // Negative height means invert the image.
1762   if (height < 0) {
1763     height = -height;
1764     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
1765     dst_stride_argb = -dst_stride_argb;
1766   }
1767   // Coalesce rows.
1768   if (src_stride_argb0 == width * 4 &&
1769       src_stride_argb1 == width * 4 &&
1770       dst_stride_argb == width * 4) {
1771     width *= height;
1772     height = 1;
1773     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
1774   }
1775 #if defined(HAS_INTERPOLATEROW_SSE2)
1776   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
1777     InterpolateRow = InterpolateRow_Any_SSE2;
1778     if (IS_ALIGNED(width, 4)) {
1779       InterpolateRow = InterpolateRow_SSE2;
1780     }
1781   }
1782 #endif
1783 #if defined(HAS_INTERPOLATEROW_SSSE3)
1784   if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
1785     InterpolateRow = InterpolateRow_Any_SSSE3;
1786     if (IS_ALIGNED(width, 4)) {
1787       InterpolateRow = InterpolateRow_SSSE3;
1788     }
1789   }
1790 #endif
1791 #if defined(HAS_INTERPOLATEROW_AVX2)
1792   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
1793     InterpolateRow = InterpolateRow_Any_AVX2;
1794     if (IS_ALIGNED(width, 8)) {
1795       InterpolateRow = InterpolateRow_AVX2;
1796     }
1797   }
1798 #endif
1799 #if defined(HAS_INTERPOLATEROW_NEON)
1800   if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
1801     InterpolateRow = InterpolateRow_Any_NEON;
1802     if (IS_ALIGNED(width, 4)) {
1803       InterpolateRow = InterpolateRow_NEON;
1804     }
1805   }
1806 #endif
1807 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
1808   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 &&
1809       IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) &&
1810       IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) &&
1811       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
1812     ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2;
1813   }
1814 #endif
1815
1816   for (y = 0; y < height; ++y) {
1817     InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
1818                    width * 4, interpolation);
1819     src_argb0 += src_stride_argb0;
1820     src_argb1 += src_stride_argb1;
1821     dst_argb += dst_stride_argb;
1822   }
1823   return 0;
1824 }
1825
1826 // Shuffle ARGB channel order.  e.g. BGRA to ARGB.
1827 LIBYUV_API
1828 int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
1829                 uint8* dst_argb, int dst_stride_argb,
1830                 const uint8* shuffler, int width, int height) {
1831   int y;
1832   void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
1833                          const uint8* shuffler, int pix) = ARGBShuffleRow_C;
1834   if (!src_bgra || !dst_argb ||
1835       width <= 0 || height == 0) {
1836     return -1;
1837   }
1838   // Negative height means invert the image.
1839   if (height < 0) {
1840     height = -height;
1841     src_bgra = src_bgra + (height - 1) * src_stride_bgra;
1842     src_stride_bgra = -src_stride_bgra;
1843   }
1844   // Coalesce rows.
1845   if (src_stride_bgra == width * 4 &&
1846       dst_stride_argb == width * 4) {
1847     width *= height;
1848     height = 1;
1849     src_stride_bgra = dst_stride_argb = 0;
1850   }
1851 #if defined(HAS_ARGBSHUFFLEROW_SSE2)
1852   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
1853     ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
1854     if (IS_ALIGNED(width, 4)) {
1855       ARGBShuffleRow = ARGBShuffleRow_SSE2;
1856     }
1857   }
1858 #endif
1859 #if defined(HAS_ARGBSHUFFLEROW_SSSE3)
1860   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
1861     ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
1862     if (IS_ALIGNED(width, 8)) {
1863       ARGBShuffleRow = ARGBShuffleRow_SSSE3;
1864     }
1865   }
1866 #endif
1867 #if defined(HAS_ARGBSHUFFLEROW_AVX2)
1868   if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
1869     ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
1870     if (IS_ALIGNED(width, 16)) {
1871       ARGBShuffleRow = ARGBShuffleRow_AVX2;
1872     }
1873   }
1874 #endif
1875 #if defined(HAS_ARGBSHUFFLEROW_NEON)
1876   if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
1877     ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
1878     if (IS_ALIGNED(width, 4)) {
1879       ARGBShuffleRow = ARGBShuffleRow_NEON;
1880     }
1881   }
1882 #endif
1883
1884   for (y = 0; y < height; ++y) {
1885     ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
1886     src_bgra += src_stride_bgra;
1887     dst_argb += dst_stride_argb;
1888   }
1889   return 0;
1890 }
1891
1892 // Sobel ARGB effect.
1893 static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
1894                         uint8* dst_argb, int dst_stride_argb,
1895                         int width, int height,
1896                         void (*SobelRow)(const uint8* src_sobelx,
1897                                          const uint8* src_sobely,
1898                                          uint8* dst, int width)) {
1899   int y;
1900   void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
1901                          uint32 selector, int pix) = ARGBToBayerGGRow_C;
1902   void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
1903                     uint8* dst_sobely, int width) = SobelYRow_C;
1904   void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
1905                     const uint8* src_y2, uint8* dst_sobely, int width) =
1906       SobelXRow_C;
1907   const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
1908   if (!src_argb  || !dst_argb || width <= 0 || height == 0) {
1909     return -1;
1910   }
1911   // Negative height means invert the image.
1912   if (height < 0) {
1913     height = -height;
1914     src_argb  = src_argb  + (height - 1) * src_stride_argb;
1915     src_stride_argb = -src_stride_argb;
1916   }
1917   // ARGBToBayer used to select G channel from ARGB.
1918 #if defined(HAS_ARGBTOBAYERGGROW_SSE2)
1919   if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
1920     ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2;
1921     if (IS_ALIGNED(width, 8)) {
1922       ARGBToBayerRow = ARGBToBayerGGRow_SSE2;
1923     }
1924   }
1925 #endif
1926 #if defined(HAS_ARGBTOBAYERROW_SSSE3)
1927   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
1928     ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
1929     if (IS_ALIGNED(width, 8)) {
1930       ARGBToBayerRow = ARGBToBayerRow_SSSE3;
1931     }
1932   }
1933 #endif
1934 #if defined(HAS_ARGBTOBAYERGGROW_NEON)
1935   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
1936     ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON;
1937     if (IS_ALIGNED(width, 8)) {
1938       ARGBToBayerRow = ARGBToBayerGGRow_NEON;
1939     }
1940   }
1941 #endif
1942 #if defined(HAS_SOBELYROW_SSE2)
1943   if (TestCpuFlag(kCpuHasSSE2)) {
1944     SobelYRow = SobelYRow_SSE2;
1945   }
1946 #endif
1947 #if defined(HAS_SOBELYROW_NEON)
1948   if (TestCpuFlag(kCpuHasNEON)) {
1949     SobelYRow = SobelYRow_NEON;
1950   }
1951 #endif
1952 #if defined(HAS_SOBELXROW_SSE2)
1953   if (TestCpuFlag(kCpuHasSSE2)) {
1954     SobelXRow = SobelXRow_SSE2;
1955   }
1956 #endif
1957 #if defined(HAS_SOBELXROW_NEON)
1958   if (TestCpuFlag(kCpuHasNEON)) {
1959     SobelXRow = SobelXRow_NEON;
1960   }
1961 #endif
1962   {
1963     // 3 rows with edges before/after.
1964     const int kRowSize = (width + kEdge + 15) & ~15;
1965     align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
1966     uint8* row_sobelx = rows;
1967     uint8* row_sobely = rows + kRowSize;
1968     uint8* row_y = rows + kRowSize * 2;
1969
1970     // Convert first row.
1971     uint8* row_y0 = row_y + kEdge;
1972     uint8* row_y1 = row_y0 + kRowSize;
1973     uint8* row_y2 = row_y1 + kRowSize;
1974     ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
1975     row_y0[-1] = row_y0[0];
1976     memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
1977     ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
1978     row_y1[-1] = row_y1[0];
1979     memset(row_y1 + width, row_y1[width - 1], 16);
1980     memset(row_y2 + width, 0, 16);
1981
1982     for (y = 0; y < height; ++y) {
1983       // Convert next row of ARGB to Y.
1984       if (y < (height - 1)) {
1985         src_argb += src_stride_argb;
1986       }
1987       ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
1988       row_y2[-1] = row_y2[0];
1989       row_y2[width] = row_y2[width - 1];
1990
1991       SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
1992       SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
1993       SobelRow(row_sobelx, row_sobely, dst_argb, width);
1994
1995       // Cycle thru circular queue of 3 row_y buffers.
1996       {
1997         uint8* row_yt = row_y0;
1998         row_y0 = row_y1;
1999         row_y1 = row_y2;
2000         row_y2 = row_yt;
2001       }
2002
2003       dst_argb += dst_stride_argb;
2004     }
2005     free_aligned_buffer_64(rows);
2006   }
2007   return 0;
2008 }
2009
2010 // Sobel ARGB effect.
2011 LIBYUV_API
2012 int ARGBSobel(const uint8* src_argb, int src_stride_argb,
2013               uint8* dst_argb, int dst_stride_argb,
2014               int width, int height) {
2015   void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
2016                    uint8* dst_argb, int width) = SobelRow_C;
2017 #if defined(HAS_SOBELROW_SSE2)
2018   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
2019     SobelRow = SobelRow_SSE2;
2020   }
2021 #endif
2022 #if defined(HAS_SOBELROW_NEON)
2023   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2024     SobelRow = SobelRow_NEON;
2025   }
2026 #endif
2027   return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
2028                       width, height, SobelRow);
2029 }
2030
2031 // Sobel ARGB effect with planar output.
2032 LIBYUV_API
2033 int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
2034                      uint8* dst_y, int dst_stride_y,
2035                      int width, int height) {
2036   void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
2037                           uint8* dst_, int width) = SobelToPlaneRow_C;
2038 #if defined(HAS_SOBELTOPLANEROW_SSE2)
2039   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
2040     SobelToPlaneRow = SobelToPlaneRow_SSE2;
2041   }
2042 #endif
2043 #if defined(HAS_SOBELTOPLANEROW_NEON)
2044   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
2045     SobelToPlaneRow = SobelToPlaneRow_NEON;
2046   }
2047 #endif
2048   return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
2049                       width, height, SobelToPlaneRow);
2050 }
2051
2052 // SobelXY ARGB effect.
2053 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B.  G = Sobel.
2054 LIBYUV_API
2055 int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
2056                 uint8* dst_argb, int dst_stride_argb,
2057                 int width, int height) {
2058   void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
2059                      uint8* dst_argb, int width) = SobelXYRow_C;
2060 #if defined(HAS_SOBELXYROW_SSE2)
2061   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
2062     SobelXYRow = SobelXYRow_SSE2;
2063   }
2064 #endif
2065 #if defined(HAS_SOBELXYROW_NEON)
2066   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2067     SobelXYRow = SobelXYRow_NEON;
2068   }
2069 #endif
2070   return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
2071                       width, height, SobelXYRow);
2072 }
2073
2074 // Apply a 4x4 polynomial to each ARGB pixel.
2075 LIBYUV_API
2076 int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
2077                    uint8* dst_argb, int dst_stride_argb,
2078                    const float* poly,
2079                    int width, int height) {
2080   int y;
2081   void (*ARGBPolynomialRow)(const uint8* src_argb,
2082                             uint8* dst_argb, const float* poly,
2083                             int width) = ARGBPolynomialRow_C;
2084   if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
2085     return -1;
2086   }
2087   // Negative height means invert the image.
2088   if (height < 0) {
2089     height = -height;
2090     src_argb  = src_argb  + (height - 1) * src_stride_argb;
2091     src_stride_argb = -src_stride_argb;
2092   }
2093   // Coalesce rows.
2094   if (src_stride_argb == width * 4 &&
2095       dst_stride_argb == width * 4) {
2096     width *= height;
2097     height = 1;
2098     src_stride_argb = dst_stride_argb = 0;
2099   }
2100 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
2101   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
2102     ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
2103   }
2104 #endif
2105 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
2106   if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
2107       IS_ALIGNED(width, 2)) {
2108     ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
2109   }
2110 #endif
2111
2112   for (y = 0; y < height; ++y) {
2113     ARGBPolynomialRow(src_argb, dst_argb, poly, width);
2114     src_argb += src_stride_argb;
2115     dst_argb += dst_stride_argb;
2116   }
2117   return 0;
2118 }
2119
2120 // Apply a lumacolortable to each ARGB pixel.
2121 LIBYUV_API
2122 int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
2123                        uint8* dst_argb, int dst_stride_argb,
2124                        const uint8* luma,
2125                        int width, int height) {
2126   int y;
2127   void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
2128       int width, const uint8* luma, const uint32 lumacoeff) =
2129       ARGBLumaColorTableRow_C;
2130   if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
2131     return -1;
2132   }
2133   // Negative height means invert the image.
2134   if (height < 0) {
2135     height = -height;
2136     src_argb  = src_argb  + (height - 1) * src_stride_argb;
2137     src_stride_argb = -src_stride_argb;
2138   }
2139   // Coalesce rows.
2140   if (src_stride_argb == width * 4 &&
2141       dst_stride_argb == width * 4) {
2142     width *= height;
2143     height = 1;
2144     src_stride_argb = dst_stride_argb = 0;
2145   }
2146 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
2147   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
2148     ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
2149   }
2150 #endif
2151
2152   for (y = 0; y < height; ++y) {
2153     ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
2154     src_argb += src_stride_argb;
2155     dst_argb += dst_stride_argb;
2156   }
2157   return 0;
2158 }
2159
2160 // Copy Alpha from one ARGB image to another.
2161 LIBYUV_API
2162 int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
2163                   uint8* dst_argb, int dst_stride_argb,
2164                   int width, int height) {
2165   int y;
2166   void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
2167       ARGBCopyAlphaRow_C;
2168   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
2169     return -1;
2170   }
2171   // Negative height means invert the image.
2172   if (height < 0) {
2173     height = -height;
2174     src_argb = src_argb + (height - 1) * src_stride_argb;
2175     src_stride_argb = -src_stride_argb;
2176   }
2177   // Coalesce rows.
2178   if (src_stride_argb == width * 4 &&
2179       dst_stride_argb == width * 4) {
2180     width *= height;
2181     height = 1;
2182     src_stride_argb = dst_stride_argb = 0;
2183   }
2184 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
2185   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {
2186     ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
2187   }
2188 #endif
2189 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
2190   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
2191     ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
2192   }
2193 #endif
2194
2195   for (y = 0; y < height; ++y) {
2196     ARGBCopyAlphaRow(src_argb, dst_argb, width);
2197     src_argb += src_stride_argb;
2198     dst_argb += dst_stride_argb;
2199   }
2200   return 0;
2201 }
2202
2203 // Copy a planar Y channel to the alpha channel of a destination ARGB image.
2204 LIBYUV_API
2205 int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
2206                      uint8* dst_argb, int dst_stride_argb,
2207                      int width, int height) {
2208   int y;
2209   void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
2210       ARGBCopyYToAlphaRow_C;
2211   if (!src_y || !dst_argb || width <= 0 || height == 0) {
2212     return -1;
2213   }
2214   // Negative height means invert the image.
2215   if (height < 0) {
2216     height = -height;
2217     src_y = src_y + (height - 1) * src_stride_y;
2218     src_stride_y = -src_stride_y;
2219   }
2220   // Coalesce rows.
2221   if (src_stride_y == width &&
2222       dst_stride_argb == width * 4) {
2223     width *= height;
2224     height = 1;
2225     src_stride_y = dst_stride_argb = 0;
2226   }
2227 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
2228   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {
2229     ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
2230   }
2231 #endif
2232 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
2233   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
2234     ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
2235   }
2236 #endif
2237
2238   for (y = 0; y < height; ++y) {
2239     ARGBCopyYToAlphaRow(src_y, dst_argb, width);
2240     src_y += src_stride_y;
2241     dst_argb += dst_stride_argb;
2242   }
2243   return 0;
2244 }
2245
2246 #ifdef __cplusplus
2247 }  // extern "C"
2248 }  // namespace libyuv
2249 #endif