/*
 *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "libyuv/scale.h"

#include <assert.h>
#include <string.h>

#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"  // For CopyARGB
#include "libyuv/row.h"
#include "libyuv/scale_row.h"
// Returns the absolute value of v.  Note: undefined for INT_MIN, which no
// caller in this file passes (widths/heights are bounded well below it).
static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}
30 // CPU agnostic row functions
31 void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
32 uint8* dst, int dst_width) {
34 for (x = 0; x < dst_width - 1; x += 2) {
45 void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
46 uint16* dst, int dst_width) {
48 for (x = 0; x < dst_width - 1; x += 2) {
59 void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
60 uint8* dst, int dst_width) {
61 const uint8* s = src_ptr;
63 for (x = 0; x < dst_width - 1; x += 2) {
64 dst[0] = (s[0] + s[1] + 1) >> 1;
65 dst[1] = (s[2] + s[3] + 1) >> 1;
70 dst[0] = (s[0] + s[1] + 1) >> 1;
74 void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
75 uint16* dst, int dst_width) {
76 const uint16* s = src_ptr;
78 for (x = 0; x < dst_width - 1; x += 2) {
79 dst[0] = (s[0] + s[1] + 1) >> 1;
80 dst[1] = (s[2] + s[3] + 1) >> 1;
85 dst[0] = (s[0] + s[1] + 1) >> 1;
89 void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
90 uint8* dst, int dst_width) {
91 const uint8* s = src_ptr;
92 const uint8* t = src_ptr + src_stride;
94 for (x = 0; x < dst_width - 1; x += 2) {
95 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
96 dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
102 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
106 void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
107 uint16* dst, int dst_width) {
108 const uint16* s = src_ptr;
109 const uint16* t = src_ptr + src_stride;
111 for (x = 0; x < dst_width - 1; x += 2) {
112 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
113 dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
119 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
123 void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
124 uint8* dst, int dst_width) {
126 for (x = 0; x < dst_width - 1; x += 2) {
137 void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
138 uint16* dst, int dst_width) {
140 for (x = 0; x < dst_width - 1; x += 2) {
151 void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
152 uint8* dst, int dst_width) {
153 intptr_t stride = src_stride;
155 for (x = 0; x < dst_width - 1; x += 2) {
156 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
157 src_ptr[stride + 0] + src_ptr[stride + 1] +
158 src_ptr[stride + 2] + src_ptr[stride + 3] +
159 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
160 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
161 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
162 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
164 dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
165 src_ptr[stride + 4] + src_ptr[stride + 5] +
166 src_ptr[stride + 6] + src_ptr[stride + 7] +
167 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
168 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
169 src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
170 src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
176 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
177 src_ptr[stride + 0] + src_ptr[stride + 1] +
178 src_ptr[stride + 2] + src_ptr[stride + 3] +
179 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
180 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
181 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
182 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
187 void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
188 uint16* dst, int dst_width) {
189 intptr_t stride = src_stride;
191 for (x = 0; x < dst_width - 1; x += 2) {
192 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
193 src_ptr[stride + 0] + src_ptr[stride + 1] +
194 src_ptr[stride + 2] + src_ptr[stride + 3] +
195 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
196 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
197 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
198 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
200 dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
201 src_ptr[stride + 4] + src_ptr[stride + 5] +
202 src_ptr[stride + 6] + src_ptr[stride + 7] +
203 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
204 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
205 src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
206 src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
212 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
213 src_ptr[stride + 0] + src_ptr[stride + 1] +
214 src_ptr[stride + 2] + src_ptr[stride + 3] +
215 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
216 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
217 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
218 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
223 void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
224 uint8* dst, int dst_width) {
226 assert((dst_width % 3 == 0) && (dst_width > 0));
227 for (x = 0; x < dst_width; x += 3) {
236 void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
237 uint16* dst, int dst_width) {
239 assert((dst_width % 3 == 0) && (dst_width > 0));
240 for (x = 0; x < dst_width; x += 3) {
249 // Filter rows 0 and 1 together, 3 : 1
250 void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
251 uint8* d, int dst_width) {
252 const uint8* s = src_ptr;
253 const uint8* t = src_ptr + src_stride;
255 assert((dst_width % 3 == 0) && (dst_width > 0));
256 for (x = 0; x < dst_width; x += 3) {
257 uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
258 uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
259 uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
260 uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
261 uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
262 uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
263 d[0] = (a0 * 3 + b0 + 2) >> 2;
264 d[1] = (a1 * 3 + b1 + 2) >> 2;
265 d[2] = (a2 * 3 + b2 + 2) >> 2;
272 void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
273 uint16* d, int dst_width) {
274 const uint16* s = src_ptr;
275 const uint16* t = src_ptr + src_stride;
277 assert((dst_width % 3 == 0) && (dst_width > 0));
278 for (x = 0; x < dst_width; x += 3) {
279 uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
280 uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
281 uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
282 uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
283 uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
284 uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
285 d[0] = (a0 * 3 + b0 + 2) >> 2;
286 d[1] = (a1 * 3 + b1 + 2) >> 2;
287 d[2] = (a2 * 3 + b2 + 2) >> 2;
294 // Filter rows 1 and 2 together, 1 : 1
295 void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
296 uint8* d, int dst_width) {
297 const uint8* s = src_ptr;
298 const uint8* t = src_ptr + src_stride;
300 assert((dst_width % 3 == 0) && (dst_width > 0));
301 for (x = 0; x < dst_width; x += 3) {
302 uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
303 uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
304 uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
305 uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
306 uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
307 uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
308 d[0] = (a0 + b0 + 1) >> 1;
309 d[1] = (a1 + b1 + 1) >> 1;
310 d[2] = (a2 + b2 + 1) >> 1;
317 void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
318 uint16* d, int dst_width) {
319 const uint16* s = src_ptr;
320 const uint16* t = src_ptr + src_stride;
322 assert((dst_width % 3 == 0) && (dst_width > 0));
323 for (x = 0; x < dst_width; x += 3) {
324 uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
325 uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
326 uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
327 uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
328 uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
329 uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
330 d[0] = (a0 + b0 + 1) >> 1;
331 d[1] = (a1 + b1 + 1) >> 1;
332 d[2] = (a2 + b2 + 1) >> 1;
339 // Scales a single row of pixels using point sampling.
340 void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
341 int dst_width, int x, int dx) {
343 for (j = 0; j < dst_width - 1; j += 2) {
344 dst_ptr[0] = src_ptr[x >> 16];
346 dst_ptr[1] = src_ptr[x >> 16];
351 dst_ptr[0] = src_ptr[x >> 16];
355 void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
356 int dst_width, int x, int dx) {
358 for (j = 0; j < dst_width - 1; j += 2) {
359 dst_ptr[0] = src_ptr[x >> 16];
361 dst_ptr[1] = src_ptr[x >> 16];
366 dst_ptr[0] = src_ptr[x >> 16];
370 // Scales a single row of pixels up by 2x using point sampling.
371 void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
372 int dst_width, int x, int dx) {
374 for (j = 0; j < dst_width - 1; j += 2) {
375 dst_ptr[1] = dst_ptr[0] = src_ptr[0];
380 dst_ptr[0] = src_ptr[0];
384 void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
385 int dst_width, int x, int dx) {
387 for (j = 0; j < dst_width - 1; j += 2) {
388 dst_ptr[1] = dst_ptr[0] = src_ptr[0];
393 dst_ptr[0] = src_ptr[0];
397 // (1-f)a + fb can be replaced with a + f(b-a)
// Linear blend of a and b by 16.16 fixed-point fraction f, for 8-bit pixels.
// (1-f)a + fb can be replaced with a + f(b-a).
#define BLENDER(a, b, f) (uint8)((int)(a) + \
((int)(f) * ((int)(b) - (int)(a)) >> 16))
401 void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
402 int dst_width, int x, int dx) {
404 for (j = 0; j < dst_width - 1; j += 2) {
407 int b = src_ptr[xi + 1];
408 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
413 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
420 int b = src_ptr[xi + 1];
421 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
425 void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
426 int dst_width, int x32, int dx) {
427 int64 x = (int64)(x32);
429 for (j = 0; j < dst_width - 1; j += 2) {
432 int b = src_ptr[xi + 1];
433 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
438 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
445 int b = src_ptr[xi + 1];
446 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
// 16-bit pixel version of BLENDER.  Undefine the 8-bit version first to
// avoid a macro-redefinition error/warning.
#undef BLENDER
#define BLENDER(a, b, f) (uint16)((int)(a) + \
    ((int)(f) * ((int)(b) - (int)(a)) >> 16))
454 void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
455 int dst_width, int x, int dx) {
457 for (j = 0; j < dst_width - 1; j += 2) {
460 int b = src_ptr[xi + 1];
461 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
466 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
473 int b = src_ptr[xi + 1];
474 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
478 void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
479 int dst_width, int x32, int dx) {
480 int64 x = (int64)(x32);
482 for (j = 0; j < dst_width - 1; j += 2) {
485 int b = src_ptr[xi + 1];
486 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
491 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
498 int b = src_ptr[xi + 1];
499 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
504 void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
505 uint8* dst, int dst_width) {
507 assert(dst_width % 3 == 0);
508 for (x = 0; x < dst_width; x += 3) {
517 void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
518 uint16* dst, int dst_width) {
520 assert(dst_width % 3 == 0);
521 for (x = 0; x < dst_width; x += 3) {
531 void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
532 ptrdiff_t src_stride,
533 uint8* dst_ptr, int dst_width) {
534 intptr_t stride = src_stride;
536 assert((dst_width % 3 == 0) && (dst_width > 0));
537 for (i = 0; i < dst_width; i += 3) {
538 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
539 src_ptr[stride + 0] + src_ptr[stride + 1] +
540 src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
541 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
543 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
544 src_ptr[stride + 3] + src_ptr[stride + 4] +
545 src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
546 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
548 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
549 src_ptr[stride + 6] + src_ptr[stride + 7] +
550 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
557 void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
558 ptrdiff_t src_stride,
559 uint16* dst_ptr, int dst_width) {
560 intptr_t stride = src_stride;
562 assert((dst_width % 3 == 0) && (dst_width > 0));
563 for (i = 0; i < dst_width; i += 3) {
564 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
565 src_ptr[stride + 0] + src_ptr[stride + 1] +
566 src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
567 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
569 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
570 src_ptr[stride + 3] + src_ptr[stride + 4] +
571 src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
572 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
574 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
575 src_ptr[stride + 6] + src_ptr[stride + 7] +
576 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
584 void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
585 uint8* dst_ptr, int dst_width) {
586 intptr_t stride = src_stride;
588 assert((dst_width % 3 == 0) && (dst_width > 0));
589 for (i = 0; i < dst_width; i += 3) {
590 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
591 src_ptr[stride + 0] + src_ptr[stride + 1] +
592 src_ptr[stride + 2]) * (65536 / 6) >> 16;
593 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
594 src_ptr[stride + 3] + src_ptr[stride + 4] +
595 src_ptr[stride + 5]) * (65536 / 6) >> 16;
596 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
597 src_ptr[stride + 6] + src_ptr[stride + 7]) *
604 void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
605 uint16* dst_ptr, int dst_width) {
606 intptr_t stride = src_stride;
608 assert((dst_width % 3 == 0) && (dst_width > 0));
609 for (i = 0; i < dst_width; i += 3) {
610 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
611 src_ptr[stride + 0] + src_ptr[stride + 1] +
612 src_ptr[stride + 2]) * (65536 / 6) >> 16;
613 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
614 src_ptr[stride + 3] + src_ptr[stride + 4] +
615 src_ptr[stride + 5]) * (65536 / 6) >> 16;
616 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
617 src_ptr[stride + 6] + src_ptr[stride + 7]) *
624 void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
625 uint16* dst_ptr, int src_width, int src_height) {
627 assert(src_width > 0);
628 assert(src_height > 0);
629 for (x = 0; x < src_width; ++x) {
630 const uint8* s = src_ptr + x;
631 unsigned int sum = 0u;
633 for (y = 0; y < src_height; ++y) {
637 // TODO(fbarchard): Consider limitting height to 256 to avoid overflow.
638 dst_ptr[x] = sum < 65535u ? sum : 65535u;
642 void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
643 uint32* dst_ptr, int src_width, int src_height) {
645 assert(src_width > 0);
646 assert(src_height > 0);
647 for (x = 0; x < src_width; ++x) {
648 const uint16* s = src_ptr + x;
649 unsigned int sum = 0u;
651 for (y = 0; y < src_height; ++y) {
655 // No risk of overflow here now
660 void ScaleARGBRowDown2_C(const uint8* src_argb,
661 ptrdiff_t src_stride,
662 uint8* dst_argb, int dst_width) {
663 const uint32* src = (const uint32*)(src_argb);
664 uint32* dst = (uint32*)(dst_argb);
667 for (x = 0; x < dst_width - 1; x += 2) {
678 void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
679 ptrdiff_t src_stride,
680 uint8* dst_argb, int dst_width) {
682 for (x = 0; x < dst_width; ++x) {
683 dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
684 dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
685 dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
686 dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
692 void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
693 uint8* dst_argb, int dst_width) {
695 for (x = 0; x < dst_width; ++x) {
696 dst_argb[0] = (src_argb[0] + src_argb[4] +
697 src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
698 dst_argb[1] = (src_argb[1] + src_argb[5] +
699 src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
700 dst_argb[2] = (src_argb[2] + src_argb[6] +
701 src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
702 dst_argb[3] = (src_argb[3] + src_argb[7] +
703 src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
709 void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
711 uint8* dst_argb, int dst_width) {
712 const uint32* src = (const uint32*)(src_argb);
713 uint32* dst = (uint32*)(dst_argb);
716 for (x = 0; x < dst_width - 1; x += 2) {
718 dst[1] = src[src_stepx];
719 src += src_stepx * 2;
727 void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
728 ptrdiff_t src_stride,
730 uint8* dst_argb, int dst_width) {
732 for (x = 0; x < dst_width; ++x) {
733 dst_argb[0] = (src_argb[0] + src_argb[4] +
734 src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
735 dst_argb[1] = (src_argb[1] + src_argb[5] +
736 src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
737 dst_argb[2] = (src_argb[2] + src_argb[6] +
738 src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
739 dst_argb[3] = (src_argb[3] + src_argb[7] +
740 src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
741 src_argb += src_stepx * 4;
746 // Scales a single row of pixels using point sampling.
747 void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
748 int dst_width, int x, int dx) {
749 const uint32* src = (const uint32*)(src_argb);
750 uint32* dst = (uint32*)(dst_argb);
752 for (j = 0; j < dst_width - 1; j += 2) {
753 dst[0] = src[x >> 16];
755 dst[1] = src[x >> 16];
760 dst[0] = src[x >> 16];
764 void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
765 int dst_width, int x32, int dx) {
766 int64 x = (int64)(x32);
767 const uint32* src = (const uint32*)(src_argb);
768 uint32* dst = (uint32*)(dst_argb);
770 for (j = 0; j < dst_width - 1; j += 2) {
771 dst[0] = src[x >> 16];
773 dst[1] = src[x >> 16];
778 dst[0] = src[x >> 16];
782 // Scales a single row of pixels up by 2x using point sampling.
783 void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
784 int dst_width, int x, int dx) {
785 const uint32* src = (const uint32*)(src_argb);
786 uint32* dst = (uint32*)(dst_argb);
788 for (j = 0; j < dst_width - 1; j += 2) {
789 dst[1] = dst[0] = src[0];
798 // Mimics SSSE3 blender
// Mimics SSSE3 blender: per-channel blend with a 7-bit fraction f.
// Undefine the 16-bit scalar BLENDER before redefining for ARGB, and
// parenthesize the macro argument f to avoid precedence surprises.
#undef BLENDER
#define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
#define BLENDERC(a, b, f, s) (uint32)( \
    BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
#define BLENDER(a, b, f) \
    BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
    BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
806 void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
807 int dst_width, int x, int dx) {
808 const uint32* src = (const uint32*)(src_argb);
809 uint32* dst = (uint32*)(dst_argb);
811 for (j = 0; j < dst_width - 1; j += 2) {
813 int xf = (x >> 9) & 0x7f;
815 uint32 b = src[xi + 1];
816 dst[0] = BLENDER(a, b, xf);
819 xf = (x >> 9) & 0x7f;
822 dst[1] = BLENDER(a, b, xf);
828 int xf = (x >> 9) & 0x7f;
830 uint32 b = src[xi + 1];
831 dst[0] = BLENDER(a, b, xf);
835 void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
836 int dst_width, int x32, int dx) {
837 int64 x = (int64)(x32);
838 const uint32* src = (const uint32*)(src_argb);
839 uint32* dst = (uint32*)(dst_argb);
841 for (j = 0; j < dst_width - 1; j += 2) {
843 int xf = (x >> 9) & 0x7f;
845 uint32 b = src[xi + 1];
846 dst[0] = BLENDER(a, b, xf);
849 xf = (x >> 9) & 0x7f;
852 dst[1] = BLENDER(a, b, xf);
858 int xf = (x >> 9) & 0x7f;
860 uint32 b = src[xi + 1];
861 dst[0] = BLENDER(a, b, xf);
868 // Scale plane vertically with bilinear interpolation.
869 void ScalePlaneVertical(int src_height,
870 int dst_width, int dst_height,
871 int src_stride, int dst_stride,
872 const uint8* src_argb, uint8* dst_argb,
873 int x, int y, int dy,
874 int bpp, enum FilterMode filtering) {
875 // TODO(fbarchard): Allow higher bpp.
876 int dst_width_bytes = dst_width * bpp;
877 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
878 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
880 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
882 assert(bpp >= 1 && bpp <= 4);
883 assert(src_height != 0);
884 assert(dst_width > 0);
885 assert(dst_height > 0);
886 src_argb += (x >> 16) * bpp;
887 #if defined(HAS_INTERPOLATEROW_SSE2)
888 if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
889 InterpolateRow = InterpolateRow_Any_SSE2;
890 if (IS_ALIGNED(dst_width_bytes, 16)) {
891 InterpolateRow = InterpolateRow_Unaligned_SSE2;
892 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
893 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
894 InterpolateRow = InterpolateRow_SSE2;
899 #if defined(HAS_INTERPOLATEROW_SSSE3)
900 if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
901 InterpolateRow = InterpolateRow_Any_SSSE3;
902 if (IS_ALIGNED(dst_width_bytes, 16)) {
903 InterpolateRow = InterpolateRow_Unaligned_SSSE3;
904 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
905 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
906 InterpolateRow = InterpolateRow_SSSE3;
911 #if defined(HAS_INTERPOLATEROW_AVX2)
912 if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
913 InterpolateRow = InterpolateRow_Any_AVX2;
914 if (IS_ALIGNED(dst_width_bytes, 32)) {
915 InterpolateRow = InterpolateRow_AVX2;
919 #if defined(HAS_INTERPOLATEROW_NEON)
920 if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
921 InterpolateRow = InterpolateRow_Any_NEON;
922 if (IS_ALIGNED(dst_width_bytes, 16)) {
923 InterpolateRow = InterpolateRow_NEON;
927 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
928 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
929 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
930 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
931 InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
932 if (IS_ALIGNED(dst_width_bytes, 4)) {
933 InterpolateRow = InterpolateRow_MIPS_DSPR2;
937 for (j = 0; j < dst_height; ++j) {
944 yf = filtering ? ((y >> 8) & 255) : 0;
945 InterpolateRow(dst_argb, src_argb + yi * src_stride,
946 src_stride, dst_width_bytes, yf);
947 dst_argb += dst_stride;
951 void ScalePlaneVertical_16(int src_height,
952 int dst_width, int dst_height,
953 int src_stride, int dst_stride,
954 const uint16* src_argb, uint16* dst_argb,
955 int x, int y, int dy,
956 int wpp, enum FilterMode filtering) {
957 // TODO(fbarchard): Allow higher wpp.
958 int dst_width_words = dst_width * wpp;
959 void (*InterpolateRow)(uint16* dst_argb, const uint16* src_argb,
960 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
962 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
964 assert(wpp >= 1 && wpp <= 2);
965 assert(src_height != 0);
966 assert(dst_width > 0);
967 assert(dst_height > 0);
968 src_argb += (x >> 16) * wpp;
969 #if defined(HAS_INTERPOLATEROW_16_SSE2)
970 if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
971 InterpolateRow = InterpolateRow_Any_16_SSE2;
972 if (IS_ALIGNED(dst_width_bytes, 16)) {
973 InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
974 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
975 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
976 InterpolateRow = InterpolateRow_16_SSE2;
981 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
982 if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
983 InterpolateRow = InterpolateRow_Any_16_SSSE3;
984 if (IS_ALIGNED(dst_width_bytes, 16)) {
985 InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
986 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
987 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
988 InterpolateRow = InterpolateRow_16_SSSE3;
993 #if defined(HAS_INTERPOLATEROW_16_AVX2)
994 if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
995 InterpolateRow = InterpolateRow_Any_16_AVX2;
996 if (IS_ALIGNED(dst_width_bytes, 32)) {
997 InterpolateRow = InterpolateRow_16_AVX2;
1001 #if defined(HAS_INTERPOLATEROW_16_NEON)
1002 if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
1003 InterpolateRow = InterpolateRow_Any_16_NEON;
1004 if (IS_ALIGNED(dst_width_bytes, 16)) {
1005 InterpolateRow = InterpolateRow_16_NEON;
1009 #if defined(HAS_INTERPOLATEROWS_16_MIPS_DSPR2)
1010 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
1011 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
1012 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
1013 InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
1014 if (IS_ALIGNED(dst_width_bytes, 4)) {
1015 InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
1019 for (j = 0; j < dst_height; ++j) {
1026 yf = filtering ? ((y >> 8) & 255) : 0;
1027 InterpolateRow(dst_argb, src_argb + yi * src_stride,
1028 src_stride, dst_width_words, yf);
1029 dst_argb += dst_stride;
1034 // Simplify the filtering based on scale factors.
1035 enum FilterMode ScaleFilterReduce(int src_width, int src_height,
1036 int dst_width, int dst_height,
1037 enum FilterMode filtering) {
1038 if (src_width < 0) {
1039 src_width = -src_width;
1041 if (src_height < 0) {
1042 src_height = -src_height;
1044 if (filtering == kFilterBox) {
1045 // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
1046 if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
1047 filtering = kFilterBilinear;
1049 // If scaling to larger, switch from Box to Bilinear.
1050 if (dst_width >= src_width || dst_height >= src_height) {
1051 filtering = kFilterBilinear;
1054 if (filtering == kFilterBilinear) {
1055 if (src_height == 1) {
1056 filtering = kFilterLinear;
1058 // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
1059 if (dst_height == src_height || dst_height * 3 == src_height) {
1060 filtering = kFilterLinear;
1062 // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
1063 // avoid reading 2 pixels horizontally that causes memory exception.
1064 if (src_width == 1) {
1065 filtering = kFilterNone;
1068 if (filtering == kFilterLinear) {
1069 if (src_width == 1) {
1070 filtering = kFilterNone;
1072 // TODO(fbarchard): Detect any odd scale factor and reduce to None.
1073 if (dst_width == src_width || dst_width * 3 == src_width) {
1074 filtering = kFilterNone;
1080 // Divide num by div and return as 16.16 fixed point result.
1081 int FixedDiv_C(int num, int div) {
1082 return (int)(((int64)(num) << 16) / div);
1085 // Divide num by div and return as 16.16 fixed point result.
1086 int FixedDiv1_C(int num, int div) {
1087 return (int)((((int64)(num) << 16) - 0x00010001) /
1091 #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
1093 // Compute slope values for stepping.
1094 void ScaleSlope(int src_width, int src_height,
1095 int dst_width, int dst_height,
1096 enum FilterMode filtering,
1097 int* x, int* y, int* dx, int* dy) {
1102 assert(src_width != 0);
1103 assert(src_height != 0);
1104 assert(dst_width > 0);
1105 assert(dst_height > 0);
1106 // Check for 1 pixel and avoid FixedDiv overflow.
1107 if (dst_width == 1 && src_width >= 32768) {
1108 dst_width = src_width;
1110 if (dst_height == 1 && src_height >= 32768) {
1111 dst_height = src_height;
1113 if (filtering == kFilterBox) {
1114 // Scale step for point sampling duplicates all pixels equally.
1115 *dx = FixedDiv(Abs(src_width), dst_width);
1116 *dy = FixedDiv(src_height, dst_height);
1119 } else if (filtering == kFilterBilinear) {
1120 // Scale step for bilinear sampling renders last pixel once for upsample.
1121 if (dst_width <= Abs(src_width)) {
1122 *dx = FixedDiv(Abs(src_width), dst_width);
1123 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1124 } else if (dst_width > 1) {
1125 *dx = FixedDiv1(Abs(src_width), dst_width);
1128 if (dst_height <= src_height) {
1129 *dy = FixedDiv(src_height, dst_height);
1130 *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
1131 } else if (dst_height > 1) {
1132 *dy = FixedDiv1(src_height, dst_height);
1135 } else if (filtering == kFilterLinear) {
1136 // Scale step for bilinear sampling renders last pixel once for upsample.
1137 if (dst_width <= Abs(src_width)) {
1138 *dx = FixedDiv(Abs(src_width), dst_width);
1139 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1140 } else if (dst_width > 1) {
1141 *dx = FixedDiv1(Abs(src_width), dst_width);
1144 *dy = FixedDiv(src_height, dst_height);
1147 // Scale step for point sampling duplicates all pixels equally.
1148 *dx = FixedDiv(Abs(src_width), dst_width);
1149 *dy = FixedDiv(src_height, dst_height);
1150 *x = CENTERSTART(*dx, 0);
1151 *y = CENTERSTART(*dy, 0);
1153 // Negative src_width means horizontally mirror.
1154 if (src_width < 0) {
1155 *x += (dst_width - 1) * *dx;
1157 // src_width = -src_width; // Caller must do this.
1164 } // namespace libyuv