/*
 *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "libyuv/scale.h"

#include <assert.h>
#include <string.h>

#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"  // For CopyARGB
#include "libyuv/row.h"
#include "libyuv/scale_row.h"
// Returns the absolute value of v.  Note: undefined for INT_MIN, which no
// caller in this file passes (widths/heights are bounded well below it).
static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}
30 // CPU agnostic row functions
31 void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
32 uint8* dst, int dst_width) {
34 for (x = 0; x < dst_width - 1; x += 2) {
45 void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
46 uint16* dst, int dst_width) {
48 for (x = 0; x < dst_width - 1; x += 2) {
59 void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
60 uint8* dst, int dst_width) {
61 const uint8* s = src_ptr;
63 for (x = 0; x < dst_width - 1; x += 2) {
64 dst[0] = (s[0] + s[1] + 1) >> 1;
65 dst[1] = (s[2] + s[3] + 1) >> 1;
70 dst[0] = (s[0] + s[1] + 1) >> 1;
74 void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
75 uint16* dst, int dst_width) {
76 const uint16* s = src_ptr;
78 for (x = 0; x < dst_width - 1; x += 2) {
79 dst[0] = (s[0] + s[1] + 1) >> 1;
80 dst[1] = (s[2] + s[3] + 1) >> 1;
85 dst[0] = (s[0] + s[1] + 1) >> 1;
89 void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
90 uint8* dst, int dst_width) {
91 const uint8* s = src_ptr;
92 const uint8* t = src_ptr + src_stride;
94 for (x = 0; x < dst_width - 1; x += 2) {
95 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
96 dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
102 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
106 void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
107 uint16* dst, int dst_width) {
108 const uint16* s = src_ptr;
109 const uint16* t = src_ptr + src_stride;
111 for (x = 0; x < dst_width - 1; x += 2) {
112 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
113 dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
119 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
123 void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
124 uint8* dst, int dst_width) {
126 for (x = 0; x < dst_width - 1; x += 2) {
137 void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
138 uint16* dst, int dst_width) {
140 for (x = 0; x < dst_width - 1; x += 2) {
151 void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
152 uint8* dst, int dst_width) {
153 intptr_t stride = src_stride;
155 for (x = 0; x < dst_width - 1; x += 2) {
156 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
157 src_ptr[stride + 0] + src_ptr[stride + 1] +
158 src_ptr[stride + 2] + src_ptr[stride + 3] +
159 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
160 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
161 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
162 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
164 dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
165 src_ptr[stride + 4] + src_ptr[stride + 5] +
166 src_ptr[stride + 6] + src_ptr[stride + 7] +
167 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
168 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
169 src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
170 src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
176 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
177 src_ptr[stride + 0] + src_ptr[stride + 1] +
178 src_ptr[stride + 2] + src_ptr[stride + 3] +
179 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
180 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
181 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
182 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
187 void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
188 uint16* dst, int dst_width) {
189 intptr_t stride = src_stride;
191 for (x = 0; x < dst_width - 1; x += 2) {
192 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
193 src_ptr[stride + 0] + src_ptr[stride + 1] +
194 src_ptr[stride + 2] + src_ptr[stride + 3] +
195 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
196 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
197 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
198 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
200 dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
201 src_ptr[stride + 4] + src_ptr[stride + 5] +
202 src_ptr[stride + 6] + src_ptr[stride + 7] +
203 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
204 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
205 src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
206 src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
212 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
213 src_ptr[stride + 0] + src_ptr[stride + 1] +
214 src_ptr[stride + 2] + src_ptr[stride + 3] +
215 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
216 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
217 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
218 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
223 void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
224 uint8* dst, int dst_width) {
226 assert((dst_width % 3 == 0) && (dst_width > 0));
227 for (x = 0; x < dst_width; x += 3) {
236 void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
237 uint16* dst, int dst_width) {
239 assert((dst_width % 3 == 0) && (dst_width > 0));
240 for (x = 0; x < dst_width; x += 3) {
249 // Filter rows 0 and 1 together, 3 : 1
250 void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
251 uint8* d, int dst_width) {
252 const uint8* s = src_ptr;
253 const uint8* t = src_ptr + src_stride;
255 assert((dst_width % 3 == 0) && (dst_width > 0));
256 for (x = 0; x < dst_width; x += 3) {
257 uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
258 uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
259 uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
260 uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
261 uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
262 uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
263 d[0] = (a0 * 3 + b0 + 2) >> 2;
264 d[1] = (a1 * 3 + b1 + 2) >> 2;
265 d[2] = (a2 * 3 + b2 + 2) >> 2;
272 void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
273 uint16* d, int dst_width) {
274 const uint16* s = src_ptr;
275 const uint16* t = src_ptr + src_stride;
277 assert((dst_width % 3 == 0) && (dst_width > 0));
278 for (x = 0; x < dst_width; x += 3) {
279 uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
280 uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
281 uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
282 uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
283 uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
284 uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
285 d[0] = (a0 * 3 + b0 + 2) >> 2;
286 d[1] = (a1 * 3 + b1 + 2) >> 2;
287 d[2] = (a2 * 3 + b2 + 2) >> 2;
294 // Filter rows 1 and 2 together, 1 : 1
295 void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
296 uint8* d, int dst_width) {
297 const uint8* s = src_ptr;
298 const uint8* t = src_ptr + src_stride;
300 assert((dst_width % 3 == 0) && (dst_width > 0));
301 for (x = 0; x < dst_width; x += 3) {
302 uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
303 uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
304 uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
305 uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
306 uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
307 uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
308 d[0] = (a0 + b0 + 1) >> 1;
309 d[1] = (a1 + b1 + 1) >> 1;
310 d[2] = (a2 + b2 + 1) >> 1;
317 void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
318 uint16* d, int dst_width) {
319 const uint16* s = src_ptr;
320 const uint16* t = src_ptr + src_stride;
322 assert((dst_width % 3 == 0) && (dst_width > 0));
323 for (x = 0; x < dst_width; x += 3) {
324 uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
325 uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
326 uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
327 uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
328 uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
329 uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
330 d[0] = (a0 + b0 + 1) >> 1;
331 d[1] = (a1 + b1 + 1) >> 1;
332 d[2] = (a2 + b2 + 1) >> 1;
339 // Scales a single row of pixels using point sampling.
340 void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
341 int dst_width, int x, int dx) {
343 for (j = 0; j < dst_width - 1; j += 2) {
344 dst_ptr[0] = src_ptr[x >> 16];
346 dst_ptr[1] = src_ptr[x >> 16];
351 dst_ptr[0] = src_ptr[x >> 16];
355 void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
356 int dst_width, int x, int dx) {
358 for (j = 0; j < dst_width - 1; j += 2) {
359 dst_ptr[0] = src_ptr[x >> 16];
361 dst_ptr[1] = src_ptr[x >> 16];
366 dst_ptr[0] = src_ptr[x >> 16];
370 // Scales a single row of pixels up by 2x using point sampling.
371 void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
372 int dst_width, int x, int dx) {
374 for (j = 0; j < dst_width - 1; j += 2) {
375 dst_ptr[1] = dst_ptr[0] = src_ptr[0];
380 dst_ptr[0] = src_ptr[0];
384 void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
385 int dst_width, int x, int dx) {
387 for (j = 0; j < dst_width - 1; j += 2) {
388 dst_ptr[1] = dst_ptr[0] = src_ptr[0];
393 dst_ptr[0] = src_ptr[0];
397 // (1-f)a + fb can be replaced with a + f(b-a)
// Linear blend of a and b by 16.16 fixed-point fraction f, for 8-bit pixels.
// (1-f)a + fb can be replaced with a + f(b-a).
#define BLENDER(a, b, f) (uint8)((int)(a) + \
((int)(f) * ((int)(b) - (int)(a)) >> 16))
401 void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
402 int dst_width, int x, int dx) {
404 for (j = 0; j < dst_width - 1; j += 2) {
407 int b = src_ptr[xi + 1];
408 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
413 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
420 int b = src_ptr[xi + 1];
421 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
425 void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
426 int dst_width, int x32, int dx) {
427 int64 x = (int64)(x32);
429 for (j = 0; j < dst_width - 1; j += 2) {
432 int b = src_ptr[xi + 1];
433 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
438 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
445 int b = src_ptr[xi + 1];
446 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
// 16-bit pixel version of BLENDER.  Undefine the 8-bit version first to
// avoid a macro-redefinition error/warning.
#undef BLENDER
#define BLENDER(a, b, f) (uint16)((int)(a) + \
    ((int)(f) * ((int)(b) - (int)(a)) >> 16))
454 void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
455 int dst_width, int x, int dx) {
457 for (j = 0; j < dst_width - 1; j += 2) {
460 int b = src_ptr[xi + 1];
461 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
466 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
473 int b = src_ptr[xi + 1];
474 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
478 void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
479 int dst_width, int x32, int dx) {
480 int64 x = (int64)(x32);
482 for (j = 0; j < dst_width - 1; j += 2) {
485 int b = src_ptr[xi + 1];
486 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
491 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
498 int b = src_ptr[xi + 1];
499 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
504 void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
505 uint8* dst, int dst_width) {
507 assert(dst_width % 3 == 0);
508 for (x = 0; x < dst_width; x += 3) {
517 void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
518 uint16* dst, int dst_width) {
520 assert(dst_width % 3 == 0);
521 for (x = 0; x < dst_width; x += 3) {
531 void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
532 ptrdiff_t src_stride,
533 uint8* dst_ptr, int dst_width) {
534 intptr_t stride = src_stride;
536 assert((dst_width % 3 == 0) && (dst_width > 0));
537 for (i = 0; i < dst_width; i += 3) {
538 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
539 src_ptr[stride + 0] + src_ptr[stride + 1] +
540 src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
541 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
543 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
544 src_ptr[stride + 3] + src_ptr[stride + 4] +
545 src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
546 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
548 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
549 src_ptr[stride + 6] + src_ptr[stride + 7] +
550 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
557 void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
558 ptrdiff_t src_stride,
559 uint16* dst_ptr, int dst_width) {
560 intptr_t stride = src_stride;
562 assert((dst_width % 3 == 0) && (dst_width > 0));
563 for (i = 0; i < dst_width; i += 3) {
564 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
565 src_ptr[stride + 0] + src_ptr[stride + 1] +
566 src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
567 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
569 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
570 src_ptr[stride + 3] + src_ptr[stride + 4] +
571 src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
572 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
574 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
575 src_ptr[stride + 6] + src_ptr[stride + 7] +
576 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
584 void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
585 uint8* dst_ptr, int dst_width) {
586 intptr_t stride = src_stride;
588 assert((dst_width % 3 == 0) && (dst_width > 0));
589 for (i = 0; i < dst_width; i += 3) {
590 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
591 src_ptr[stride + 0] + src_ptr[stride + 1] +
592 src_ptr[stride + 2]) * (65536 / 6) >> 16;
593 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
594 src_ptr[stride + 3] + src_ptr[stride + 4] +
595 src_ptr[stride + 5]) * (65536 / 6) >> 16;
596 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
597 src_ptr[stride + 6] + src_ptr[stride + 7]) *
604 void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
605 uint16* dst_ptr, int dst_width) {
606 intptr_t stride = src_stride;
608 assert((dst_width % 3 == 0) && (dst_width > 0));
609 for (i = 0; i < dst_width; i += 3) {
610 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
611 src_ptr[stride + 0] + src_ptr[stride + 1] +
612 src_ptr[stride + 2]) * (65536 / 6) >> 16;
613 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
614 src_ptr[stride + 3] + src_ptr[stride + 4] +
615 src_ptr[stride + 5]) * (65536 / 6) >> 16;
616 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
617 src_ptr[stride + 6] + src_ptr[stride + 7]) *
624 void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
625 uint16* dst_ptr, int src_width, int src_height) {
627 assert(src_width > 0);
628 assert(src_height > 0);
629 for (x = 0; x < src_width; ++x) {
630 const uint8* s = src_ptr + x;
631 unsigned int sum = 0u;
633 for (y = 0; y < src_height; ++y) {
637 // TODO(fbarchard): Consider limitting height to 256 to avoid overflow.
638 dst_ptr[x] = sum < 65535u ? sum : 65535u;
642 void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
643 uint32* dst_ptr, int src_width, int src_height) {
645 assert(src_width > 0);
646 assert(src_height > 0);
647 for (x = 0; x < src_width; ++x) {
648 const uint16* s = src_ptr + x;
649 unsigned int sum = 0u;
651 for (y = 0; y < src_height; ++y) {
655 // No risk of overflow here now
660 void ScaleARGBRowDown2_C(const uint8* src_argb,
661 ptrdiff_t src_stride,
662 uint8* dst_argb, int dst_width) {
663 const uint32* src = (const uint32*)(src_argb);
664 uint32* dst = (uint32*)(dst_argb);
667 for (x = 0; x < dst_width - 1; x += 2) {
678 void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
679 ptrdiff_t src_stride,
680 uint8* dst_argb, int dst_width) {
682 for (x = 0; x < dst_width; ++x) {
683 dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
684 dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
685 dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
686 dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
692 void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
693 uint8* dst_argb, int dst_width) {
695 for (x = 0; x < dst_width; ++x) {
696 dst_argb[0] = (src_argb[0] + src_argb[4] +
697 src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
698 dst_argb[1] = (src_argb[1] + src_argb[5] +
699 src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
700 dst_argb[2] = (src_argb[2] + src_argb[6] +
701 src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
702 dst_argb[3] = (src_argb[3] + src_argb[7] +
703 src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
709 void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
711 uint8* dst_argb, int dst_width) {
712 const uint32* src = (const uint32*)(src_argb);
713 uint32* dst = (uint32*)(dst_argb);
716 for (x = 0; x < dst_width - 1; x += 2) {
718 dst[1] = src[src_stepx];
719 src += src_stepx * 2;
727 void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
728 ptrdiff_t src_stride,
730 uint8* dst_argb, int dst_width) {
732 for (x = 0; x < dst_width; ++x) {
733 dst_argb[0] = (src_argb[0] + src_argb[4] +
734 src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
735 dst_argb[1] = (src_argb[1] + src_argb[5] +
736 src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
737 dst_argb[2] = (src_argb[2] + src_argb[6] +
738 src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
739 dst_argb[3] = (src_argb[3] + src_argb[7] +
740 src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
741 src_argb += src_stepx * 4;
746 // Scales a single row of pixels using point sampling.
747 void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
748 int dst_width, int x, int dx) {
749 const uint32* src = (const uint32*)(src_argb);
750 uint32* dst = (uint32*)(dst_argb);
752 for (j = 0; j < dst_width - 1; j += 2) {
753 dst[0] = src[x >> 16];
755 dst[1] = src[x >> 16];
760 dst[0] = src[x >> 16];
764 void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
765 int dst_width, int x32, int dx) {
766 int64 x = (int64)(x32);
767 const uint32* src = (const uint32*)(src_argb);
768 uint32* dst = (uint32*)(dst_argb);
770 for (j = 0; j < dst_width - 1; j += 2) {
771 dst[0] = src[x >> 16];
773 dst[1] = src[x >> 16];
778 dst[0] = src[x >> 16];
782 // Scales a single row of pixels up by 2x using point sampling.
783 void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
784 int dst_width, int x, int dx) {
785 const uint32* src = (const uint32*)(src_argb);
786 uint32* dst = (uint32*)(dst_argb);
788 for (j = 0; j < dst_width - 1; j += 2) {
789 dst[1] = dst[0] = src[0];
798 // Mimics SSSE3 blender
// Mimics SSSE3 blender: per-channel blend with a 7-bit fraction f.
// Undefine the 16-bit scalar BLENDER before redefining for ARGB, and
// parenthesize the macro argument f to avoid precedence surprises.
#undef BLENDER
#define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
#define BLENDERC(a, b, f, s) (uint32)( \
    BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
#define BLENDER(a, b, f) \
    BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
    BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
806 void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
807 int dst_width, int x, int dx) {
808 const uint32* src = (const uint32*)(src_argb);
809 uint32* dst = (uint32*)(dst_argb);
811 for (j = 0; j < dst_width - 1; j += 2) {
813 int xf = (x >> 9) & 0x7f;
815 uint32 b = src[xi + 1];
816 dst[0] = BLENDER(a, b, xf);
819 xf = (x >> 9) & 0x7f;
822 dst[1] = BLENDER(a, b, xf);
828 int xf = (x >> 9) & 0x7f;
830 uint32 b = src[xi + 1];
831 dst[0] = BLENDER(a, b, xf);
835 void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
836 int dst_width, int x32, int dx) {
837 int64 x = (int64)(x32);
838 const uint32* src = (const uint32*)(src_argb);
839 uint32* dst = (uint32*)(dst_argb);
841 for (j = 0; j < dst_width - 1; j += 2) {
843 int xf = (x >> 9) & 0x7f;
845 uint32 b = src[xi + 1];
846 dst[0] = BLENDER(a, b, xf);
849 xf = (x >> 9) & 0x7f;
852 dst[1] = BLENDER(a, b, xf);
858 int xf = (x >> 9) & 0x7f;
860 uint32 b = src[xi + 1];
861 dst[0] = BLENDER(a, b, xf);
868 // Scale plane vertically with bilinear interpolation.
869 void ScalePlaneVertical(int src_height,
870 int dst_width, int dst_height,
871 int src_stride, int dst_stride,
872 const uint8* src_argb, uint8* dst_argb,
873 int x, int y, int dy,
874 int bpp, enum FilterMode filtering) {
875 // TODO(fbarchard): Allow higher bpp.
876 int dst_width_bytes = dst_width * bpp;
877 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
878 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
880 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
882 assert(bpp >= 1 && bpp <= 4);
883 assert(src_height != 0);
884 assert(dst_width > 0);
885 assert(dst_height > 0);
886 src_argb += (x >> 16) * bpp;
887 #if defined(HAS_INTERPOLATEROW_SSE2)
888 if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
889 InterpolateRow = InterpolateRow_Any_SSE2;
890 if (IS_ALIGNED(dst_width_bytes, 16)) {
891 InterpolateRow = InterpolateRow_Unaligned_SSE2;
892 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
893 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
894 InterpolateRow = InterpolateRow_SSE2;
899 #if defined(HAS_INTERPOLATEROW_SSSE3)
900 if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
901 InterpolateRow = InterpolateRow_Any_SSSE3;
902 if (IS_ALIGNED(dst_width_bytes, 16)) {
903 InterpolateRow = InterpolateRow_Unaligned_SSSE3;
904 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
905 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
906 InterpolateRow = InterpolateRow_SSSE3;
911 #if defined(HAS_INTERPOLATEROW_AVX2)
912 if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
913 InterpolateRow = InterpolateRow_Any_AVX2;
914 if (IS_ALIGNED(dst_width_bytes, 32)) {
915 InterpolateRow = InterpolateRow_AVX2;
919 #if defined(HAS_INTERPOLATEROW_NEON)
920 if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
921 InterpolateRow = InterpolateRow_Any_NEON;
922 if (IS_ALIGNED(dst_width_bytes, 16)) {
923 InterpolateRow = InterpolateRow_NEON;
927 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
928 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
929 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
930 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
931 InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
932 if (IS_ALIGNED(dst_width_bytes, 4)) {
933 InterpolateRow = InterpolateRow_MIPS_DSPR2;
937 for (j = 0; j < dst_height; ++j) {
944 yf = filtering ? ((y >> 8) & 255) : 0;
945 InterpolateRow(dst_argb, src_argb + yi * src_stride,
946 src_stride, dst_width_bytes, yf);
947 dst_argb += dst_stride;
951 void ScalePlaneVertical_16(int src_height,
952 int dst_width, int dst_height,
953 int src_stride, int dst_stride,
954 const uint16* src_argb, uint16* dst_argb,
955 int x, int y, int dy,
956 int wpp, enum FilterMode filtering) {
957 // TODO(fbarchard): Allow higher wpp.
958 int dst_width_words = dst_width * wpp;
959 void (*InterpolateRow)(uint16* dst_argb, const uint16* src_argb,
960 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
962 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
964 assert(wpp >= 1 && wpp <= 2);
965 assert(src_height != 0);
966 assert(dst_width > 0);
967 assert(dst_height > 0);
968 src_argb += (x >> 16) * wpp;
969 #if defined(HAS_INTERPOLATEROW_16_SSE2)
970 if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
971 InterpolateRow = InterpolateRow_Any_16_SSE2;
972 if (IS_ALIGNED(dst_width_bytes, 16)) {
973 InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
974 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
975 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
976 InterpolateRow = InterpolateRow_16_SSE2;
981 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
982 if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
983 InterpolateRow = InterpolateRow_Any_16_SSSE3;
984 if (IS_ALIGNED(dst_width_bytes, 16)) {
985 InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
986 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
987 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
988 InterpolateRow = InterpolateRow_16_SSSE3;
993 #if defined(HAS_INTERPOLATEROW_16_AVX2)
994 if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
995 InterpolateRow = InterpolateRow_Any_16_AVX2;
996 if (IS_ALIGNED(dst_width_bytes, 32)) {
997 InterpolateRow = InterpolateRow_16_AVX2;
1001 #if defined(HAS_INTERPOLATEROW_16_NEON)
1002 if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
1003 InterpolateRow = InterpolateRow_Any_16_NEON;
1004 if (IS_ALIGNED(dst_width_bytes, 16)) {
1005 InterpolateRow = InterpolateRow_16_NEON;
1009 #if defined(HAS_INTERPOLATEROWS_16_MIPS_DSPR2)
1010 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
1011 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
1012 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
1013 InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
1014 if (IS_ALIGNED(dst_width_bytes, 4)) {
1015 InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
1019 for (j = 0; j < dst_height; ++j) {
1026 yf = filtering ? ((y >> 8) & 255) : 0;
1027 InterpolateRow(dst_argb, src_argb + yi * src_stride,
1028 src_stride, dst_width_words, yf);
1029 dst_argb += dst_stride;
1034 // Simplify the filtering based on scale factors.
1035 enum FilterMode ScaleFilterReduce(int src_width, int src_height,
1036 int dst_width, int dst_height,
1037 enum FilterMode filtering) {
1038 if (src_width < 0) {
1039 src_width = -src_width;
1041 if (src_height < 0) {
1042 src_height = -src_height;
1044 if (filtering == kFilterBox) {
1045 // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
1046 if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
1047 filtering = kFilterBilinear;
1049 // If scaling to larger, switch from Box to Bilinear.
1050 if (dst_width >= src_width || dst_height >= src_height) {
1051 filtering = kFilterBilinear;
1054 if (filtering == kFilterBilinear) {
1055 if (src_height == 1) {
1056 filtering = kFilterLinear;
1058 // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
1059 if (dst_height == src_height || dst_height * 3 == src_height) {
1060 filtering = kFilterLinear;
1062 // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
1063 // avoid reading 2 pixels horizontally that causes memory exception.
1064 if (src_width == 1) {
1065 filtering = kFilterNone;
1068 if (filtering == kFilterLinear) {
1069 if (src_width == 1) {
1070 filtering = kFilterNone;
1072 // TODO(fbarchard): Detect any odd scale factor and reduce to None.
1073 if (dst_width == src_width || dst_width * 3 == src_width) {
1074 filtering = kFilterNone;
1080 // Divide num by div and return as 16.16 fixed point result.
1081 int FixedDiv_C(int num, int div) {
1082 return (int)(((int64)(num) << 16) / div);
1085 // Divide num by div and return as 16.16 fixed point result.
1086 int FixedDiv1_C(int num, int div) {
1087 return (int)((((int64)(num) << 16) - 0x00010001) /
1091 #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
1093 // Compute slope values for stepping.
1094 void ScaleSlope(int src_width, int src_height,
1095 int dst_width, int dst_height,
1096 enum FilterMode filtering,
1097 int* x, int* y, int* dx, int* dy) {
1102 assert(src_width != 0);
1103 assert(src_height != 0);
1104 assert(dst_width > 0);
1105 assert(dst_height > 0);
1106 // Check for 1 pixel and avoid FixedDiv overflow.
1107 if (dst_width == 1 && src_width >= 32768) {
1108 dst_width = src_width;
1110 if (dst_height == 1 && src_height >= 32768) {
1111 dst_height = src_height;
1113 if (filtering == kFilterBox) {
1114 // Scale step for point sampling duplicates all pixels equally.
1115 *dx = FixedDiv(Abs(src_width), dst_width);
1116 *dy = FixedDiv(src_height, dst_height);
1119 } else if (filtering == kFilterBilinear) {
1120 // Scale step for bilinear sampling renders last pixel once for upsample.
1121 if (dst_width <= Abs(src_width)) {
1122 *dx = FixedDiv(Abs(src_width), dst_width);
1123 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1124 } else if (dst_width > 1) {
1125 *dx = FixedDiv1(Abs(src_width), dst_width);
1128 if (dst_height <= src_height) {
1129 *dy = FixedDiv(src_height, dst_height);
1130 *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
1131 } else if (dst_height > 1) {
1132 *dy = FixedDiv1(src_height, dst_height);
1135 } else if (filtering == kFilterLinear) {
1136 // Scale step for bilinear sampling renders last pixel once for upsample.
1137 if (dst_width <= Abs(src_width)) {
1138 *dx = FixedDiv(Abs(src_width), dst_width);
1139 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1140 } else if (dst_width > 1) {
1141 *dx = FixedDiv1(Abs(src_width), dst_width);
1144 *dy = FixedDiv(src_height, dst_height);
1147 // Scale step for point sampling duplicates all pixels equally.
1148 *dx = FixedDiv(Abs(src_width), dst_width);
1149 *dy = FixedDiv(src_height, dst_height);
1150 *x = CENTERSTART(*dx, 0);
1151 *y = CENTERSTART(*dy, 0);
1153 // Negative src_width means horizontally mirror.
1154 if (src_width < 0) {
1155 *x += (dst_width - 1) * *dx;
1157 // src_width = -src_width; // Caller must do this.
1164 } // namespace libyuv