2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
11 #include "./vp9_rtcd.h"
13 #include "vpx_ports/mem.h"
14 #include "vpx/vpx_integer.h"
16 #include "vp9/common/vp9_common.h"
17 #include "vp9/common/vp9_filter.h"
19 #include "vp9/encoder/vp9_variance.h"
21 void variance(const uint8_t *src_ptr,
23 const uint8_t *ref_ptr,
35 for (i = 0; i < h; i++) {
36 for (j = 0; j < w; j++) {
37 diff = src_ptr[j] - ref_ptr[j];
42 src_ptr += source_stride;
43 ref_ptr += recon_stride;
47 /****************************************************************************
49 * ROUTINE : var_filter_block2d_bil_first_pass
51 * INPUTS : uint8_t *src_ptr : Pointer to source block.
52 * uint32_t src_pixels_per_line : Stride of input block.
53 * uint32_t pixel_step : Offset between filter input
54 * samples (see notes).
55 * uint32_t output_height : Input block height.
56 * uint32_t output_width : Input block width.
57 * int16_t *vp9_filter : Array of 2 bi-linear filter taps.
60 * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block.
64 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
65 * either horizontal or vertical direction to produce the
66 * filtered output block. Used to implement first-pass
67 * of 2-D separable filter.
69 * SPECIAL NOTES : Produces uint16_t output for consumption by the second pass.
70 * Two filter taps should sum to VP9_FILTER_WEIGHT.
71 * pixel_step defines whether the filter is applied
72 * horizontally (pixel_step=1) or vertically (pixel_step=stride).
74 * It defines the offset required to move from one input sample to the next.
77 ****************************************************************************/
78 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
80 unsigned int src_pixels_per_line,
82 unsigned int output_height,
83 unsigned int output_width,
84 const int16_t *vp9_filter) {
87 for (i = 0; i < output_height; i++) {
88 for (j = 0; j < output_width; j++) {
89 output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
90 (int)src_ptr[pixel_step] * vp9_filter[1],
97 src_ptr += src_pixels_per_line - output_width;
98 output_ptr += output_width;
102 /****************************************************************************
104 * ROUTINE : var_filter_block2d_bil_second_pass
106 * INPUTS : uint16_t *src_ptr : Pointer to source block.
107 * uint32_t src_pixels_per_line : Stride of input block.
108 * uint32_t pixel_step : Offset between filter input
109 * samples (see notes).
110 * uint32_t output_height : Input block height.
111 * uint32_t output_width : Input block width.
112 * int16_t *vp9_filter : Array of 2 bi-linear filter taps.
115 * OUTPUTS : uint8_t *output_ptr : Pointer to filtered block.
119 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
120 * either horizontal or vertical direction to produce the
121 * filtered output block. Used to implement second-pass
122 * of 2-D separable filter.
124 * SPECIAL NOTES : Requires 16-bit (uint16_t) input as produced by
125 * var_filter_block2d_bil_first_pass.
126 * Two filter taps should sum to VP9_FILTER_WEIGHT.
127 * pixel_step defines whether the filter is applied
128 * horizontally (pixel_step=1) or vertically (pixel_step=stride).
130 * It defines the offset required to move from one input sample to the next.
133 ****************************************************************************/
134 static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
136 unsigned int src_pixels_per_line,
137 unsigned int pixel_step,
138 unsigned int output_height,
139 unsigned int output_width,
140 const int16_t *vp9_filter) {
143 for (i = 0; i < output_height; i++) {
144 for (j = 0; j < output_width; j++) {
145 output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
146 (int)src_ptr[pixel_step] * vp9_filter[1],
151 src_ptr += src_pixels_per_line - output_width;
152 output_ptr += output_width;
156 unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
157 unsigned int i, sum = 0;
159 for (i = 0; i < 256; i++) {
160 sum += (src_ptr[i] * src_ptr[i]);
166 unsigned int vp9_variance64x32_c(const uint8_t *src_ptr,
168 const uint8_t *ref_ptr,
174 variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, &var, &avg);
176 return (var - (((int64_t)avg * avg) >> 11));
179 unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
180 int src_pixels_per_line,
183 const uint8_t *dst_ptr,
184 int dst_pixels_per_line,
186 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
187 uint8_t temp2[68 * 64];
188 const int16_t *hfilter, *vfilter;
190 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
191 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
193 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
195 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
197 return vp9_variance64x32(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
200 unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
201 int src_pixels_per_line,
204 const uint8_t *dst_ptr,
205 int dst_pixels_per_line,
207 const uint8_t *second_pred) {
208 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
209 uint8_t temp2[68 * 64];
210 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer
211 const int16_t *hfilter, *vfilter;
213 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
214 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
216 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
218 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
219 comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
220 return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
223 unsigned int vp9_variance32x64_c(const uint8_t *src_ptr,
225 const uint8_t *ref_ptr,
231 variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, &var, &avg);
233 return (var - (((int64_t)avg * avg) >> 11));
236 unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
237 int src_pixels_per_line,
240 const uint8_t *dst_ptr,
241 int dst_pixels_per_line,
243 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
244 uint8_t temp2[68 * 64];
245 const int16_t *hfilter, *vfilter;
247 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
248 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
250 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
252 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
254 return vp9_variance32x64(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
257 unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
258 int src_pixels_per_line,
261 const uint8_t *dst_ptr,
262 int dst_pixels_per_line,
264 const uint8_t *second_pred) {
265 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
266 uint8_t temp2[68 * 64];
267 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer
268 const int16_t *hfilter, *vfilter;
270 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
271 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
273 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
275 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
276 comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
277 return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
280 unsigned int vp9_variance32x16_c(const uint8_t *src_ptr,
282 const uint8_t *ref_ptr,
288 variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, &var, &avg);
290 return (var - (((int64_t)avg * avg) >> 9));
293 unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
294 int src_pixels_per_line,
297 const uint8_t *dst_ptr,
298 int dst_pixels_per_line,
300 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
301 uint8_t temp2[36 * 32];
302 const int16_t *hfilter, *vfilter;
304 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
305 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
307 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
309 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
311 return vp9_variance32x16(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
314 unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
315 int src_pixels_per_line,
318 const uint8_t *dst_ptr,
319 int dst_pixels_per_line,
321 const uint8_t *second_pred) {
322 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
323 uint8_t temp2[36 * 32];
324 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer
325 const int16_t *hfilter, *vfilter;
327 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
328 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
330 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
332 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
333 comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
334 return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
337 unsigned int vp9_variance16x32_c(const uint8_t *src_ptr,
339 const uint8_t *ref_ptr,
345 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, &var, &avg);
347 return (var - (((int64_t)avg * avg) >> 9));
350 unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
351 int src_pixels_per_line,
354 const uint8_t *dst_ptr,
355 int dst_pixels_per_line,
357 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
358 uint8_t temp2[36 * 32];
359 const int16_t *hfilter, *vfilter;
361 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
362 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
364 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
366 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
368 return vp9_variance16x32(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
371 unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
372 int src_pixels_per_line,
375 const uint8_t *dst_ptr,
376 int dst_pixels_per_line,
378 const uint8_t *second_pred) {
379 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
380 uint8_t temp2[36 * 32];
381 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer
382 const int16_t *hfilter, *vfilter;
384 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
385 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
387 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
389 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
390 comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
391 return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
394 unsigned int vp9_variance64x64_c(const uint8_t *src_ptr,
396 const uint8_t *ref_ptr,
402 variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, &var, &avg);
404 return (var - (((int64_t)avg * avg) >> 12));
407 unsigned int vp9_variance32x32_c(const uint8_t *src_ptr,
409 const uint8_t *ref_ptr,
415 variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &var, &avg);
417 return (var - (((int64_t)avg * avg) >> 10));
420 unsigned int vp9_variance16x16_c(const uint8_t *src_ptr,
422 const uint8_t *ref_ptr,
428 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
430 return (var - (((unsigned int)avg * avg) >> 8));
433 unsigned int vp9_variance8x16_c(const uint8_t *src_ptr,
435 const uint8_t *ref_ptr,
441 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
443 return (var - (((unsigned int)avg * avg) >> 7));
446 unsigned int vp9_variance16x8_c(const uint8_t *src_ptr,
448 const uint8_t *ref_ptr,
454 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
456 return (var - (((unsigned int)avg * avg) >> 7));
459 void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
460 const uint8_t *ref_ptr, int ref_stride,
461 unsigned int *sse, int *sum) {
462 variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum);
465 unsigned int vp9_variance8x8_c(const uint8_t *src_ptr,
467 const uint8_t *ref_ptr,
473 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
475 return (var - (((unsigned int)avg * avg) >> 6));
478 unsigned int vp9_variance8x4_c(const uint8_t *src_ptr,
480 const uint8_t *ref_ptr,
486 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, &var, &avg);
488 return (var - (((unsigned int)avg * avg) >> 5));
491 unsigned int vp9_variance4x8_c(const uint8_t *src_ptr,
493 const uint8_t *ref_ptr,
499 variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, &var, &avg);
501 return (var - (((unsigned int)avg * avg) >> 5));
504 unsigned int vp9_variance4x4_c(const uint8_t *src_ptr,
506 const uint8_t *ref_ptr,
512 variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
514 return (var - (((unsigned int)avg * avg) >> 4));
518 unsigned int vp9_mse16x16_c(const uint8_t *src_ptr,
520 const uint8_t *ref_ptr,
526 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
531 unsigned int vp9_mse16x8_c(const uint8_t *src_ptr,
533 const uint8_t *ref_ptr,
539 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
544 unsigned int vp9_mse8x16_c(const uint8_t *src_ptr,
546 const uint8_t *ref_ptr,
552 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
557 unsigned int vp9_mse8x8_c(const uint8_t *src_ptr,
559 const uint8_t *ref_ptr,
565 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
571 unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
572 int src_pixels_per_line,
575 const uint8_t *dst_ptr,
576 int dst_pixels_per_line,
578 uint8_t temp2[20 * 16];
579 const int16_t *hfilter, *vfilter;
580 uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering
582 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
583 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
585 // First filter 1d Horizontal
586 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
589 // Now filter vertically
590 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter);
592 return vp9_variance4x4(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
595 unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
596 int src_pixels_per_line,
599 const uint8_t *dst_ptr,
600 int dst_pixels_per_line,
602 const uint8_t *second_pred) {
603 uint8_t temp2[20 * 16];
604 const int16_t *hfilter, *vfilter;
605 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer
606 uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering
608 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
609 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
611 // First filter 1d Horizontal
612 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
615 // Now filter vertically
616 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter);
617 comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
618 return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
621 unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
622 int src_pixels_per_line,
625 const uint8_t *dst_ptr,
626 int dst_pixels_per_line,
628 uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering
629 uint8_t temp2[20 * 16];
630 const int16_t *hfilter, *vfilter;
632 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
633 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
635 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
637 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
639 return vp9_variance8x8(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
642 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
643 int src_pixels_per_line,
646 const uint8_t *dst_ptr,
647 int dst_pixels_per_line,
649 const uint8_t *second_pred) {
650 uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering
651 uint8_t temp2[20 * 16];
652 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer
653 const int16_t *hfilter, *vfilter;
655 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
656 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
658 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
660 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
661 comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
662 return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
665 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
666 int src_pixels_per_line,
669 const uint8_t *dst_ptr,
670 int dst_pixels_per_line,
672 uint16_t fdata3[17 * 16]; // Temp data buffer used in filtering
673 uint8_t temp2[20 * 16];
674 const int16_t *hfilter, *vfilter;
676 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
677 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
679 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
681 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
683 return vp9_variance16x16(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
686 unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
687 int src_pixels_per_line,
690 const uint8_t *dst_ptr,
691 int dst_pixels_per_line,
693 const uint8_t *second_pred) {
694 uint16_t fdata3[17 * 16];
695 uint8_t temp2[20 * 16];
696 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer
697 const int16_t *hfilter, *vfilter;
699 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
700 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
702 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
704 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
706 comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
707 return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
710 unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
711 int src_pixels_per_line,
714 const uint8_t *dst_ptr,
715 int dst_pixels_per_line,
717 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
718 uint8_t temp2[68 * 64];
719 const int16_t *hfilter, *vfilter;
721 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
722 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
724 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
726 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
728 return vp9_variance64x64(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
731 unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
732 int src_pixels_per_line,
735 const uint8_t *dst_ptr,
736 int dst_pixels_per_line,
738 const uint8_t *second_pred) {
739 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
740 uint8_t temp2[68 * 64];
741 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer
742 const int16_t *hfilter, *vfilter;
744 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
745 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
747 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
749 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
750 comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
751 return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
754 unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
755 int src_pixels_per_line,
758 const uint8_t *dst_ptr,
759 int dst_pixels_per_line,
761 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
762 uint8_t temp2[36 * 32];
763 const int16_t *hfilter, *vfilter;
765 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
766 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
768 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
770 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
772 return vp9_variance32x32(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
775 unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
776 int src_pixels_per_line,
779 const uint8_t *dst_ptr,
780 int dst_pixels_per_line,
782 const uint8_t *second_pred) {
783 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
784 uint8_t temp2[36 * 32];
785 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer
786 const int16_t *hfilter, *vfilter;
788 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
789 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
791 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
793 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
794 comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
795 return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
798 unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
800 const uint8_t *ref_ptr,
803 return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0,
804 ref_ptr, recon_stride, sse);
807 unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
809 const uint8_t *ref_ptr,
812 return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0,
813 ref_ptr, recon_stride, sse);
816 unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
818 const uint8_t *ref_ptr,
821 return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0,
822 ref_ptr, recon_stride, sse);
825 unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
827 const uint8_t *ref_ptr,
830 return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
831 ref_ptr, recon_stride, sse);
834 unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
836 const uint8_t *ref_ptr,
839 return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8,
840 ref_ptr, recon_stride, sse);
843 unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
845 const uint8_t *ref_ptr,
848 return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8,
849 ref_ptr, recon_stride, sse);
852 unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
854 const uint8_t *ref_ptr,
857 return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8,
858 ref_ptr, recon_stride, sse);
861 unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
863 const uint8_t *ref_ptr,
866 return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8,
867 ref_ptr, recon_stride, sse);
870 unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
872 const uint8_t *ref_ptr,
875 return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8,
876 ref_ptr, recon_stride, sse);
879 unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
880 int src_pixels_per_line,
883 const uint8_t *dst_ptr,
884 int dst_pixels_per_line,
886 vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
887 xoffset, yoffset, dst_ptr,
888 dst_pixels_per_line, sse);
892 unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
893 int src_pixels_per_line,
896 const uint8_t *dst_ptr,
897 int dst_pixels_per_line,
899 vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
900 xoffset, yoffset, dst_ptr,
901 dst_pixels_per_line, sse);
905 unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
906 int src_pixels_per_line,
909 const uint8_t *dst_ptr,
910 int dst_pixels_per_line,
912 vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
913 xoffset, yoffset, dst_ptr,
914 dst_pixels_per_line, sse);
918 unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
919 int src_pixels_per_line,
922 const uint8_t *dst_ptr,
923 int dst_pixels_per_line,
925 uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering
926 uint8_t temp2[20 * 16];
927 const int16_t *hfilter, *vfilter;
929 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
930 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
932 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
934 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
936 return vp9_variance16x8(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
939 unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
940 int src_pixels_per_line,
943 const uint8_t *dst_ptr,
944 int dst_pixels_per_line,
946 const uint8_t *second_pred) {
947 uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering
948 uint8_t temp2[20 * 16];
949 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer
950 const int16_t *hfilter, *vfilter;
952 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
953 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
955 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
957 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
958 comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
959 return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
962 unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
963 int src_pixels_per_line,
966 const uint8_t *dst_ptr,
967 int dst_pixels_per_line,
969 uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering
970 uint8_t temp2[20 * 16];
971 const int16_t *hfilter, *vfilter;
973 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
974 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
976 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
978 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
980 return vp9_variance8x16(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
983 unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
984 int src_pixels_per_line,
987 const uint8_t *dst_ptr,
988 int dst_pixels_per_line,
990 const uint8_t *second_pred) {
991 uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering
992 uint8_t temp2[20 * 16];
993 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer
994 const int16_t *hfilter, *vfilter;
996 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
997 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
999 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1001 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
1002 comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
1003 return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1006 unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
1007 int src_pixels_per_line,
1010 const uint8_t *dst_ptr,
1011 int dst_pixels_per_line,
1012 unsigned int *sse) {
1013 uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering
1014 uint8_t temp2[20 * 16];
1015 const int16_t *hfilter, *vfilter;
1017 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1018 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1020 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1022 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
1024 return vp9_variance8x4(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
1027 unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
1028 int src_pixels_per_line,
1031 const uint8_t *dst_ptr,
1032 int dst_pixels_per_line,
1034 const uint8_t *second_pred) {
1035 uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering
1036 uint8_t temp2[20 * 16];
1037 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer
1038 const int16_t *hfilter, *vfilter;
1040 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1041 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1043 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1045 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
1046 comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
1047 return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
1050 unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
1051 int src_pixels_per_line,
1054 const uint8_t *dst_ptr,
1055 int dst_pixels_per_line,
1056 unsigned int *sse) {
1057 uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering
1058 // FIXME(jingning,rbultje): this temp2 buffer probably doesn't need to be
1059 // this big; the same issue appears in all other block size settings.
1060 uint8_t temp2[20 * 16];
1061 const int16_t *hfilter, *vfilter;
1063 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1064 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1066 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1068 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
1070 return vp9_variance4x8(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
1073 unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
1074 int src_pixels_per_line,
1077 const uint8_t *dst_ptr,
1078 int dst_pixels_per_line,
1080 const uint8_t *second_pred) {
1081 uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering
1082 uint8_t temp2[20 * 16];
1083 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer
1084 const int16_t *hfilter, *vfilter;
1086 hfilter = BILINEAR_FILTERS_2TAP(xoffset);
1087 vfilter = BILINEAR_FILTERS_2TAP(yoffset);
1089 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1091 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
1092 comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
1093 return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);