src/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include "./vp9_rtcd.h"
  12
  13 #include "vpx_ports/mem.h"
  14 #include "vpx/vpx_integer.h"
  15
  16 #include "vp9/common/vp9_common.h"
  17 #include "vp9/common/vp9_filter.h"
  18
  19 #include "vp9/encoder/vp9_variance.h"
  20
  21 void variance(const uint8_t *a, int  a_stride,
  22               const uint8_t *b, int  b_stride,
  23               int  w, int  h, unsigned int *sse, int *sum) {
  24   int i, j;
  25
  26   *sum = 0;
  27   *sse = 0;
  28
  29   for (i = 0; i < h; i++) {
  30     for (j = 0; j < w; j++) {
  31       const int diff = a[j] - b[j];
  32       *sum += diff;
  33       *sse += diff * diff;
  34     }
  35
  36     a += a_stride;
  37     b += b_stride;
  38   }
  39 }
  40
  41 // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
  42 // or vertical direction to produce the filtered output block. Used to implement
  43 // first-pass of 2-D separable filter.
  44 //
  45 // Produces int32_t output to retain precision for next pass. Two filter taps
  46 // should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is
  47 // applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It
  48 // defines the offset required to move from one input to the next.
  49 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
  50                                               uint16_t *output_ptr,
  51                                               unsigned int src_pixels_per_line,
  52                                               int pixel_step,
  53                                               unsigned int output_height,
  54                                               unsigned int output_width,
  55                                               const int16_t *vp9_filter) {
  56   unsigned int i, j;
  57
  58   for (i = 0; i < output_height; i++) {
  59     for (j = 0; j < output_width; j++) {
  60       output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
  61                           (int)src_ptr[pixel_step] * vp9_filter[1],
  62                           FILTER_BITS);
  63
  64       src_ptr++;
  65     }
  66
  67     // Next row...
  68     src_ptr    += src_pixels_per_line - output_width;
  69     output_ptr += output_width;
  70   }
  71 }
  72
  73 // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
  74 // or vertical direction to produce the filtered output block. Used to implement
  75 // second-pass of 2-D separable filter.
  76 //
  77 // Requires 32-bit input as produced by filter_block2d_bil_first_pass. Two
  78 // filter taps should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the
  79 // filter is applied horizontally (pixel_step=1) or vertically (pixel_step=
  80 // stride). It defines the offset required to move from one input to the next.
  81 static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
  82                                                uint8_t *output_ptr,
  83                                                unsigned int src_pixels_per_line,
  84                                                unsigned int pixel_step,
  85                                                unsigned int output_height,
  86                                                unsigned int output_width,
  87                                                const int16_t *vp9_filter) {
  88   unsigned int  i, j;
  89
  90   for (i = 0; i < output_height; i++) {
  91     for (j = 0; j < output_width; j++) {
  92       output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
  93                           (int)src_ptr[pixel_step] * vp9_filter[1],
  94                           FILTER_BITS);
  95       src_ptr++;
  96     }
  97
  98     src_ptr += src_pixels_per_line - output_width;
  99     output_ptr += output_width;
 100   }
 101 }
 102
 103 unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
 104   unsigned int i, sum = 0;
 105
 106   for (i = 0; i < 256; ++i) {
 107     sum += src_ptr[i] * src_ptr[i];
 108   }
 109
 110   return sum;
 111 }
 112
 113 #define VAR(W, H) \
 114 unsigned int vp9_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
 115                                        const uint8_t *b, int b_stride, \
 116                                        unsigned int *sse) { \
 117   int sum; \
 118   variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
 119   return *sse - (((int64_t)sum * sum) / (W * H)); \
 120 }
 121
 122 #define SUBPIX_VAR(W, H) \
 123 unsigned int vp9_sub_pixel_variance##W##x##H##_c( \
 124   const uint8_t *src, int  src_stride, \
 125   int xoffset, int  yoffset, \
 126   const uint8_t *dst, int dst_stride, \
 127   unsigned int *sse) { \
 128   uint16_t fdata3[(H + 1) * W]; \
 129   uint8_t temp2[H * W]; \
 130 \
 131   var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
 132                                     BILINEAR_FILTERS_2TAP(xoffset)); \
 133   var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
 134                                      BILINEAR_FILTERS_2TAP(yoffset)); \
 135 \
 136   return vp9_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
 137 }
 138
 139 #define SUBPIX_AVG_VAR(W, H) \
 140 unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
 141   const uint8_t *src, int  src_stride, \
 142   int xoffset, int  yoffset, \
 143   const uint8_t *dst, int dst_stride, \
 144   unsigned int *sse, \
 145   const uint8_t *second_pred) { \
 146   uint16_t fdata3[(H + 1) * W]; \
 147   uint8_t temp2[H * W]; \
 148   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, H * W); \
 149 \
 150   var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
 151                                     BILINEAR_FILTERS_2TAP(xoffset)); \
 152   var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
 153                                      BILINEAR_FILTERS_2TAP(yoffset)); \
 154 \
 155   vp9_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
 156 \
 157   return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \
 158 }
 159
 160 void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride,
 161                        const uint8_t *ref_ptr, int ref_stride,
 162                        unsigned int *sse, int *sum) {
 163   variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum);
 164 }
 165
 166 void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride,
 167                      const uint8_t *ref_ptr, int ref_stride,
 168                      unsigned int *sse, int *sum) {
 169   variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum);
 170 }
 171
 172 unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride,
 173                             const uint8_t *ref, int ref_stride,
 174                             unsigned int *sse) {
 175   int sum;
 176   variance(src, src_stride, ref, ref_stride, 16, 16, sse, &sum);
 177   return *sse;
 178 }
 179
 180 unsigned int vp9_mse16x8_c(const uint8_t *src, int src_stride,
 181                            const uint8_t *ref, int ref_stride,
 182                            unsigned int *sse) {
 183   int sum;
 184   variance(src, src_stride, ref, ref_stride, 16, 8, sse, &sum);
 185   return *sse;
 186 }
 187
 188 unsigned int vp9_mse8x16_c(const uint8_t *src, int src_stride,
 189                            const uint8_t *ref, int ref_stride,
 190                            unsigned int *sse) {
 191   int sum;
 192   variance(src, src_stride, ref, ref_stride, 8, 16, sse, &sum);
 193   return *sse;
 194 }
 195
 196 unsigned int vp9_mse8x8_c(const uint8_t *src, int src_stride,
 197                           const uint8_t *ref, int ref_stride,
 198                           unsigned int *sse) {
 199   int sum;
 200   variance(src, src_stride, ref, ref_stride, 8, 8, sse, &sum);
 201   return *sse;
 202 }
 203
 204 VAR(4, 4)
 205 SUBPIX_VAR(4, 4)
 206 SUBPIX_AVG_VAR(4, 4)
 207
 208 VAR(4, 8)
 209 SUBPIX_VAR(4, 8)
 210 SUBPIX_AVG_VAR(4, 8)
 211
 212 VAR(8, 4)
 213 SUBPIX_VAR(8, 4)
 214 SUBPIX_AVG_VAR(8, 4)
 215
 216 VAR(8, 8)
 217 SUBPIX_VAR(8, 8)
 218 SUBPIX_AVG_VAR(8, 8)
 219
 220 VAR(8, 16)
 221 SUBPIX_VAR(8, 16)
 222 SUBPIX_AVG_VAR(8, 16)
 223
 224 VAR(16, 8)
 225 SUBPIX_VAR(16, 8)
 226 SUBPIX_AVG_VAR(16, 8)
 227
 228 VAR(16, 16)
 229 SUBPIX_VAR(16, 16)
 230 SUBPIX_AVG_VAR(16, 16)
 231
 232 VAR(16, 32)
 233 SUBPIX_VAR(16, 32)
 234 SUBPIX_AVG_VAR(16, 32)
 235
 236 VAR(32, 16)
 237 SUBPIX_VAR(32, 16)
 238 SUBPIX_AVG_VAR(32, 16)
 239
 240 VAR(32, 32)
 241 SUBPIX_VAR(32, 32)
 242 SUBPIX_AVG_VAR(32, 32)
 243
 244 VAR(32, 64)
 245 SUBPIX_VAR(32, 64)
 246 SUBPIX_AVG_VAR(32, 64)
 247
 248 VAR(64, 32)
 249 SUBPIX_VAR(64, 32)
 250 SUBPIX_AVG_VAR(64, 32)
 251
 252 VAR(64, 64)
 253 SUBPIX_VAR(64, 64)
 254 SUBPIX_AVG_VAR(64, 64)
 255
 256 void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
 257                        int height, const uint8_t *ref, int ref_stride) {
 258   int i, j;
 259
 260   for (i = 0; i < height; i++) {
 261     for (j = 0; j < width; j++) {
 262       const int tmp = pred[j] + ref[j];
 263       comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
 264     }
 265     comp_pred += width;
 266     pred += width;
 267     ref += ref_stride;
 268   }
 269 }
 270
 271 #if CONFIG_VP9_HIGHBITDEPTH
 272 void high_variance64(const uint8_t *a8, int  a_stride,
 273                      const uint8_t *b8, int  b_stride,
 274                      int w, int h, uint64_t *sse,
 275                      uint64_t *sum) {
 276   int i, j;
 277
 278   uint16_t *a = CONVERT_TO_SHORTPTR(a8);
 279   uint16_t *b = CONVERT_TO_SHORTPTR(b8);
 280   *sum = 0;
 281   *sse = 0;
 282
 283   for (i = 0; i < h; i++) {
 284     for (j = 0; j < w; j++) {
 285       const int diff = a[j] - b[j];
 286       *sum += diff;
 287       *sse += diff * diff;
 288     }
 289     a += a_stride;
 290     b += b_stride;
 291   }
 292 }
 293
 294 void high_variance(const uint8_t *a8, int  a_stride,
 295                    const uint8_t *b8, int  b_stride,
 296                    int w, int h, unsigned int *sse,
 297                    int *sum) {
 298   uint64_t sse_long = 0;
 299   uint64_t sum_long = 0;
 300   high_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
 301   *sse = sse_long;
 302   *sum = sum_long;
 303 }
 304
 305 void high_10_variance(const uint8_t *a8, int  a_stride,
 306                       const uint8_t *b8, int  b_stride,
 307                       int w, int h, unsigned int *sse,
 308                       int *sum) {
 309   uint64_t sse_long = 0;
 310   uint64_t sum_long = 0;
 311   high_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
 312   *sum = ROUND_POWER_OF_TWO(sum_long, 2);
 313   *sse = ROUND_POWER_OF_TWO(sse_long, 4);
 314 }
 315
 316 void high_12_variance(const uint8_t *a8, int  a_stride,
 317                       const uint8_t *b8, int  b_stride,
 318                       int w, int h, unsigned int *sse,
 319                       int *sum) {
 320   uint64_t sse_long = 0;
 321   uint64_t sum_long = 0;
 322   high_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
 323   *sum = ROUND_POWER_OF_TWO(sum_long, 4);
 324   *sse = ROUND_POWER_OF_TWO(sse_long, 8);
 325 }
 326
 327 static void high_var_filter_block2d_bil_first_pass(
 328     const uint8_t *src_ptr8,
 329     uint16_t *output_ptr,
 330     unsigned int src_pixels_per_line,
 331     int pixel_step,
 332     unsigned int output_height,
 333     unsigned int output_width,
 334     const int16_t *vp9_filter) {
 335   unsigned int i, j;
 336   uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
 337   for (i = 0; i < output_height; i++) {
 338     for (j = 0; j < output_width; j++) {
 339       output_ptr[j] =
 340           ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
 341                              (int)src_ptr[pixel_step] * vp9_filter[1],
 342                              FILTER_BITS);
 343
 344       src_ptr++;
 345     }
 346
 347     // Next row...
 348     src_ptr += src_pixels_per_line - output_width;
 349     output_ptr += output_width;
 350   }
 351 }
 352
 353 static void high_var_filter_block2d_bil_second_pass(
 354     const uint16_t *src_ptr,
 355     uint16_t *output_ptr,
 356     unsigned int src_pixels_per_line,
 357     unsigned int pixel_step,
 358     unsigned int output_height,
 359     unsigned int output_width,
 360     const int16_t *vp9_filter) {
 361   unsigned int  i, j;
 362
 363   for (i = 0; i < output_height; i++) {
 364     for (j = 0; j < output_width; j++) {
 365       output_ptr[j] =
 366           ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
 367                              (int)src_ptr[pixel_step] * vp9_filter[1],
 368                              FILTER_BITS);
 369       src_ptr++;
 370     }
 371
 372     src_ptr += src_pixels_per_line - output_width;
 373     output_ptr += output_width;
 374   }
 375 }
 376
 377 #define HIGH_VAR(W, H) \
 378 unsigned int vp9_high_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
 379                                             const uint8_t *b, int b_stride, \
 380                                             unsigned int *sse) { \
 381   int sum; \
 382   high_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
 383   return *sse - (((int64_t)sum * sum) / (W * H)); \
 384 } \
 385 \
 386 unsigned int vp9_high_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
 387                                                const uint8_t *b, int b_stride, \
 388                                                 unsigned int *sse) { \
 389   int sum; \
 390   high_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
 391   return *sse - (((int64_t)sum * sum) / (W * H)); \
 392 } \
 393 \
 394 unsigned int vp9_high_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
 395                                                const uint8_t *b, int b_stride, \
 396                                                unsigned int *sse) { \
 397   int sum; \
 398   high_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
 399   return *sse - (((int64_t)sum * sum) / (W * H)); \
 400 }
 401
 402 #define HIGH_SUBPIX_VAR(W, H) \
 403 unsigned int vp9_high_sub_pixel_variance##W##x##H##_c( \
 404   const uint8_t *src, int  src_stride, \
 405   int xoffset, int  yoffset, \
 406   const uint8_t *dst, int dst_stride, \
 407   unsigned int *sse) { \
 408   uint16_t fdata3[(H + 1) * W]; \
 409   uint16_t temp2[H * W]; \
 410 \
 411   high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
 412                                          W, BILINEAR_FILTERS_2TAP(xoffset)); \
 413   high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
 414                                           BILINEAR_FILTERS_2TAP(yoffset)); \
 415 \
 416   return vp9_high_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
 417                                         dst_stride, sse); \
 418 } \
 419 \
 420 unsigned int vp9_high_10_sub_pixel_variance##W##x##H##_c( \
 421   const uint8_t *src, int  src_stride, \
 422   int xoffset, int  yoffset, \
 423   const uint8_t *dst, int dst_stride, \
 424   unsigned int *sse) { \
 425   uint16_t fdata3[(H + 1) * W]; \
 426   uint16_t temp2[H * W]; \
 427 \
 428   high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
 429                                          W, BILINEAR_FILTERS_2TAP(xoffset)); \
 430   high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
 431                                           BILINEAR_FILTERS_2TAP(yoffset)); \
 432 \
 433   return vp9_high_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
 434                                            dst_stride, sse); \
 435 } \
 436 \
 437 unsigned int vp9_high_12_sub_pixel_variance##W##x##H##_c( \
 438   const uint8_t *src, int  src_stride, \
 439   int xoffset, int  yoffset, \
 440   const uint8_t *dst, int dst_stride, \
 441   unsigned int *sse) { \
 442   uint16_t fdata3[(H + 1) * W]; \
 443   uint16_t temp2[H * W]; \
 444 \
 445   high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
 446                                          W, BILINEAR_FILTERS_2TAP(xoffset)); \
 447   high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
 448                                           BILINEAR_FILTERS_2TAP(yoffset)); \
 449 \
 450   return vp9_high_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
 451                                            dst_stride, sse); \
 452 }
 453
 454 #define HIGH_SUBPIX_AVG_VAR(W, H) \
 455 unsigned int vp9_high_sub_pixel_avg_variance##W##x##H##_c( \
 456   const uint8_t *src, int  src_stride, \
 457   int xoffset, int  yoffset, \
 458   const uint8_t *dst, int dst_stride, \
 459   unsigned int *sse, \
 460   const uint8_t *second_pred) { \
 461   uint16_t fdata3[(H + 1) * W]; \
 462   uint16_t temp2[H * W]; \
 463   DECLARE_ALIGNED_ARRAY(16, uint16_t, temp3, H * W); \
 464 \
 465   high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
 466                                          W, BILINEAR_FILTERS_2TAP(xoffset)); \
 467   high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
 468                                           BILINEAR_FILTERS_2TAP(yoffset)); \
 469 \
 470   vp9_high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), \
 471                          W); \
 472 \
 473   return vp9_high_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \
 474                                         dst_stride, sse); \
 475 } \
 476 \
 477 unsigned int vp9_high_10_sub_pixel_avg_variance##W##x##H##_c( \
 478   const uint8_t *src, int  src_stride, \
 479   int xoffset, int  yoffset, \
 480   const uint8_t *dst, int dst_stride, \
 481   unsigned int *sse, \
 482   const uint8_t *second_pred) { \
 483   uint16_t fdata3[(H + 1) * W]; \
 484   uint16_t temp2[H * W]; \
 485   DECLARE_ALIGNED_ARRAY(16, uint16_t, temp3, H * W); \
 486 \
 487   high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
 488                                          W, BILINEAR_FILTERS_2TAP(xoffset)); \
 489   high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
 490                                           BILINEAR_FILTERS_2TAP(yoffset)); \
 491 \
 492   vp9_high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), \
 493                          W); \
 494 \
 495   return vp9_high_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \
 496                                         dst_stride, sse); \
 497 } \
 498 \
 499 unsigned int vp9_high_12_sub_pixel_avg_variance##W##x##H##_c( \
 500   const uint8_t *src, int  src_stride, \
 501   int xoffset, int  yoffset, \
 502   const uint8_t *dst, int dst_stride, \
 503   unsigned int *sse, \
 504   const uint8_t *second_pred) { \
 505   uint16_t fdata3[(H + 1) * W]; \
 506   uint16_t temp2[H * W]; \
 507   DECLARE_ALIGNED_ARRAY(16, uint16_t, temp3, H * W); \
 508 \
 509   high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
 510                                          W, BILINEAR_FILTERS_2TAP(xoffset)); \
 511   high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
 512                                           BILINEAR_FILTERS_2TAP(yoffset)); \
 513 \
 514   vp9_high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), \
 515                          W); \
 516 \
 517   return vp9_high_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \
 518                                         dst_stride, sse); \
 519 }
 520
 521 #define HIGH_GET_VAR(S) \
 522 void vp9_high_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
 523                                   const uint8_t *ref, int ref_stride, \
 524                                   unsigned int *sse, int *sum) { \
 525   high_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
 526 } \
 527 \
 528 void vp9_high_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
 529                                      const uint8_t *ref, int ref_stride, \
 530                                      unsigned int *sse, int *sum) { \
 531   high_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
 532 } \
 533 \
 534 void vp9_high_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
 535                                      const uint8_t *ref, int ref_stride, \
 536                                      unsigned int *sse, int *sum) { \
 537   high_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
 538 }
 539
 540 #define HIGH_MSE(W, H) \
 541 unsigned int vp9_high_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
 542                                        const uint8_t *ref, int ref_stride, \
 543                                        unsigned int *sse) { \
 544   int sum; \
 545   high_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
 546   return *sse; \
 547 } \
 548 \
 549 unsigned int vp9_high_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
 550                                           const uint8_t *ref, int ref_stride, \
 551                                           unsigned int *sse) { \
 552   int sum; \
 553   high_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
 554   return *sse; \
 555 } \
 556 \
 557 unsigned int vp9_high_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
 558                                           const uint8_t *ref, int ref_stride, \
 559                                           unsigned int *sse) { \
 560   int sum; \
 561   high_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
 562   return *sse; \
 563 }
 564
 565 HIGH_GET_VAR(8)
 566 HIGH_GET_VAR(16)
 567
 568 HIGH_MSE(16, 16)
 569 HIGH_MSE(16, 8)
 570 HIGH_MSE(8, 16)
 571 HIGH_MSE(8, 8)
 572
 573 HIGH_VAR(4, 4)
 574 HIGH_SUBPIX_VAR(4, 4)
 575 HIGH_SUBPIX_AVG_VAR(4, 4)
 576
 577 HIGH_VAR(4, 8)
 578 HIGH_SUBPIX_VAR(4, 8)
 579 HIGH_SUBPIX_AVG_VAR(4, 8)
 580
 581 HIGH_VAR(8, 4)
 582 HIGH_SUBPIX_VAR(8, 4)
 583 HIGH_SUBPIX_AVG_VAR(8, 4)
 584
 585 HIGH_VAR(8, 8)
 586 HIGH_SUBPIX_VAR(8, 8)
 587 HIGH_SUBPIX_AVG_VAR(8, 8)
 588
 589 HIGH_VAR(8, 16)
 590 HIGH_SUBPIX_VAR(8, 16)
 591 HIGH_SUBPIX_AVG_VAR(8, 16)
 592
 593 HIGH_VAR(16, 8)
 594 HIGH_SUBPIX_VAR(16, 8)
 595 HIGH_SUBPIX_AVG_VAR(16, 8)
 596
 597 HIGH_VAR(16, 16)
 598 HIGH_SUBPIX_VAR(16, 16)
 599 HIGH_SUBPIX_AVG_VAR(16, 16)
 600
 601 HIGH_VAR(16, 32)
 602 HIGH_SUBPIX_VAR(16, 32)
 603 HIGH_SUBPIX_AVG_VAR(16, 32)
 604
 605 HIGH_VAR(32, 16)
 606 HIGH_SUBPIX_VAR(32, 16)
 607 HIGH_SUBPIX_AVG_VAR(32, 16)
 608
 609 HIGH_VAR(32, 32)
 610 HIGH_SUBPIX_VAR(32, 32)
 611 HIGH_SUBPIX_AVG_VAR(32, 32)
 612
 613 HIGH_VAR(32, 64)
 614 HIGH_SUBPIX_VAR(32, 64)
 615 HIGH_SUBPIX_AVG_VAR(32, 64)
 616
 617 HIGH_VAR(64, 32)
 618 HIGH_SUBPIX_VAR(64, 32)
 619 HIGH_SUBPIX_AVG_VAR(64, 32)
 620
 621 HIGH_VAR(64, 64)
 622 HIGH_SUBPIX_VAR(64, 64)
 623 HIGH_SUBPIX_AVG_VAR(64, 64)
 624
 625 void vp9_high_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
 626                             int width, int height, const uint8_t *ref8,
 627                             int ref_stride) {
 628   int i, j;
 629   uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
 630   uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
 631   for (i = 0; i < height; i++) {
 632     for (j = 0; j < width; j++) {
 633       const int tmp = pred[j] + ref[j];
 634       comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
 635     }
 636     comp_pred += width;
 637     pred += width;
 638     ref += ref_stride;
 639   }
 640 }
 641 #endif  // CONFIG_VP9_HIGHBITDEPTH