src/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include "./vp9_rtcd.h"
  12
  13 #include "vpx_ports/mem.h"
  14 #include "vpx/vpx_integer.h"
  15
  16 #include "vp9/common/vp9_common.h"
  17 #include "vp9/common/vp9_filter.h"
  18
  19 #include "vp9/encoder/vp9_variance.h"
  20
  21 void variance(const uint8_t *a, int  a_stride,
  22               const uint8_t *b, int  b_stride,
  23               int  w, int  h, unsigned int *sse, int *sum) {
  24   int i, j;
  25
  26   *sum = 0;
  27   *sse = 0;
  28
  29   for (i = 0; i < h; i++) {
  30     for (j = 0; j < w; j++) {
  31       const int diff = a[j] - b[j];
  32       *sum += diff;
  33       *sse += diff * diff;
  34     }
  35
  36     a += a_stride;
  37     b += b_stride;
  38   }
  39 }
  40
  41 // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
  42 // or vertical direction to produce the filtered output block. Used to implement
  43 // first-pass of 2-D separable filter.
  44 //
  45 // Produces int32_t output to retain precision for next pass. Two filter taps
  46 // should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is
  47 // applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It
  48 // defines the offset required to move from one input to the next.
  49 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
  50                                               uint16_t *output_ptr,
  51                                               unsigned int src_pixels_per_line,
  52                                               int pixel_step,
  53                                               unsigned int output_height,
  54                                               unsigned int output_width,
  55                                               const int16_t *vp9_filter) {
  56   unsigned int i, j;
  57
  58   for (i = 0; i < output_height; i++) {
  59     for (j = 0; j < output_width; j++) {
  60       output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
  61                           (int)src_ptr[pixel_step] * vp9_filter[1],
  62                           FILTER_BITS);
  63
  64       src_ptr++;
  65     }
  66
  67     // Next row...
  68     src_ptr    += src_pixels_per_line - output_width;
  69     output_ptr += output_width;
  70   }
  71 }
  72
  73 // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
  74 // or vertical direction to produce the filtered output block. Used to implement
  75 // second-pass of 2-D separable filter.
  76 //
  77 // Requires 32-bit input as produced by filter_block2d_bil_first_pass. Two
  78 // filter taps should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the
  79 // filter is applied horizontally (pixel_step=1) or vertically (pixel_step=
  80 // stride). It defines the offset required to move from one input to the next.
  81 static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
  82                                                uint8_t *output_ptr,
  83                                                unsigned int src_pixels_per_line,
  84                                                unsigned int pixel_step,
  85                                                unsigned int output_height,
  86                                                unsigned int output_width,
  87                                                const int16_t *vp9_filter) {
  88   unsigned int  i, j;
  89
  90   for (i = 0; i < output_height; i++) {
  91     for (j = 0; j < output_width; j++) {
  92       output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
  93                           (int)src_ptr[pixel_step] * vp9_filter[1],
  94                           FILTER_BITS);
  95       src_ptr++;
  96     }
  97
  98     src_ptr += src_pixels_per_line - output_width;
  99     output_ptr += output_width;
 100   }
 101 }
 102
 103 unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
 104   unsigned int i, sum = 0;
 105
 106   for (i = 0; i < 256; i++)
 107     sum += src_ptr[i] * src_ptr[i];
 108
 109   return sum;
 110 }
 111
 112 #define VAR(W, H) \
 113 unsigned int vp9_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
 114                                        const uint8_t *b, int b_stride, \
 115                                        unsigned int *sse) { \
 116   int sum; \
 117   variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
 118   return *sse - (((int64_t)sum * sum) / (W * H)); \
 119 }
 120
 121 #define SUBPIX_VAR(W, H) \
 122 unsigned int vp9_sub_pixel_variance##W##x##H##_c( \
 123   const uint8_t *src, int  src_stride, \
 124   int xoffset, int  yoffset, \
 125   const uint8_t *dst, int dst_stride, \
 126   unsigned int *sse) { \
 127   uint16_t fdata3[(H + 1) * W]; \
 128   uint8_t temp2[H * W]; \
 129 \
 130   var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
 131                                     BILINEAR_FILTERS_2TAP(xoffset)); \
 132   var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
 133                                      BILINEAR_FILTERS_2TAP(yoffset)); \
 134 \
 135   return vp9_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
 136 }
 137
 138 #define SUBPIX_AVG_VAR(W, H) \
 139 unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
 140   const uint8_t *src, int  src_stride, \
 141   int xoffset, int  yoffset, \
 142   const uint8_t *dst, int dst_stride, \
 143   unsigned int *sse, \
 144   const uint8_t *second_pred) { \
 145   uint16_t fdata3[(H + 1) * W]; \
 146   uint8_t temp2[H * W]; \
 147   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, H * W); \
 148 \
 149   var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
 150                                     BILINEAR_FILTERS_2TAP(xoffset)); \
 151   var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
 152                                      BILINEAR_FILTERS_2TAP(yoffset)); \
 153 \
 154   vp9_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
 155 \
 156   return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \
 157 }
 158
 159 void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride,
 160                        const uint8_t *ref_ptr, int ref_stride,
 161                        unsigned int *sse, int *sum) {
 162   variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum);
 163 }
 164
 165 void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride,
 166                      const uint8_t *ref_ptr, int ref_stride,
 167                      unsigned int *sse, int *sum) {
 168   variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum);
 169 }
 170
 171 unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride,
 172                             const uint8_t *ref, int ref_stride,
 173                             unsigned int *sse) {
 174   int sum;
 175   variance(src, src_stride, ref, ref_stride, 16, 16, sse, &sum);
 176   return *sse;
 177 }
 178
 179 unsigned int vp9_mse16x8_c(const uint8_t *src, int src_stride,
 180                            const uint8_t *ref, int ref_stride,
 181                            unsigned int *sse) {
 182   int sum;
 183   variance(src, src_stride, ref, ref_stride, 16, 8, sse, &sum);
 184   return *sse;
 185 }
 186
 187 unsigned int vp9_mse8x16_c(const uint8_t *src, int src_stride,
 188                            const uint8_t *ref, int ref_stride,
 189                            unsigned int *sse) {
 190   int sum;
 191   variance(src, src_stride, ref, ref_stride, 8, 16, sse, &sum);
 192   return *sse;
 193 }
 194
 195 unsigned int vp9_mse8x8_c(const uint8_t *src, int src_stride,
 196                           const uint8_t *ref, int ref_stride,
 197                           unsigned int *sse) {
 198   int sum;
 199   variance(src, src_stride, ref, ref_stride, 8, 8, sse, &sum);
 200   return *sse;
 201 }
 202
 203 VAR(4, 4)
 204 SUBPIX_VAR(4, 4)
 205 SUBPIX_AVG_VAR(4, 4)
 206
 207 VAR(4, 8)
 208 SUBPIX_VAR(4, 8)
 209 SUBPIX_AVG_VAR(4, 8)
 210
 211 VAR(8, 4)
 212 SUBPIX_VAR(8, 4)
 213 SUBPIX_AVG_VAR(8, 4)
 214
 215 VAR(8, 8)
 216 SUBPIX_VAR(8, 8)
 217 SUBPIX_AVG_VAR(8, 8)
 218
 219 VAR(8, 16)
 220 SUBPIX_VAR(8, 16)
 221 SUBPIX_AVG_VAR(8, 16)
 222
 223 VAR(16, 8)
 224 SUBPIX_VAR(16, 8)
 225 SUBPIX_AVG_VAR(16, 8)
 226
 227 VAR(16, 16)
 228 SUBPIX_VAR(16, 16)
 229 SUBPIX_AVG_VAR(16, 16)
 230
 231 VAR(16, 32)
 232 SUBPIX_VAR(16, 32)
 233 SUBPIX_AVG_VAR(16, 32)
 234
 235 VAR(32, 16)
 236 SUBPIX_VAR(32, 16)
 237 SUBPIX_AVG_VAR(32, 16)
 238
 239 VAR(32, 32)
 240 SUBPIX_VAR(32, 32)
 241 SUBPIX_AVG_VAR(32, 32)
 242
 243 VAR(32, 64)
 244 SUBPIX_VAR(32, 64)
 245 SUBPIX_AVG_VAR(32, 64)
 246
 247 VAR(64, 32)
 248 SUBPIX_VAR(64, 32)
 249 SUBPIX_AVG_VAR(64, 32)
 250
 251 VAR(64, 64)
 252 SUBPIX_VAR(64, 64)
 253 SUBPIX_AVG_VAR(64, 64)
 254
 255 void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
 256                        int height, const uint8_t *ref, int ref_stride) {
 257   int i, j;
 258
 259   for (i = 0; i < height; i++) {
 260     for (j = 0; j < width; j++) {
 261       const int tmp = pred[j] + ref[j];
 262       comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
 263     }
 264     comp_pred += width;
 265     pred += width;
 266     ref += ref_stride;
 267   }
 268 }