src/third_party/libvpx/source/libvpx/vp9/common/vp9_convolve.c

   1 /*
   2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <assert.h>
  12
  13 #include "./vpx_config.h"
  14 #include "./vp9_rtcd.h"
  15 #include "vp9/common/vp9_common.h"
  16 #include "vp9/common/vp9_convolve.h"
  17 #include "vp9/common/vp9_filter.h"
  18 #include "vpx/vpx_integer.h"
  19 #include "vpx_ports/mem.h"
  20
  21 static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
  22                              uint8_t *dst, ptrdiff_t dst_stride,
  23                              const int16_t *filter_x0, int x_step_q4,
  24                              const int16_t *filter_y, int y_step_q4,
  25                              int w, int h, int taps) {
  26   int x, y, k;
  27
  28   /* NOTE: This assumes that the filter table is 256-byte aligned. */
  29   /* TODO(agrange) Modify to make independent of table alignment. */
  30   const int16_t *const filter_x_base =
  31       (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
  32
  33   /* Adjust base pointer address for this source line */
  34   src -= taps / 2 - 1;
  35
  36   for (y = 0; y < h; ++y) {
  37     /* Initial phase offset */
  38     int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
  39
  40     for (x = 0; x < w; ++x) {
  41       /* Per-pixel src offset */
  42       const int src_x = x_q4 >> SUBPEL_BITS;
  43       int sum = 0;
  44
  45       /* Pointer to filter to use */
  46       const int16_t *const filter_x = filter_x_base +
  47           (x_q4 & SUBPEL_MASK) * taps;
  48
  49       for (k = 0; k < taps; ++k)
  50         sum += src[src_x + k] * filter_x[k];
  51
  52       dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
  53
  54       /* Move to the next source pixel */
  55       x_q4 += x_step_q4;
  56     }
  57     src += src_stride;
  58     dst += dst_stride;
  59   }
  60 }
  61
  62 static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
  63                                  uint8_t *dst, ptrdiff_t dst_stride,
  64                                  const int16_t *filter_x0, int x_step_q4,
  65                                  const int16_t *filter_y, int y_step_q4,
  66                                  int w, int h, int taps) {
  67   int x, y, k;
  68
  69   /* NOTE: This assumes that the filter table is 256-byte aligned. */
  70   /* TODO(agrange) Modify to make independent of table alignment. */
  71   const int16_t *const filter_x_base =
  72       (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
  73
  74   /* Adjust base pointer address for this source line */
  75   src -= taps / 2 - 1;
  76
  77   for (y = 0; y < h; ++y) {
  78     /* Initial phase offset */
  79     int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
  80
  81     for (x = 0; x < w; ++x) {
  82       /* Per-pixel src offset */
  83       const int src_x = x_q4 >> SUBPEL_BITS;
  84       int sum = 0;
  85
  86       /* Pointer to filter to use */
  87       const int16_t *const filter_x = filter_x_base +
  88           (x_q4 & SUBPEL_MASK) * taps;
  89
  90       for (k = 0; k < taps; ++k)
  91         sum += src[src_x + k] * filter_x[k];
  92
  93       dst[x] = ROUND_POWER_OF_TWO(dst[x] +
  94                    clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
  95
  96       /* Move to the next source pixel */
  97       x_q4 += x_step_q4;
  98     }
  99     src += src_stride;
 100     dst += dst_stride;
 101   }
 102 }
 103
 104 static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
 105                             uint8_t *dst, ptrdiff_t dst_stride,
 106                             const int16_t *filter_x, int x_step_q4,
 107                             const int16_t *filter_y0, int y_step_q4,
 108                             int w, int h, int taps) {
 109   int x, y, k;
 110
 111   /* NOTE: This assumes that the filter table is 256-byte aligned. */
 112   /* TODO(agrange) Modify to make independent of table alignment. */
 113   const int16_t *const filter_y_base =
 114       (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
 115
 116   /* Adjust base pointer address for this source column */
 117   src -= src_stride * (taps / 2 - 1);
 118
 119   for (x = 0; x < w; ++x) {
 120     /* Initial phase offset */
 121     int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
 122
 123     for (y = 0; y < h; ++y) {
 124       /* Per-pixel src offset */
 125       const int src_y = y_q4 >> SUBPEL_BITS;
 126       int sum = 0;
 127
 128       /* Pointer to filter to use */
 129       const int16_t *const filter_y = filter_y_base +
 130           (y_q4 & SUBPEL_MASK) * taps;
 131
 132       for (k = 0; k < taps; ++k)
 133         sum += src[(src_y + k) * src_stride] * filter_y[k];
 134
 135       dst[y * dst_stride] =
 136           clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
 137
 138       /* Move to the next source pixel */
 139       y_q4 += y_step_q4;
 140     }
 141     ++src;
 142     ++dst;
 143   }
 144 }
 145
 146 static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
 147                                 uint8_t *dst, ptrdiff_t dst_stride,
 148                                 const int16_t *filter_x, int x_step_q4,
 149                                 const int16_t *filter_y0, int y_step_q4,
 150                                 int w, int h, int taps) {
 151   int x, y, k;
 152
 153   /* NOTE: This assumes that the filter table is 256-byte aligned. */
 154   /* TODO(agrange) Modify to make independent of table alignment. */
 155   const int16_t *const filter_y_base =
 156       (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
 157
 158   /* Adjust base pointer address for this source column */
 159   src -= src_stride * (taps / 2 - 1);
 160
 161   for (x = 0; x < w; ++x) {
 162     /* Initial phase offset */
 163     int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
 164
 165     for (y = 0; y < h; ++y) {
 166       /* Per-pixel src offset */
 167       const int src_y = y_q4 >> SUBPEL_BITS;
 168       int sum = 0;
 169
 170       /* Pointer to filter to use */
 171       const int16_t *const filter_y = filter_y_base +
 172           (y_q4 & SUBPEL_MASK) * taps;
 173
 174       for (k = 0; k < taps; ++k)
 175         sum += src[(src_y + k) * src_stride] * filter_y[k];
 176
 177       dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
 178            clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
 179
 180       /* Move to the next source pixel */
 181       y_q4 += y_step_q4;
 182     }
 183     ++src;
 184     ++dst;
 185   }
 186 }
 187
 188 static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
 189                        uint8_t *dst, ptrdiff_t dst_stride,
 190                        const int16_t *filter_x, int x_step_q4,
 191                        const int16_t *filter_y, int y_step_q4,
 192                        int w, int h, int taps) {
 193   /* Fixed size intermediate buffer places limits on parameters.
 194    * Maximum intermediate_height is 324, for y_step_q4 == 80,
 195    * h == 64, taps == 8.
 196    * y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
 197    */
 198   uint8_t temp[64 * 324];
 199   int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + taps;
 200
 201   assert(w <= 64);
 202   assert(h <= 64);
 203   assert(taps <= 8);
 204   assert(y_step_q4 <= 80);
 205   assert(x_step_q4 <= 80);
 206
 207   if (intermediate_height < h)
 208     intermediate_height = h;
 209
 210   convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 64,
 211                    filter_x, x_step_q4, filter_y, y_step_q4, w,
 212                    intermediate_height, taps);
 213   convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, filter_x,
 214                   x_step_q4, filter_y, y_step_q4, w, h, taps);
 215 }
 216
 217 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
 218                            uint8_t *dst, ptrdiff_t dst_stride,
 219                            const int16_t *filter_x, int x_step_q4,
 220                            const int16_t *filter_y, int y_step_q4,
 221                            int w, int h) {
 222   convolve_horiz_c(src, src_stride, dst, dst_stride,
 223                    filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
 224 }
 225
 226 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
 227                                uint8_t *dst, ptrdiff_t dst_stride,
 228                                const int16_t *filter_x, int x_step_q4,
 229                                const int16_t *filter_y, int y_step_q4,
 230                                int w, int h) {
 231   convolve_avg_horiz_c(src, src_stride, dst, dst_stride,
 232                        filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
 233 }
 234
 235 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
 236                           uint8_t *dst, ptrdiff_t dst_stride,
 237                           const int16_t *filter_x, int x_step_q4,
 238                           const int16_t *filter_y, int y_step_q4,
 239                           int w, int h) {
 240   convolve_vert_c(src, src_stride, dst, dst_stride,
 241                   filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
 242 }
 243
 244 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
 245                               uint8_t *dst, ptrdiff_t dst_stride,
 246                               const int16_t *filter_x, int x_step_q4,
 247                               const int16_t *filter_y, int y_step_q4,
 248                               int w, int h) {
 249   convolve_avg_vert_c(src, src_stride, dst, dst_stride,
 250                       filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
 251 }
 252
 253 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
 254                      uint8_t *dst, ptrdiff_t dst_stride,
 255                      const int16_t *filter_x, int x_step_q4,
 256                      const int16_t *filter_y, int y_step_q4,
 257                      int w, int h) {
 258   convolve_c(src, src_stride, dst, dst_stride,
 259              filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
 260 }
 261
 262 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
 263                          uint8_t *dst, ptrdiff_t dst_stride,
 264                          const int16_t *filter_x, int x_step_q4,
 265                          const int16_t *filter_y, int y_step_q4,
 266                          int w, int h) {
 267   /* Fixed size intermediate buffer places limits on parameters. */
 268   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);
 269   assert(w <= 64);
 270   assert(h <= 64);
 271
 272   vp9_convolve8(src, src_stride, temp, 64,
 273                filter_x, x_step_q4, filter_y, y_step_q4, w, h);
 274   vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
 275 }
 276
 277 void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
 278                          uint8_t *dst, ptrdiff_t dst_stride,
 279                          const int16_t *filter_x, int filter_x_stride,
 280                          const int16_t *filter_y, int filter_y_stride,
 281                          int w, int h) {
 282   int r;
 283
 284   for (r = h; r > 0; --r) {
 285     vpx_memcpy(dst, src, w);
 286     src += src_stride;
 287     dst += dst_stride;
 288   }
 289 }
 290
 291 void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
 292                         uint8_t *dst, ptrdiff_t dst_stride,
 293                         const int16_t *filter_x, int filter_x_stride,
 294                         const int16_t *filter_y, int filter_y_stride,
 295                         int w, int h) {
 296   int x, y;
 297
 298   for (y = 0; y < h; ++y) {
 299     for (x = 0; x < w; ++x)
 300       dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
 301
 302     src += src_stride;
 303     dst += dst_stride;
 304   }
 305 }