src/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_asm_stubs.c

   1 /*
   2  *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <assert.h>
  12
  13 #include "./vpx_config.h"
  14 #include "./vp9_rtcd.h"
  15 #include "vpx_ports/mem.h"
  16
  17 typedef void filter8_1dfunction (
  18   const unsigned char *src_ptr,
  19   const ptrdiff_t src_pitch,
  20   unsigned char *output_ptr,
  21   ptrdiff_t out_pitch,
  22   unsigned int output_height,
  23   const short *filter
  24 );
  25
  26 #define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
  27   void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
  28                                    uint8_t *dst, ptrdiff_t dst_stride, \
  29                                    const int16_t *filter_x, int x_step_q4, \
  30                                    const int16_t *filter_y, int y_step_q4, \
  31                                    int w, int h) { \
  32   if (step_q4 == 16 && filter[3] != 128) { \
  33     if (filter[0] || filter[1] || filter[2]) { \
  34       while (w >= 16) { \
  35         vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \
  36                                                  src_stride, \
  37                                                  dst, \
  38                                                  dst_stride, \
  39                                                  h, \
  40                                                  filter); \
  41         src += 16; \
  42         dst += 16; \
  43         w -= 16; \
  44       } \
  45       while (w >= 8) { \
  46         vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \
  47                                                 src_stride, \
  48                                                 dst, \
  49                                                 dst_stride, \
  50                                                 h, \
  51                                                 filter); \
  52         src += 8; \
  53         dst += 8; \
  54         w -= 8; \
  55       } \
  56       while (w >= 4) { \
  57         vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \
  58                                                 src_stride, \
  59                                                 dst, \
  60                                                 dst_stride, \
  61                                                 h, \
  62                                                 filter); \
  63         src += 4; \
  64         dst += 4; \
  65         w -= 4; \
  66       } \
  67     } else { \
  68       while (w >= 16) { \
  69         vp9_filter_block1d16_##dir##2_##avg##opt(src, \
  70                                                  src_stride, \
  71                                                  dst, \
  72                                                  dst_stride, \
  73                                                  h, \
  74                                                  filter); \
  75         src += 16; \
  76         dst += 16; \
  77         w -= 16; \
  78       } \
  79       while (w >= 8) { \
  80         vp9_filter_block1d8_##dir##2_##avg##opt(src, \
  81                                                 src_stride, \
  82                                                 dst, \
  83                                                 dst_stride, \
  84                                                 h, \
  85                                                 filter); \
  86         src += 8; \
  87         dst += 8; \
  88         w -= 8; \
  89       } \
  90       while (w >= 4) { \
  91         vp9_filter_block1d4_##dir##2_##avg##opt(src, \
  92                                                 src_stride, \
  93                                                 dst, \
  94                                                 dst_stride, \
  95                                                 h, \
  96                                                 filter); \
  97         src += 4; \
  98         dst += 4; \
  99         w -= 4; \
 100       } \
 101     } \
 102   } \
 103   if (w) { \
 104     vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
 105                              filter_x, x_step_q4, filter_y, y_step_q4, \
 106                              w, h); \
 107   } \
 108 }
 109
 110 #define FUN_CONV_2D(avg, opt) \
 111 void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
 112                               uint8_t *dst, ptrdiff_t dst_stride, \
 113                               const int16_t *filter_x, int x_step_q4, \
 114                               const int16_t *filter_y, int y_step_q4, \
 115                               int w, int h) { \
 116   assert(w <= 64); \
 117   assert(h <= 64); \
 118   if (x_step_q4 == 16 && y_step_q4 == 16) { \
 119     if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
 120         filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
 121       DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); \
 122       vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
 123                                 filter_x, x_step_q4, filter_y, y_step_q4, \
 124                                 w, h + 7); \
 125       vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
 126                                       filter_x, x_step_q4, filter_y, \
 127                                       y_step_q4, w, h); \
 128     } else { \
 129       DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 65); \
 130       vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
 131                                 filter_x, x_step_q4, filter_y, y_step_q4, \
 132                                 w, h + 1); \
 133       vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
 134                                       filter_x, x_step_q4, filter_y, \
 135                                       y_step_q4, w, h); \
 136     } \
 137   } else { \
 138     vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
 139                            filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
 140   } \
 141 }
 142 #if HAVE_AVX2
 143 filter8_1dfunction vp9_filter_block1d16_v8_avx2;
 144 filter8_1dfunction vp9_filter_block1d16_h8_avx2;
 145 filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
 146 filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
 147 filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
 148 filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
 149 filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
 150 filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
 151 filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
 152 filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
 153 filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
 154 filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
 155 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3
 156 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3
 157 #define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3
 158 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3
 159 #define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3
 160 #define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3
 161 #define vp9_filter_block1d8_v2_avx2  vp9_filter_block1d8_v2_ssse3
 162 #define vp9_filter_block1d8_h2_avx2  vp9_filter_block1d8_h2_ssse3
 163 #define vp9_filter_block1d4_v2_avx2  vp9_filter_block1d4_v2_ssse3
 164 #define vp9_filter_block1d4_h2_avx2  vp9_filter_block1d4_h2_ssse3
 165 // void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
 166 //                                uint8_t *dst, ptrdiff_t dst_stride,
 167 //                                const int16_t *filter_x, int x_step_q4,
 168 //                                const int16_t *filter_y, int y_step_q4,
 169 //                                int w, int h);
 170 // void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
 171 //                               uint8_t *dst, ptrdiff_t dst_stride,
 172 //                               const int16_t *filter_x, int x_step_q4,
 173 //                               const int16_t *filter_y, int y_step_q4,
 174 //                               int w, int h);
 175 FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2);
 176 FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2);
 177
 178 // void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
 179 //                          uint8_t *dst, ptrdiff_t dst_stride,
 180 //                          const int16_t *filter_x, int x_step_q4,
 181 //                          const int16_t *filter_y, int y_step_q4,
 182 //                          int w, int h);
 183 FUN_CONV_2D(, avx2);
 184 #endif
 185 #if HAVE_SSSE3
 186 filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
 187 filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
 188 filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
 189 filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
 190 filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
 191 filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
 192 filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3;
 193 filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3;
 194 filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3;
 195 filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3;
 196 filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
 197 filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
 198
 199 filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
 200 filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
 201 filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
 202 filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
 203 filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
 204 filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
 205 filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3;
 206 filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3;
 207 filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3;
 208 filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3;
 209 filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3;
 210 filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3;
 211
 212 // void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
 213 //                                uint8_t *dst, ptrdiff_t dst_stride,
 214 //                                const int16_t *filter_x, int x_step_q4,
 215 //                                const int16_t *filter_y, int y_step_q4,
 216 //                                int w, int h);
 217 // void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
 218 //                               uint8_t *dst, ptrdiff_t dst_stride,
 219 //                               const int16_t *filter_x, int x_step_q4,
 220 //                               const int16_t *filter_y, int y_step_q4,
 221 //                               int w, int h);
 222 // void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
 223 //                                    uint8_t *dst, ptrdiff_t dst_stride,
 224 //                                    const int16_t *filter_x, int x_step_q4,
 225 //                                    const int16_t *filter_y, int y_step_q4,
 226 //                                    int w, int h);
 227 // void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
 228 //                                   uint8_t *dst, ptrdiff_t dst_stride,
 229 //                                   const int16_t *filter_x, int x_step_q4,
 230 //                                   const int16_t *filter_y, int y_step_q4,
 231 //                                   int w, int h);
 232 FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3);
 233 FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3);
 234 FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3);
 235 FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
 236             ssse3);
 237
 238 // void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
 239 //                          uint8_t *dst, ptrdiff_t dst_stride,
 240 //                          const int16_t *filter_x, int x_step_q4,
 241 //                          const int16_t *filter_y, int y_step_q4,
 242 //                          int w, int h);
 243 // void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
 244 //                              uint8_t *dst, ptrdiff_t dst_stride,
 245 //                              const int16_t *filter_x, int x_step_q4,
 246 //                              const int16_t *filter_y, int y_step_q4,
 247 //                              int w, int h);
 248 FUN_CONV_2D(, ssse3);
 249 FUN_CONV_2D(avg_ , ssse3);
 250 #endif
 251
 252 #if HAVE_SSE2
 253 filter8_1dfunction vp9_filter_block1d16_v8_sse2;
 254 filter8_1dfunction vp9_filter_block1d16_h8_sse2;
 255 filter8_1dfunction vp9_filter_block1d8_v8_sse2;
 256 filter8_1dfunction vp9_filter_block1d8_h8_sse2;
 257 filter8_1dfunction vp9_filter_block1d4_v8_sse2;
 258 filter8_1dfunction vp9_filter_block1d4_h8_sse2;
 259 filter8_1dfunction vp9_filter_block1d16_v8_avg_sse2;
 260 filter8_1dfunction vp9_filter_block1d16_h8_avg_sse2;
 261 filter8_1dfunction vp9_filter_block1d8_v8_avg_sse2;
 262 filter8_1dfunction vp9_filter_block1d8_h8_avg_sse2;
 263 filter8_1dfunction vp9_filter_block1d4_v8_avg_sse2;
 264 filter8_1dfunction vp9_filter_block1d4_h8_avg_sse2;
 265
 266 filter8_1dfunction vp9_filter_block1d16_v2_sse2;
 267 filter8_1dfunction vp9_filter_block1d16_h2_sse2;
 268 filter8_1dfunction vp9_filter_block1d8_v2_sse2;
 269 filter8_1dfunction vp9_filter_block1d8_h2_sse2;
 270 filter8_1dfunction vp9_filter_block1d4_v2_sse2;
 271 filter8_1dfunction vp9_filter_block1d4_h2_sse2;
 272 filter8_1dfunction vp9_filter_block1d16_v2_avg_sse2;
 273 filter8_1dfunction vp9_filter_block1d16_h2_avg_sse2;
 274 filter8_1dfunction vp9_filter_block1d8_v2_avg_sse2;
 275 filter8_1dfunction vp9_filter_block1d8_h2_avg_sse2;
 276 filter8_1dfunction vp9_filter_block1d4_v2_avg_sse2;
 277 filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2;
 278
 279 // void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
 280 //                               uint8_t *dst, ptrdiff_t dst_stride,
 281 //                               const int16_t *filter_x, int x_step_q4,
 282 //                               const int16_t *filter_y, int y_step_q4,
 283 //                               int w, int h);
 284 // void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
 285 //                              uint8_t *dst, ptrdiff_t dst_stride,
 286 //                              const int16_t *filter_x, int x_step_q4,
 287 //                              const int16_t *filter_y, int y_step_q4,
 288 //                              int w, int h);
 289 // void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
 290 //                                   uint8_t *dst, ptrdiff_t dst_stride,
 291 //                                   const int16_t *filter_x, int x_step_q4,
 292 //                                   const int16_t *filter_y, int y_step_q4,
 293 //                                   int w, int h);
 294 // void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
 295 //                                  uint8_t *dst, ptrdiff_t dst_stride,
 296 //                                  const int16_t *filter_x, int x_step_q4,
 297 //                                  const int16_t *filter_y, int y_step_q4,
 298 //                                  int w, int h);
 299 FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
 300 FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
 301 FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
 302 FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
 303
 304 // void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
 305 //                         uint8_t *dst, ptrdiff_t dst_stride,
 306 //                         const int16_t *filter_x, int x_step_q4,
 307 //                         const int16_t *filter_y, int y_step_q4,
 308 //                         int w, int h);
 309 // void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
 310 //                             uint8_t *dst, ptrdiff_t dst_stride,
 311 //                             const int16_t *filter_x, int x_step_q4,
 312 //                             const int16_t *filter_y, int y_step_q4,
 313 //                             int w, int h);
 314 FUN_CONV_2D(, sse2);
 315 FUN_CONV_2D(avg_ , sse2);
 316 #endif