Code refactor on InterpKernel

author Zoe Liu <zoeliu@google.com>

Wed, 22 Jul 2015 17:40:42 +0000 (10:40 -0700)

committer Zoe Liu <zoeliu@google.com>

Fri, 31 Jul 2015 17:27:33 +0000 (10:27 -0700)
author Zoe Liu <zoeliu@google.com>
Wed, 22 Jul 2015 17:40:42 +0000 (10:40 -0700)
committer Zoe Liu <zoeliu@google.com>
Fri, 31 Jul 2015 17:27:33 +0000 (10:27 -0700)
diff --git a/test/convolve_test.cc b/test/convolve_test.cc

index b66da68..90f9579 100644 (file)
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -14,12 +14,15 @@
  
  #include "./vpx_config.h"
  #include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
  #include "test/acm_random.h"
  #include "test/clear_system_state.h"
  #include "test/register_state_check.h"
  #include "test/util.h"
  #include "vp9/common/vp9_common.h"
  #include "vp9/common/vp9_filter.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/vpx_filter.h"
  #include "vpx_mem/vpx_mem.h"
  #include "vpx_ports/mem.h"
  
@@ -945,7 +948,7 @@ void wrap_convolve8_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                   const int16_t *filter_y,
                                   int filter_y_stride,
                                   int w, int h) {
-  vp9_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
+  vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
                                    filter_x_stride, filter_y, filter_y_stride,
                                    w, h, 8);
  }
@@ -957,7 +960,7 @@ void wrap_convolve8_avg_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                       const int16_t *filter_y,
                                       int filter_y_stride,
                                       int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
                                        filter_x, filter_x_stride,
                                        filter_y, filter_y_stride, w, h, 8);
  }
@@ -969,7 +972,7 @@ void wrap_convolve8_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                  const int16_t *filter_y,
                                  int filter_y_stride,
                                  int w, int h) {
-  vp9_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
                                   filter_x, filter_x_stride,
                                   filter_y, filter_y_stride, w, h, 8);
  }
@@ -981,7 +984,7 @@ void wrap_convolve8_avg_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                      const int16_t *filter_y,
                                      int filter_y_stride,
                                      int w, int h) {
-  vp9_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
                                       filter_x, filter_x_stride,
                                       filter_y, filter_y_stride, w, h, 8);
  }
@@ -993,7 +996,7 @@ void wrap_convolve8_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w, int h) {
-  vp9_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 8);
  }
@@ -1005,7 +1008,7 @@ void wrap_convolve8_avg_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
                                 const int16_t *filter_y,
                                 int filter_y_stride,
                                 int w, int h) {
-  vp9_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
                                  filter_x, filter_x_stride,
                                  filter_y, filter_y_stride, w, h, 8);
  }
@@ -1017,7 +1020,7 @@ void wrap_convolve8_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                    const int16_t *filter_y,
                                    int filter_y_stride,
                                    int w, int h) {
-  vp9_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
                                    filter_x, filter_x_stride,
                                    filter_y, filter_y_stride, w, h, 10);
  }
@@ -1029,7 +1032,7 @@ void wrap_convolve8_avg_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                        const int16_t *filter_y,
                                        int filter_y_stride,
                                        int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
                                        filter_x, filter_x_stride,
                                        filter_y, filter_y_stride, w, h, 10);
  }
@@ -1041,7 +1044,7 @@ void wrap_convolve8_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                   const int16_t *filter_y,
                                   int filter_y_stride,
                                   int w, int h) {
-  vp9_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
                                   filter_x, filter_x_stride,
                                   filter_y, filter_y_stride, w, h, 10);
  }
@@ -1053,7 +1056,7 @@ void wrap_convolve8_avg_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                       const int16_t *filter_y,
                                       int filter_y_stride,
                                       int w, int h) {
-  vp9_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
                                       filter_x, filter_x_stride,
                                       filter_y, filter_y_stride, w, h, 10);
  }
@@ -1065,7 +1068,7 @@ void wrap_convolve8_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_y,
                              int filter_y_stride,
                              int w, int h) {
-  vp9_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 10);
  }
@@ -1077,7 +1080,7 @@ void wrap_convolve8_avg_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
                                  const int16_t *filter_y,
                                  int filter_y_stride,
                                  int w, int h) {
-  vp9_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
                                  filter_x, filter_x_stride,
                                  filter_y, filter_y_stride, w, h, 10);
  }
@@ -1089,7 +1092,7 @@ void wrap_convolve8_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                    const int16_t *filter_y,
                                    int filter_y_stride,
                                    int w, int h) {
-  vp9_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
                                    filter_x, filter_x_stride,
                                    filter_y, filter_y_stride, w, h, 12);
  }
@@ -1101,7 +1104,7 @@ void wrap_convolve8_avg_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                        const int16_t *filter_y,
                                        int filter_y_stride,
                                        int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
                                        filter_x, filter_x_stride,
                                        filter_y, filter_y_stride, w, h, 12);
  }
@@ -1113,7 +1116,7 @@ void wrap_convolve8_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                   const int16_t *filter_y,
                                   int filter_y_stride,
                                   int w, int h) {
-  vp9_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
                                   filter_x, filter_x_stride,
                                   filter_y, filter_y_stride, w, h, 12);
  }
@@ -1125,7 +1128,7 @@ void wrap_convolve8_avg_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                       const int16_t *filter_y,
                                       int filter_y_stride,
                                       int w, int h) {
-  vp9_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
                                       filter_x, filter_x_stride,
                                       filter_y, filter_y_stride, w, h, 12);
  }
@@ -1137,7 +1140,7 @@ void wrap_convolve8_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_y,
                              int filter_y_stride,
                              int w, int h) {
-  vp9_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 12);
  }
@@ -1149,7 +1152,7 @@ void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
                                  const int16_t *filter_y,
                                  int filter_y_stride,
                                  int w, int h) {
-  vp9_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
                                  filter_x, filter_x_stride,
                                  filter_y, filter_y_stride, w, h, 12);
  }
@@ -1162,7 +1165,7 @@ void wrap_convolve_copy_c_8(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_y,
                              int filter_y_stride,
                              int w, int h) {
-  vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
                               filter_x, filter_x_stride,
                               filter_y, filter_y_stride, w, h, 8);
  }
@@ -1174,7 +1177,7 @@ void wrap_convolve_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w, int h) {
-  vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 8);
  }
@@ -1186,7 +1189,7 @@ void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
                                const int16_t *filter_y,
                                int filter_y_stride,
                                int w, int h) {
-  vp9_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                                 filter_x, filter_x_stride,
                                 filter_y, filter_y_stride, w, h, 8);
  }
@@ -1198,7 +1201,7 @@ void wrap_convolve8_avg_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
                                    const int16_t *filter_y,
                                    int filter_y_stride,
                                    int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                     filter_x, filter_x_stride,
                                     filter_y, filter_y_stride, w, h, 8);
  }
@@ -1210,7 +1213,7 @@ void wrap_convolve8_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_y,
                               int filter_y_stride,
                               int w, int h) {
-  vp9_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
                                filter_x, filter_x_stride,
                                filter_y, filter_y_stride, w, h, 8);
  }
@@ -1222,7 +1225,7 @@ void wrap_convolve8_avg_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
                                   const int16_t *filter_y,
                                   int filter_y_stride,
                                   int w, int h) {
-  vp9_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                                    filter_x, filter_x_stride,
                                    filter_y, filter_y_stride, w, h, 8);
  }
@@ -1234,7 +1237,7 @@ void wrap_convolve8_c_8(const uint8_t *src, ptrdiff_t src_stride,
                          const int16_t *filter_y,
                          int filter_y_stride,
                          int w, int h) {
-  vp9_highbd_convolve8_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
                           filter_x, filter_x_stride,
                           filter_y, filter_y_stride, w, h, 8);
  }
@@ -1246,7 +1249,7 @@ void wrap_convolve8_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_y,
                              int filter_y_stride,
                              int w, int h) {
-  vp9_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
                               filter_x, filter_x_stride,
                               filter_y, filter_y_stride, w, h, 8);
  }
@@ -1258,7 +1261,7 @@ void wrap_convolve_copy_c_10(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_y,
                               int filter_y_stride,
                               int w, int h) {
-  vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
                               filter_x, filter_x_stride,
                               filter_y, filter_y_stride, w, h, 10);
  }
@@ -1270,7 +1273,7 @@ void wrap_convolve_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_y,
                              int filter_y_stride,
                              int w, int h) {
-  vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 10);
  }
@@ -1282,7 +1285,7 @@ void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
                                 const int16_t *filter_y,
                                 int filter_y_stride,
                                 int w, int h) {
-  vp9_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                                 filter_x, filter_x_stride,
                                 filter_y, filter_y_stride, w, h, 10);
  }
@@ -1294,7 +1297,7 @@ void wrap_convolve8_avg_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
                                     const int16_t *filter_y,
                                     int filter_y_stride,
                                     int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                     filter_x, filter_x_stride,
                                     filter_y, filter_y_stride, w, h, 10);
  }
@@ -1306,7 +1309,7 @@ void wrap_convolve8_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
                                const int16_t *filter_y,
                                int filter_y_stride,
                                int w, int h) {
-  vp9_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
                                filter_x, filter_x_stride,
                                filter_y, filter_y_stride, w, h, 10);
  }
@@ -1318,7 +1321,7 @@ void wrap_convolve8_avg_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
                                    const int16_t *filter_y,
                                    int filter_y_stride,
                                    int w, int h) {
-  vp9_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                                    filter_x, filter_x_stride,
                                    filter_y, filter_y_stride, w, h, 10);
  }
@@ -1330,7 +1333,7 @@ void wrap_convolve8_c_10(const uint8_t *src, ptrdiff_t src_stride,
                           const int16_t *filter_y,
                           int filter_y_stride,
                           int w, int h) {
-  vp9_highbd_convolve8_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
                           filter_x, filter_x_stride,
                           filter_y, filter_y_stride, w, h, 10);
  }
@@ -1342,7 +1345,7 @@ void wrap_convolve8_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_y,
                               int filter_y_stride,
                               int w, int h) {
-  vp9_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
                               filter_x, filter_x_stride,
                               filter_y, filter_y_stride, w, h, 10);
  }
@@ -1354,7 +1357,7 @@ void wrap_convolve_copy_c_12(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_y,
                               int filter_y_stride,
                               int w, int h) {
-  vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
                               filter_x, filter_x_stride,
                               filter_y, filter_y_stride, w, h, 12);
  }
@@ -1366,7 +1369,7 @@ void wrap_convolve_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_y,
                              int filter_y_stride,
                              int w, int h) {
-  vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
                              filter_x, filter_x_stride,
                              filter_y, filter_y_stride, w, h, 12);
  }
@@ -1378,7 +1381,7 @@ void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
                                 const int16_t *filter_y,
                                 int filter_y_stride,
                                 int w, int h) {
-  vp9_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                                 filter_x, filter_x_stride,
                                 filter_y, filter_y_stride, w, h, 12);
  }
@@ -1390,7 +1393,7 @@ void wrap_convolve8_avg_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
                                     const int16_t *filter_y,
                                     int filter_y_stride,
                                     int w, int h) {
-  vp9_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                     filter_x, filter_x_stride,
                                     filter_y, filter_y_stride, w, h, 12);
  }
@@ -1402,7 +1405,7 @@ void wrap_convolve8_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
                                const int16_t *filter_y,
                                int filter_y_stride,
                                int w, int h) {
-  vp9_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
                                filter_x, filter_x_stride,
                                filter_y, filter_y_stride, w, h, 12);
  }
@@ -1414,7 +1417,7 @@ void wrap_convolve8_avg_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
                                    const int16_t *filter_y,
                                    int filter_y_stride,
                                    int w, int h) {
-  vp9_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                                    filter_x, filter_x_stride,
                                    filter_y, filter_y_stride, w, h, 12);
  }
@@ -1426,7 +1429,7 @@ void wrap_convolve8_c_12(const uint8_t *src, ptrdiff_t src_stride,
                           const int16_t *filter_y,
                           int filter_y_stride,
                           int w, int h) {
-  vp9_highbd_convolve8_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
                           filter_x, filter_x_stride,
                           filter_y, filter_y_stride, w, h, 12);
  }
@@ -1438,7 +1441,7 @@ void wrap_convolve8_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_y,
                               int filter_y_stride,
                               int w, int h) {
-  vp9_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
+  vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
                               filter_x, filter_x_stride,
                               filter_y, filter_y_stride, w, h, 12);
  }
@@ -1504,10 +1507,10 @@ INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
  #else
  
  const ConvolveFunctions convolve8_c(
-    vp9_convolve_copy_c, vp9_convolve_avg_c,
-    vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
-    vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c,
-    vp9_convolve8_c, vp9_convolve8_avg_c, 0);
+    vpx_convolve_copy_c, vpx_convolve_avg_c,
+    vpx_convolve8_horiz_c, vpx_convolve8_avg_horiz_c,
+    vpx_convolve8_vert_c, vpx_convolve8_avg_vert_c,
+    vpx_convolve8_c, vpx_convolve8_avg_c, 0);
  
  INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
      make_tuple(4, 4, &convolve8_c),
@@ -1585,13 +1588,13 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
  #else
  const ConvolveFunctions convolve8_sse2(
  #if CONFIG_USE_X86INC
-    vp9_convolve_copy_sse2, vp9_convolve_avg_sse2,
+    vpx_convolve_copy_sse2, vpx_convolve_avg_sse2,
  #else
-    vp9_convolve_copy_c, vp9_convolve_avg_c,
+    vpx_convolve_copy_c, vpx_convolve_avg_c,
  #endif  // CONFIG_USE_X86INC
-    vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2,
-    vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2,
-    vp9_convolve8_sse2, vp9_convolve8_avg_sse2, 0);
+    vpx_convolve8_horiz_sse2, vpx_convolve8_avg_horiz_sse2,
+    vpx_convolve8_vert_sse2, vpx_convolve8_avg_vert_sse2,
+    vpx_convolve8_sse2, vpx_convolve8_avg_sse2, 0);
  
  INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
      make_tuple(4, 4, &convolve8_sse2),
@@ -1612,10 +1615,10 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
  
  #if HAVE_SSSE3
  const ConvolveFunctions convolve8_ssse3(
-    vp9_convolve_copy_c, vp9_convolve_avg_c,
-    vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3,
-    vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3,
-    vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3, 0);
+    vpx_convolve_copy_c, vpx_convolve_avg_c,
+    vpx_convolve8_horiz_ssse3, vpx_convolve8_avg_horiz_ssse3,
+    vpx_convolve8_vert_ssse3, vpx_convolve8_avg_vert_ssse3,
+    vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3, 0);
  
  INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
      make_tuple(4, 4, &convolve8_ssse3),
@@ -1635,10 +1638,10 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
  
  #if HAVE_AVX2 && HAVE_SSSE3
  const ConvolveFunctions convolve8_avx2(
-    vp9_convolve_copy_c, vp9_convolve_avg_c,
-    vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
-    vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
-    vp9_convolve8_avx2, vp9_convolve8_avg_ssse3, 0);
+    vpx_convolve_copy_c, vpx_convolve_avg_c,
+    vpx_convolve8_horiz_avx2, vpx_convolve8_avg_horiz_ssse3,
+    vpx_convolve8_vert_avx2, vpx_convolve8_avg_vert_ssse3,
+    vpx_convolve8_avx2, vpx_convolve8_avg_ssse3, 0);
  
  INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
      make_tuple(4, 4, &convolve8_avx2),
@@ -1659,16 +1662,16 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
  #if HAVE_NEON
  #if HAVE_NEON_ASM
  const ConvolveFunctions convolve8_neon(
-    vp9_convolve_copy_neon, vp9_convolve_avg_neon,
-    vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
-    vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
-    vp9_convolve8_neon, vp9_convolve8_avg_neon, 0);
+    vpx_convolve_copy_neon, vpx_convolve_avg_neon,
+    vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon,
+    vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon,
+    vpx_convolve8_neon, vpx_convolve8_avg_neon, 0);
  #else  // HAVE_NEON
  const ConvolveFunctions convolve8_neon(
-    vp9_convolve_copy_neon, vp9_convolve_avg_neon,
-    vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
-    vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
-    vp9_convolve8_neon, vp9_convolve8_avg_neon, 0);
+    vpx_convolve_copy_neon, vpx_convolve_avg_neon,
+    vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon,
+    vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon,
+    vpx_convolve8_neon, vpx_convolve8_avg_neon, 0);
  #endif  // HAVE_NEON_ASM
  
  INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
@@ -1689,10 +1692,10 @@ INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
  
  #if HAVE_DSPR2
  const ConvolveFunctions convolve8_dspr2(
-    vp9_convolve_copy_dspr2, vp9_convolve_avg_dspr2,
-    vp9_convolve8_horiz_dspr2, vp9_convolve8_avg_horiz_dspr2,
-    vp9_convolve8_vert_dspr2, vp9_convolve8_avg_vert_dspr2,
-    vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2, 0);
+    vpx_convolve_copy_dspr2, vpx_convolve_avg_dspr2,
+    vpx_convolve8_horiz_dspr2, vpx_convolve8_avg_horiz_dspr2,
+    vpx_convolve8_vert_dspr2, vpx_convolve8_avg_vert_dspr2,
+    vpx_convolve8_dspr2, vpx_convolve8_avg_dspr2, 0);
  
  INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
      make_tuple(4, 4, &convolve8_dspr2),
@@ -1712,10 +1715,10 @@ INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
  
  #if HAVE_MSA
  const ConvolveFunctions convolve8_msa(
-    vp9_convolve_copy_msa, vp9_convolve_avg_msa,
-    vp9_convolve8_horiz_msa, vp9_convolve8_avg_horiz_msa,
-    vp9_convolve8_vert_msa, vp9_convolve8_avg_vert_msa,
-    vp9_convolve8_msa, vp9_convolve8_avg_msa, 0);
+    vpx_convolve_copy_msa, vpx_convolve_avg_msa,
+    vpx_convolve8_horiz_msa, vpx_convolve8_avg_horiz_msa,
+    vpx_convolve8_vert_msa, vpx_convolve8_avg_vert_msa,
+    vpx_convolve8_msa, vpx_convolve8_avg_msa, 0);
  
  INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values(
      make_tuple(4, 4, &convolve8_msa),
diff --git a/vp9/common/arm/neon/vp9_convolve8_avg_neon.c b/vp9/common/arm/neon/vp9_convolve8_avg_neon.c

deleted file mode 100644 (file)

index dd569d3..0000000
--- a/vp9/common/arm/neon/vp9_convolve8_avg_neon.c
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stddef.h>
-#include <arm_neon.h>
-
-#include "./vpx_config.h"
-#include "vpx_ports/mem.h"
-
-void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
-                               uint8_t *dst, ptrdiff_t dst_stride,
-                               const int16_t *filter_x, int x_step_q4,
-                               const int16_t *filter_y, int y_step_q4,
-                               int w, int h);
-void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
-                               uint8_t *dst, ptrdiff_t dst_stride,
-                               const int16_t *filter_x, int x_step_q4,
-                               const int16_t *filter_y, int y_step_q4,
-                               int w, int h);
-
-static INLINE int32x4_t MULTIPLY_BY_Q0(
-        int16x4_t dsrc0,
-        int16x4_t dsrc1,
-        int16x4_t dsrc2,
-        int16x4_t dsrc3,
-        int16x4_t dsrc4,
-        int16x4_t dsrc5,
-        int16x4_t dsrc6,
-        int16x4_t dsrc7,
-        int16x8_t q0s16) {
-    int32x4_t qdst;
-    int16x4_t d0s16, d1s16;
-
-    d0s16 = vget_low_s16(q0s16);
-    d1s16 = vget_high_s16(q0s16);
-
-    qdst = vmull_lane_s16(dsrc0, d0s16, 0);
-    qdst = vmlal_lane_s16(qdst, dsrc1, d0s16, 1);
-    qdst = vmlal_lane_s16(qdst, dsrc2, d0s16, 2);
-    qdst = vmlal_lane_s16(qdst, dsrc3, d0s16, 3);
-    qdst = vmlal_lane_s16(qdst, dsrc4, d1s16, 0);
-    qdst = vmlal_lane_s16(qdst, dsrc5, d1s16, 1);
-    qdst = vmlal_lane_s16(qdst, dsrc6, d1s16, 2);
-    qdst = vmlal_lane_s16(qdst, dsrc7, d1s16, 3);
-    return qdst;
-}
-
-void vp9_convolve8_avg_horiz_neon(
-        uint8_t *src,
-        ptrdiff_t src_stride,
-        uint8_t *dst,
-        ptrdiff_t dst_stride,
-        const int16_t *filter_x,
-        int x_step_q4,
-        const int16_t *filter_y,  // unused
-        int y_step_q4,            // unused
-        int w,
-        int h) {
-    int width;
-    uint8_t *s, *d;
-    uint8x8_t d2u8, d3u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8;
-    uint32x2_t d2u32, d3u32, d6u32, d7u32, d28u32, d29u32, d30u32, d31u32;
-    uint8x16_t q1u8, q3u8, q12u8, q13u8, q14u8, q15u8;
-    int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d22s16, d23s16;
-    int16x4_t d24s16, d25s16, d26s16, d27s16;
-    uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16;
-    int16x8_t q0s16;
-    uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
-    int32x4_t q1s32, q2s32, q14s32, q15s32;
-    uint16x8x2_t q0x2u16;
-    uint8x8x2_t d0x2u8, d1x2u8;
-    uint32x2x2_t d0x2u32;
-    uint16x4x2_t d0x2u16, d1x2u16;
-    uint32x4x2_t q0x2u32;
-
-    if (x_step_q4 != 16) {
-        vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
-                                  filter_x, x_step_q4,
-                                  filter_y, y_step_q4, w, h);
-        return;
-    }
-
-    q0s16 = vld1q_s16(filter_x);
-
-    src -= 3;  // adjust for taps
-    for (; h > 0; h -= 4) {  // loop_horiz_v
-        s = src;
-        d24u8 = vld1_u8(s);
-        s += src_stride;
-        d25u8 = vld1_u8(s);
-        s += src_stride;
-        d26u8 = vld1_u8(s);
-        s += src_stride;
-        d27u8 = vld1_u8(s);
-
-        q12u8 = vcombine_u8(d24u8, d25u8);
-        q13u8 = vcombine_u8(d26u8, d27u8);
-
-        q0x2u16 = vtrnq_u16(vreinterpretq_u16_u8(q12u8),
-                            vreinterpretq_u16_u8(q13u8));
-        d24u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[0]));
-        d25u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[0]));
-        d26u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[1]));
-        d27u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[1]));
-        d0x2u8 = vtrn_u8(d24u8, d25u8);
-        d1x2u8 = vtrn_u8(d26u8, d27u8);
-
-        __builtin_prefetch(src + src_stride * 4);
-        __builtin_prefetch(src + src_stride * 5);
-
-        q8u16 = vmovl_u8(d0x2u8.val[0]);
-        q9u16 = vmovl_u8(d0x2u8.val[1]);
-        q10u16 = vmovl_u8(d1x2u8.val[0]);
-        q11u16 = vmovl_u8(d1x2u8.val[1]);
-
-        src += 7;
-        d16u16 = vget_low_u16(q8u16);
-        d17u16 = vget_high_u16(q8u16);
-        d18u16 = vget_low_u16(q9u16);
-        d19u16 = vget_high_u16(q9u16);
-        q8u16 = vcombine_u16(d16u16, d18u16);  // vswp 17 18
-        q9u16 = vcombine_u16(d17u16, d19u16);
-
-        d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
-        d23s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));  // vmov 23 21
-        for (width = w;
-             width > 0;
-             width -= 4, src += 4, dst += 4) {  // loop_horiz
-            s = src;
-            d28u32 = vld1_dup_u32((const uint32_t *)s);
-            s += src_stride;
-            d29u32 = vld1_dup_u32((const uint32_t *)s);
-            s += src_stride;
-            d31u32 = vld1_dup_u32((const uint32_t *)s);
-            s += src_stride;
-            d30u32 = vld1_dup_u32((const uint32_t *)s);
-
-            __builtin_prefetch(src + 64);
-
-            d0x2u16 = vtrn_u16(vreinterpret_u16_u32(d28u32),
-                               vreinterpret_u16_u32(d31u32));
-            d1x2u16 = vtrn_u16(vreinterpret_u16_u32(d29u32),
-                               vreinterpret_u16_u32(d30u32));
-            d0x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[0]),   // d28
-                             vreinterpret_u8_u16(d1x2u16.val[0]));  // d29
-            d1x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[1]),   // d31
-                             vreinterpret_u8_u16(d1x2u16.val[1]));  // d30
-
-            __builtin_prefetch(src + 64 + src_stride);
-
-            q14u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]);
-            q15u8 = vcombine_u8(d1x2u8.val[1], d1x2u8.val[0]);
-            q0x2u32 = vtrnq_u32(vreinterpretq_u32_u8(q14u8),
-                                vreinterpretq_u32_u8(q15u8));
-
-            d28u8 = vreinterpret_u8_u32(vget_low_u32(q0x2u32.val[0]));
-            d29u8 = vreinterpret_u8_u32(vget_high_u32(q0x2u32.val[0]));
-            q12u16 = vmovl_u8(d28u8);
-            q13u16 = vmovl_u8(d29u8);
-
-            __builtin_prefetch(src + 64 + src_stride * 2);
-
-            d = dst;
-            d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 0);
-            d += dst_stride;
-            d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 0);
-            d += dst_stride;
-            d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 1);
-            d += dst_stride;
-            d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 1);
-
-            d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
-            d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
-            d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
-            d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
-            d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
-            d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
-            d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
-            d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
-            d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
-
-            q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d20s16, d22s16,
-                                    d18s16, d19s16, d23s16, d24s16, q0s16);
-            q2s32  = MULTIPLY_BY_Q0(d17s16, d20s16, d22s16, d18s16,
-                                    d19s16, d23s16, d24s16, d26s16, q0s16);
-            q14s32 = MULTIPLY_BY_Q0(d20s16, d22s16, d18s16, d19s16,
-                                    d23s16, d24s16, d26s16, d27s16, q0s16);
-            q15s32 = MULTIPLY_BY_Q0(d22s16, d18s16, d19s16, d23s16,
-                                    d24s16, d26s16, d27s16, d25s16, q0s16);
-
-            __builtin_prefetch(src + 64 + src_stride * 3);
-
-            d2u16 = vqrshrun_n_s32(q1s32, 7);
-            d3u16 = vqrshrun_n_s32(q2s32, 7);
-            d4u16 = vqrshrun_n_s32(q14s32, 7);
-            d5u16 = vqrshrun_n_s32(q15s32, 7);
-
-            q1u16 = vcombine_u16(d2u16, d3u16);
-            q2u16 = vcombine_u16(d4u16, d5u16);
-
-            d2u8 = vqmovn_u16(q1u16);
-            d3u8 = vqmovn_u16(q2u16);
-
-            d0x2u16 = vtrn_u16(vreinterpret_u16_u8(d2u8),
-                               vreinterpret_u16_u8(d3u8));
-            d0x2u32 = vtrn_u32(vreinterpret_u32_u16(d0x2u16.val[0]),
-                               vreinterpret_u32_u16(d0x2u16.val[1]));
-            d0x2u8 = vtrn_u8(vreinterpret_u8_u32(d0x2u32.val[0]),
-                             vreinterpret_u8_u32(d0x2u32.val[1]));
-
-            q1u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]);
-            q3u8 = vreinterpretq_u8_u32(vcombine_u32(d6u32, d7u32));
-
-            q1u8 = vrhaddq_u8(q1u8, q3u8);
-
-            d2u32 = vreinterpret_u32_u8(vget_low_u8(q1u8));
-            d3u32 = vreinterpret_u32_u8(vget_high_u8(q1u8));
-
-            d = dst;
-            vst1_lane_u32((uint32_t *)d, d2u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d2u32, 1);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 1);
-
-            q8u16 = q9u16;
-            d20s16 = d23s16;
-            q11u16 = q12u16;
-            q9u16 = q13u16;
-            d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
-        }
-        src += src_stride * 4 - w - 7;
-        dst += dst_stride * 4 - w;
-    }
-    return;
-}
-
-void vp9_convolve8_avg_vert_neon(
-        uint8_t *src,
-        ptrdiff_t src_stride,
-        uint8_t *dst,
-        ptrdiff_t dst_stride,
-        const int16_t *filter_x,  // unused
-        int x_step_q4,            // unused
-        const int16_t *filter_y,
-        int y_step_q4,
-        int w,
-        int h) {
-    int height;
-    uint8_t *s, *d;
-    uint8x8_t d2u8, d3u8;
-    uint32x2_t d2u32, d3u32, d6u32, d7u32;
-    uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32;
-    uint8x16_t q1u8, q3u8;
-    int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16;
-    int16x4_t d24s16, d25s16, d26s16, d27s16;
-    uint16x4_t d2u16, d3u16, d4u16, d5u16;
-    int16x8_t q0s16;
-    uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
-    int32x4_t q1s32, q2s32, q14s32, q15s32;
-
-    if (y_step_q4 != 16) {
-        vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
-                                 filter_x, x_step_q4,
-                                 filter_y, y_step_q4, w, h);
-        return;
-    }
-
-    src -= src_stride * 3;
-    q0s16 = vld1q_s16(filter_y);
-    for (; w > 0; w -= 4, src += 4, dst += 4) {  // loop_vert_h
-        s = src;
-        d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0);
-        s += src_stride;
-        d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1);
-        s += src_stride;
-        d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0);
-        s += src_stride;
-        d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 1);
-        s += src_stride;
-        d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 0);
-        s += src_stride;
-        d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 1);
-        s += src_stride;
-        d22u32 = vld1_lane_u32((const uint32_t *)s, d22u32, 0);
-        s += src_stride;
-
-        q8u16  = vmovl_u8(vreinterpret_u8_u32(d16u32));
-        q9u16  = vmovl_u8(vreinterpret_u8_u32(d18u32));
-        q10u16 = vmovl_u8(vreinterpret_u8_u32(d20u32));
-        q11u16 = vmovl_u8(vreinterpret_u8_u32(d22u32));
-
-        d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
-        d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
-        d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
-        d = dst;
-        for (height = h; height > 0; height -= 4) {  // loop_vert
-            d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 0);
-            s += src_stride;
-            d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 0);
-            s += src_stride;
-            d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 1);
-            s += src_stride;
-            d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 1);
-            s += src_stride;
-
-            q12u16 = vmovl_u8(vreinterpret_u8_u32(d24u32));
-            q13u16 = vmovl_u8(vreinterpret_u8_u32(d26u32));
-
-            d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 0);
-            d += dst_stride;
-            d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 1);
-            d += dst_stride;
-            d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 0);
-            d += dst_stride;
-            d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 1);
-            d -= dst_stride * 3;
-
-            d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
-            d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
-            d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
-            d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));
-            d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
-            d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
-            d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
-            d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
-
-            __builtin_prefetch(s);
-            __builtin_prefetch(s + src_stride);
-            q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d18s16, d19s16,
-                                    d20s16, d21s16, d22s16, d24s16, q0s16);
-            __builtin_prefetch(s + src_stride * 2);
-            __builtin_prefetch(s + src_stride * 3);
-            q2s32  = MULTIPLY_BY_Q0(d17s16, d18s16, d19s16, d20s16,
-                                    d21s16, d22s16, d24s16, d26s16, q0s16);
-            __builtin_prefetch(d);
-            __builtin_prefetch(d + dst_stride);
-            q14s32 = MULTIPLY_BY_Q0(d18s16, d19s16, d20s16, d21s16,
-                                    d22s16, d24s16, d26s16, d27s16, q0s16);
-            __builtin_prefetch(d + dst_stride * 2);
-            __builtin_prefetch(d + dst_stride * 3);
-            q15s32 = MULTIPLY_BY_Q0(d19s16, d20s16, d21s16, d22s16,
-                                    d24s16, d26s16, d27s16, d25s16, q0s16);
-
-            d2u16 = vqrshrun_n_s32(q1s32, 7);
-            d3u16 = vqrshrun_n_s32(q2s32, 7);
-            d4u16 = vqrshrun_n_s32(q14s32, 7);
-            d5u16 = vqrshrun_n_s32(q15s32, 7);
-
-            q1u16 = vcombine_u16(d2u16, d3u16);
-            q2u16 = vcombine_u16(d4u16, d5u16);
-
-            d2u8 = vqmovn_u16(q1u16);
-            d3u8 = vqmovn_u16(q2u16);
-
-            q1u8 = vcombine_u8(d2u8, d3u8);
-            q3u8 = vreinterpretq_u8_u32(vcombine_u32(d6u32, d7u32));
-
-            q1u8 = vrhaddq_u8(q1u8, q3u8);
-
-            d2u32 = vreinterpret_u32_u8(vget_low_u8(q1u8));
-            d3u32 = vreinterpret_u32_u8(vget_high_u8(q1u8));
-
-            vst1_lane_u32((uint32_t *)d, d2u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d2u32, 1);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 1);
-            d += dst_stride;
-
-            q8u16 = q10u16;
-            d18s16 = d22s16;
-            d19s16 = d24s16;
-            q10u16 = q13u16;
-            d22s16 = d25s16;
-        }
-    }
-    return;
-}
diff --git a/vp9/common/arm/neon/vp9_convolve8_neon.c b/vp9/common/arm/neon/vp9_convolve8_neon.c

deleted file mode 100644 (file)

index 5c555c4..0000000
--- a/vp9/common/arm/neon/vp9_convolve8_neon.c
+++ /dev/null
@@ -1,357 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stddef.h>
-#include <arm_neon.h>
-
-#include "./vpx_config.h"
-#include "vpx_ports/mem.h"
-
-void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
-                           uint8_t *dst, ptrdiff_t dst_stride,
-                           const int16_t *filter_x, int x_step_q4,
-                           const int16_t *filter_y, int y_step_q4,
-                           int w, int h);
-void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
-                           uint8_t *dst, ptrdiff_t dst_stride,
-                           const int16_t *filter_x, int x_step_q4,
-                           const int16_t *filter_y, int y_step_q4,
-                           int w, int h);
-
-static INLINE int32x4_t MULTIPLY_BY_Q0(
-        int16x4_t dsrc0,
-        int16x4_t dsrc1,
-        int16x4_t dsrc2,
-        int16x4_t dsrc3,
-        int16x4_t dsrc4,
-        int16x4_t dsrc5,
-        int16x4_t dsrc6,
-        int16x4_t dsrc7,
-        int16x8_t q0s16) {
-    int32x4_t qdst;
-    int16x4_t d0s16, d1s16;
-
-    d0s16 = vget_low_s16(q0s16);
-    d1s16 = vget_high_s16(q0s16);
-
-    qdst = vmull_lane_s16(dsrc0, d0s16, 0);
-    qdst = vmlal_lane_s16(qdst, dsrc1, d0s16, 1);
-    qdst = vmlal_lane_s16(qdst, dsrc2, d0s16, 2);
-    qdst = vmlal_lane_s16(qdst, dsrc3, d0s16, 3);
-    qdst = vmlal_lane_s16(qdst, dsrc4, d1s16, 0);
-    qdst = vmlal_lane_s16(qdst, dsrc5, d1s16, 1);
-    qdst = vmlal_lane_s16(qdst, dsrc6, d1s16, 2);
-    qdst = vmlal_lane_s16(qdst, dsrc7, d1s16, 3);
-    return qdst;
-}
-
-void vp9_convolve8_horiz_neon(
-        uint8_t *src,
-        ptrdiff_t src_stride,
-        uint8_t *dst,
-        ptrdiff_t dst_stride,
-        const int16_t *filter_x,
-        int x_step_q4,
-        const int16_t *filter_y,  // unused
-        int y_step_q4,            // unused
-        int w,
-        int h) {
-    int width;
-    uint8_t *s, *d, *psrc, *pdst;
-    uint8x8_t d2u8, d3u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8;
-    uint32x2_t d2u32, d3u32, d28u32, d29u32, d30u32, d31u32;
-    uint8x16_t q12u8, q13u8, q14u8, q15u8;
-    int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d22s16, d23s16;
-    int16x4_t d24s16, d25s16, d26s16, d27s16;
-    uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16;
-    int16x8_t q0s16;
-    uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
-    int32x4_t q1s32, q2s32, q14s32, q15s32;
-    uint16x8x2_t q0x2u16;
-    uint8x8x2_t d0x2u8, d1x2u8;
-    uint32x2x2_t d0x2u32;
-    uint16x4x2_t d0x2u16, d1x2u16;
-    uint32x4x2_t q0x2u32;
-
-    if (x_step_q4 != 16) {
-        vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
-                              filter_x, x_step_q4,
-                              filter_y, y_step_q4, w, h);
-        return;
-    }
-
-    q0s16 = vld1q_s16(filter_x);
-
-    src -= 3;  // adjust for taps
-    for (; h > 0; h -= 4,
-        src += src_stride * 4,
-        dst += dst_stride * 4) {  // loop_horiz_v
-        s = src;
-        d24u8 = vld1_u8(s);
-        s += src_stride;
-        d25u8 = vld1_u8(s);
-        s += src_stride;
-        d26u8 = vld1_u8(s);
-        s += src_stride;
-        d27u8 = vld1_u8(s);
-
-        q12u8 = vcombine_u8(d24u8, d25u8);
-        q13u8 = vcombine_u8(d26u8, d27u8);
-
-        q0x2u16 = vtrnq_u16(vreinterpretq_u16_u8(q12u8),
-                            vreinterpretq_u16_u8(q13u8));
-        d24u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[0]));
-        d25u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[0]));
-        d26u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[1]));
-        d27u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[1]));
-        d0x2u8 = vtrn_u8(d24u8, d25u8);
-        d1x2u8 = vtrn_u8(d26u8, d27u8);
-
-        __builtin_prefetch(src + src_stride * 4);
-        __builtin_prefetch(src + src_stride * 5);
-        __builtin_prefetch(src + src_stride * 6);
-
-        q8u16  = vmovl_u8(d0x2u8.val[0]);
-        q9u16  = vmovl_u8(d0x2u8.val[1]);
-        q10u16 = vmovl_u8(d1x2u8.val[0]);
-        q11u16 = vmovl_u8(d1x2u8.val[1]);
-
-        d16u16 = vget_low_u16(q8u16);
-        d17u16 = vget_high_u16(q8u16);
-        d18u16 = vget_low_u16(q9u16);
-        d19u16 = vget_high_u16(q9u16);
-        q8u16 = vcombine_u16(d16u16, d18u16);  // vswp 17 18
-        q9u16 = vcombine_u16(d17u16, d19u16);
-
-        d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
-        d23s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));  // vmov 23 21
-        for (width = w, psrc = src + 7, pdst = dst;
-             width > 0;
-             width -= 4, psrc += 4, pdst += 4) {  // loop_horiz
-            s = psrc;
-            d28u32 = vld1_dup_u32((const uint32_t *)s);
-            s += src_stride;
-            d29u32 = vld1_dup_u32((const uint32_t *)s);
-            s += src_stride;
-            d31u32 = vld1_dup_u32((const uint32_t *)s);
-            s += src_stride;
-            d30u32 = vld1_dup_u32((const uint32_t *)s);
-
-            __builtin_prefetch(psrc + 64);
-
-            d0x2u16 = vtrn_u16(vreinterpret_u16_u32(d28u32),
-                               vreinterpret_u16_u32(d31u32));
-            d1x2u16 = vtrn_u16(vreinterpret_u16_u32(d29u32),
-                               vreinterpret_u16_u32(d30u32));
-            d0x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[0]),   // d28
-                             vreinterpret_u8_u16(d1x2u16.val[0]));  // d29
-            d1x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[1]),   // d31
-                             vreinterpret_u8_u16(d1x2u16.val[1]));  // d30
-
-            __builtin_prefetch(psrc + 64 + src_stride);
-
-            q14u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]);
-            q15u8 = vcombine_u8(d1x2u8.val[1], d1x2u8.val[0]);
-            q0x2u32 = vtrnq_u32(vreinterpretq_u32_u8(q14u8),
-                                vreinterpretq_u32_u8(q15u8));
-
-            d28u8 = vreinterpret_u8_u32(vget_low_u32(q0x2u32.val[0]));
-            d29u8 = vreinterpret_u8_u32(vget_high_u32(q0x2u32.val[0]));
-            q12u16 = vmovl_u8(d28u8);
-            q13u16 = vmovl_u8(d29u8);
-
-            __builtin_prefetch(psrc + 64 + src_stride * 2);
-
-            d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
-            d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
-            d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
-            d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
-            d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
-            d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
-            d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
-            d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
-            d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
-
-            q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d20s16, d22s16,
-                                    d18s16, d19s16, d23s16, d24s16, q0s16);
-            q2s32  = MULTIPLY_BY_Q0(d17s16, d20s16, d22s16, d18s16,
-                                    d19s16, d23s16, d24s16, d26s16, q0s16);
-            q14s32 = MULTIPLY_BY_Q0(d20s16, d22s16, d18s16, d19s16,
-                                    d23s16, d24s16, d26s16, d27s16, q0s16);
-            q15s32 = MULTIPLY_BY_Q0(d22s16, d18s16, d19s16, d23s16,
-                                    d24s16, d26s16, d27s16, d25s16, q0s16);
-
-            __builtin_prefetch(psrc + 60 + src_stride * 3);
-
-            d2u16 = vqrshrun_n_s32(q1s32, 7);
-            d3u16 = vqrshrun_n_s32(q2s32, 7);
-            d4u16 = vqrshrun_n_s32(q14s32, 7);
-            d5u16 = vqrshrun_n_s32(q15s32, 7);
-
-            q1u16 = vcombine_u16(d2u16, d3u16);
-            q2u16 = vcombine_u16(d4u16, d5u16);
-
-            d2u8 = vqmovn_u16(q1u16);
-            d3u8 = vqmovn_u16(q2u16);
-
-            d0x2u16 = vtrn_u16(vreinterpret_u16_u8(d2u8),
-                               vreinterpret_u16_u8(d3u8));
-            d0x2u32 = vtrn_u32(vreinterpret_u32_u16(d0x2u16.val[0]),
-                               vreinterpret_u32_u16(d0x2u16.val[1]));
-            d0x2u8 = vtrn_u8(vreinterpret_u8_u32(d0x2u32.val[0]),
-                             vreinterpret_u8_u32(d0x2u32.val[1]));
-
-            d2u32 = vreinterpret_u32_u8(d0x2u8.val[0]);
-            d3u32 = vreinterpret_u32_u8(d0x2u8.val[1]);
-
-            d = pdst;
-            vst1_lane_u32((uint32_t *)d, d2u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d2u32, 1);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 1);
-
-            q8u16 = q9u16;
-            d20s16 = d23s16;
-            q11u16 = q12u16;
-            q9u16 = q13u16;
-            d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
-        }
-    }
-    return;
-}
-
-void vp9_convolve8_vert_neon(
-        uint8_t *src,
-        ptrdiff_t src_stride,
-        uint8_t *dst,
-        ptrdiff_t dst_stride,
-        const int16_t *filter_x,  // unused
-        int x_step_q4,            // unused
-        const int16_t *filter_y,
-        int y_step_q4,
-        int w,
-        int h) {
-    int height;
-    uint8_t *s, *d;
-    uint32x2_t d2u32, d3u32;
-    uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32;
-    int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16;
-    int16x4_t d24s16, d25s16, d26s16, d27s16;
-    uint16x4_t d2u16, d3u16, d4u16, d5u16;
-    int16x8_t q0s16;
-    uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
-    int32x4_t q1s32, q2s32, q14s32, q15s32;
-
-    if (y_step_q4 != 16) {
-        vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
-                             filter_x, x_step_q4,
-                             filter_y, y_step_q4, w, h);
-        return;
-    }
-
-    src -= src_stride * 3;
-    q0s16 = vld1q_s16(filter_y);
-    for (; w > 0; w -= 4, src += 4, dst += 4) {  // loop_vert_h
-        s = src;
-        d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0);
-        s += src_stride;
-        d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1);
-        s += src_stride;
-        d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0);
-        s += src_stride;
-        d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 1);
-        s += src_stride;
-        d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 0);
-        s += src_stride;
-        d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 1);
-        s += src_stride;
-        d22u32 = vld1_lane_u32((const uint32_t *)s, d22u32, 0);
-        s += src_stride;
-
-        q8u16  = vmovl_u8(vreinterpret_u8_u32(d16u32));
-        q9u16  = vmovl_u8(vreinterpret_u8_u32(d18u32));
-        q10u16 = vmovl_u8(vreinterpret_u8_u32(d20u32));
-        q11u16 = vmovl_u8(vreinterpret_u8_u32(d22u32));
-
-        d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
-        d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
-        d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
-        d = dst;
-        for (height = h; height > 0; height -= 4) {  // loop_vert
-            d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 0);
-            s += src_stride;
-            d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 0);
-            s += src_stride;
-            d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 1);
-            s += src_stride;
-            d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 1);
-            s += src_stride;
-
-            q12u16 = vmovl_u8(vreinterpret_u8_u32(d24u32));
-            q13u16 = vmovl_u8(vreinterpret_u8_u32(d26u32));
-
-            d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
-            d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
-            d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
-            d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));
-            d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
-            d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
-            d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
-            d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
-
-            __builtin_prefetch(d);
-            __builtin_prefetch(d + dst_stride);
-            q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d18s16, d19s16,
-                                    d20s16, d21s16, d22s16, d24s16, q0s16);
-            __builtin_prefetch(d + dst_stride * 2);
-            __builtin_prefetch(d + dst_stride * 3);
-            q2s32  = MULTIPLY_BY_Q0(d17s16, d18s16, d19s16, d20s16,
-                                    d21s16, d22s16, d24s16, d26s16, q0s16);
-            __builtin_prefetch(s);
-            __builtin_prefetch(s + src_stride);
-            q14s32 = MULTIPLY_BY_Q0(d18s16, d19s16, d20s16, d21s16,
-                                    d22s16, d24s16, d26s16, d27s16, q0s16);
-            __builtin_prefetch(s + src_stride * 2);
-            __builtin_prefetch(s + src_stride * 3);
-            q15s32 = MULTIPLY_BY_Q0(d19s16, d20s16, d21s16, d22s16,
-                                    d24s16, d26s16, d27s16, d25s16, q0s16);
-
-            d2u16 = vqrshrun_n_s32(q1s32, 7);
-            d3u16 = vqrshrun_n_s32(q2s32, 7);
-            d4u16 = vqrshrun_n_s32(q14s32, 7);
-            d5u16 = vqrshrun_n_s32(q15s32, 7);
-
-            q1u16 = vcombine_u16(d2u16, d3u16);
-            q2u16 = vcombine_u16(d4u16, d5u16);
-
-            d2u32 = vreinterpret_u32_u8(vqmovn_u16(q1u16));
-            d3u32 = vreinterpret_u32_u8(vqmovn_u16(q2u16));
-
-            vst1_lane_u32((uint32_t *)d, d2u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d2u32, 1);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 0);
-            d += dst_stride;
-            vst1_lane_u32((uint32_t *)d, d3u32, 1);
-            d += dst_stride;
-
-            q8u16 = q10u16;
-            d18s16 = d22s16;
-            d19s16 = d24s16;
-            q10u16 = q13u16;
-            d22s16 = d25s16;
-        }
-    }
-    return;
-}
diff --git a/vp9/common/arm/neon/vp9_convolve_avg_neon.c b/vp9/common/arm/neon/vp9_convolve_avg_neon.c

deleted file mode 100644 (file)

index 3a3db35..0000000
--- a/vp9/common/arm/neon/vp9_convolve_avg_neon.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stddef.h>
-#include <arm_neon.h>
-
-void vp9_convolve_avg_neon(
-        const uint8_t *src,    // r0
-        ptrdiff_t src_stride,  // r1
-        uint8_t *dst,          // r2
-        ptrdiff_t dst_stride,  // r3
-        const int16_t *filter_x,
-        int filter_x_stride,
-        const int16_t *filter_y,
-        int filter_y_stride,
-        int w,
-        int h) {
-    uint8_t *d;
-    uint8x8_t d0u8, d1u8, d2u8, d3u8;
-    uint32x2_t d0u32, d2u32;
-    uint8x16_t q0u8, q1u8, q2u8, q3u8, q8u8, q9u8, q10u8, q11u8;
-    (void)filter_x;  (void)filter_x_stride;
-    (void)filter_y;  (void)filter_y_stride;
-
-    d = dst;
-    if (w > 32) {  // avg64
-        for (; h > 0; h -= 1) {
-            q0u8  = vld1q_u8(src);
-            q1u8  = vld1q_u8(src + 16);
-            q2u8  = vld1q_u8(src + 32);
-            q3u8  = vld1q_u8(src + 48);
-            src += src_stride;
-            q8u8  = vld1q_u8(d);
-            q9u8  = vld1q_u8(d + 16);
-            q10u8 = vld1q_u8(d + 32);
-            q11u8 = vld1q_u8(d + 48);
-            d += dst_stride;
-
-            q0u8 = vrhaddq_u8(q0u8, q8u8);
-            q1u8 = vrhaddq_u8(q1u8, q9u8);
-            q2u8 = vrhaddq_u8(q2u8, q10u8);
-            q3u8 = vrhaddq_u8(q3u8, q11u8);
-
-            vst1q_u8(dst, q0u8);
-            vst1q_u8(dst + 16, q1u8);
-            vst1q_u8(dst + 32, q2u8);
-            vst1q_u8(dst + 48, q3u8);
-            dst += dst_stride;
-        }
-    } else if (w == 32) {  // avg32
-        for (; h > 0; h -= 2) {
-            q0u8 = vld1q_u8(src);
-            q1u8 = vld1q_u8(src + 16);
-            src += src_stride;
-            q2u8 = vld1q_u8(src);
-            q3u8 = vld1q_u8(src + 16);
-            src += src_stride;
-            q8u8 = vld1q_u8(d);
-            q9u8 = vld1q_u8(d + 16);
-            d += dst_stride;
-            q10u8 = vld1q_u8(d);
-            q11u8 = vld1q_u8(d + 16);
-            d += dst_stride;
-
-            q0u8 = vrhaddq_u8(q0u8, q8u8);
-            q1u8 = vrhaddq_u8(q1u8, q9u8);
-            q2u8 = vrhaddq_u8(q2u8, q10u8);
-            q3u8 = vrhaddq_u8(q3u8, q11u8);
-
-            vst1q_u8(dst, q0u8);
-            vst1q_u8(dst + 16, q1u8);
-            dst += dst_stride;
-            vst1q_u8(dst, q2u8);
-            vst1q_u8(dst + 16, q3u8);
-            dst += dst_stride;
-        }
-    } else if (w > 8) {  // avg16
-        for (; h > 0; h -= 2) {
-            q0u8 = vld1q_u8(src);
-            src += src_stride;
-            q1u8 = vld1q_u8(src);
-            src += src_stride;
-            q2u8 = vld1q_u8(d);
-            d += dst_stride;
-            q3u8 = vld1q_u8(d);
-            d += dst_stride;
-
-            q0u8 = vrhaddq_u8(q0u8, q2u8);
-            q1u8 = vrhaddq_u8(q1u8, q3u8);
-
-            vst1q_u8(dst, q0u8);
-            dst += dst_stride;
-            vst1q_u8(dst, q1u8);
-            dst += dst_stride;
-        }
-    } else if (w == 8) {  // avg8
-        for (; h > 0; h -= 2) {
-            d0u8 = vld1_u8(src);
-            src += src_stride;
-            d1u8 = vld1_u8(src);
-            src += src_stride;
-            d2u8 = vld1_u8(d);
-            d += dst_stride;
-            d3u8 = vld1_u8(d);
-            d += dst_stride;
-
-            q0u8 = vcombine_u8(d0u8, d1u8);
-            q1u8 = vcombine_u8(d2u8, d3u8);
-            q0u8 = vrhaddq_u8(q0u8, q1u8);
-
-            vst1_u8(dst, vget_low_u8(q0u8));
-            dst += dst_stride;
-            vst1_u8(dst, vget_high_u8(q0u8));
-            dst += dst_stride;
-        }
-    } else {  // avg4
-        for (; h > 0; h -= 2) {
-            d0u32 = vld1_lane_u32((const uint32_t *)src, d0u32, 0);
-            src += src_stride;
-            d0u32 = vld1_lane_u32((const uint32_t *)src, d0u32, 1);
-            src += src_stride;
-            d2u32 = vld1_lane_u32((const uint32_t *)d, d2u32, 0);
-            d += dst_stride;
-            d2u32 = vld1_lane_u32((const uint32_t *)d, d2u32, 1);
-            d += dst_stride;
-
-            d0u8 = vrhadd_u8(vreinterpret_u8_u32(d0u32),
-                             vreinterpret_u8_u32(d2u32));
-
-            d0u32 = vreinterpret_u32_u8(d0u8);
-            vst1_lane_u32((uint32_t *)dst, d0u32, 0);
-            dst += dst_stride;
-            vst1_lane_u32((uint32_t *)dst, d0u32, 1);
-            dst += dst_stride;
-        }
-    }
-    return;
-}
diff --git a/vp9/common/arm/neon/vp9_copy_neon.c b/vp9/common/arm/neon/vp9_copy_neon.c

deleted file mode 100644 (file)

index f334abe..0000000
--- a/vp9/common/arm/neon/vp9_copy_neon.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stddef.h>
-#include <arm_neon.h>
-
-void vp9_convolve_copy_neon(
-        const uint8_t *src,    // r0
-        ptrdiff_t src_stride,  // r1
-        uint8_t *dst,          // r2
-        ptrdiff_t dst_stride,  // r3
-        const int16_t *filter_x,
-        int filter_x_stride,
-        const int16_t *filter_y,
-        int filter_y_stride,
-        int w,
-        int h) {
-    uint8x8_t d0u8, d2u8;
-    uint8x16_t q0u8, q1u8, q2u8, q3u8;
-    (void)filter_x;  (void)filter_x_stride;
-    (void)filter_y;  (void)filter_y_stride;
-
-    if (w > 32) {  // copy64
-        for (; h > 0; h--) {
-            q0u8 = vld1q_u8(src);
-            q1u8 = vld1q_u8(src + 16);
-            q2u8 = vld1q_u8(src + 32);
-            q3u8 = vld1q_u8(src + 48);
-            src += src_stride;
-
-            vst1q_u8(dst, q0u8);
-            vst1q_u8(dst + 16, q1u8);
-            vst1q_u8(dst + 32, q2u8);
-            vst1q_u8(dst + 48, q3u8);
-            dst += dst_stride;
-        }
-    } else if (w == 32) {  // copy32
-        for (; h > 0; h -= 2) {
-            q0u8 = vld1q_u8(src);
-            q1u8 = vld1q_u8(src + 16);
-            src += src_stride;
-            q2u8 = vld1q_u8(src);
-            q3u8 = vld1q_u8(src + 16);
-            src += src_stride;
-
-            vst1q_u8(dst, q0u8);
-            vst1q_u8(dst + 16, q1u8);
-            dst += dst_stride;
-            vst1q_u8(dst, q2u8);
-            vst1q_u8(dst + 16, q3u8);
-            dst += dst_stride;
-        }
-    } else if (w > 8) {  // copy16
-        for (; h > 0; h -= 2) {
-            q0u8 = vld1q_u8(src);
-            src += src_stride;
-            q1u8 = vld1q_u8(src);
-            src += src_stride;
-
-            vst1q_u8(dst, q0u8);
-            dst += dst_stride;
-            vst1q_u8(dst, q1u8);
-            dst += dst_stride;
-        }
-    } else if (w == 8) {  // copy8
-        for (; h > 0; h -= 2) {
-            d0u8 = vld1_u8(src);
-            src += src_stride;
-            d2u8 = vld1_u8(src);
-            src += src_stride;
-
-            vst1_u8(dst, d0u8);
-            dst += dst_stride;
-            vst1_u8(dst, d2u8);
-            dst += dst_stride;
-        }
-    } else {  // copy4
-        for (; h > 0; h--) {
-            *(uint32_t *)dst = *(const uint32_t *)src;
-            src += src_stride;
-            dst += dst_stride;
-        }
-    }
-    return;
-}
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h

index 371738a..0285be1 100644 (file)
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -11,9 +11,10 @@
  #ifndef VP9_COMMON_VP9_ENTROPYMODE_H_
  #define VP9_COMMON_VP9_ENTROPYMODE_H_
  
-#include "vp9/common/vp9_filter.h"
  #include "vp9/common/vp9_entropy.h"
  #include "vp9/common/vp9_entropymv.h"
+#include "vp9/common/vp9_filter.h"
+#include "vpx_dsp/vpx_filter.h"
  
  #ifdef __cplusplus
  extern "C" {
diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h

index 40d6a0d..efa24bc 100644 (file)
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -13,6 +13,7 @@
  
  #include "./vpx_config.h"
  #include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_filter.h"
  #include "vpx_ports/mem.h"
  
  
@@ -20,13 +21,6 @@
  extern "C" {
  #endif
  
-#define FILTER_BITS 7
-
-#define SUBPEL_BITS 4
-#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
-#define SUBPEL_SHIFTS (1 << SUBPEL_BITS)
-#define SUBPEL_TAPS 8
-
  #define EIGHTTAP            0
  #define EIGHTTAP_SMOOTH     1
  #define EIGHTTAP_SHARP      2
@@ -36,9 +30,8 @@ extern "C" {
  // 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
  #define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
  #define SWITCHABLE 4 /* should be the last one */
-typedef uint8_t INTERP_FILTER;
  
-typedef int16_t InterpKernel[SUBPEL_TAPS];
+typedef uint8_t INTERP_FILTER;
  
  extern const InterpKernel *vp9_filter_kernels[4];
  
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h

index 2aa8ee9..1a4ea2f 100644 (file)
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -15,6 +15,9 @@
  
  #include "./vpx_config.h"
  #include "vpx_dsp/txfm_common.h"
+#if CONFIG_VP9_HIGHBITDEPTH
+#include "vpx_dsp/vpx_dsp_common.h"
+#endif  // CONFIG_VP9_HIGHBITDEPTH
  #include "vpx_ports/mem.h"
  #include "vp9/common/vp9_common.h"
  #include "vp9/common/vp9_enums.h"
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c

index 6d38fab..db9971d 100644 (file)
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -16,7 +16,6 @@
  #include "vpx/vpx_integer.h"
  
  #include "vp9/common/vp9_blockd.h"
-#include "vp9/common/vp9_filter.h"
  #include "vp9/common/vp9_reconinter.h"
  #include "vp9/common/vp9_reconintra.h"
  
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h

index 7f63f5a..9bc6290 100644 (file)
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -11,8 +11,10 @@
  #ifndef VP9_COMMON_VP9_RECONINTER_H_
  #define VP9_COMMON_VP9_RECONINTER_H_
  
-#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_filter.h"
  #include "vp9/common/vp9_onyxc_int.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_filter.h"
  
  #ifdef __cplusplus
  extern "C" {
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c

index d87a043..46f08a7 100644 (file)
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -11,6 +11,9 @@
  #include "./vpx_config.h"
  #include "./vpx_dsp_rtcd.h"
  
+#if CONFIG_VP9_HIGHBITDEPTH
+#include "vpx_dsp/vpx_dsp_common.h"
+#endif  // CONFIG_VP9_HIGHBITDEPTH
  #include "vpx_mem/vpx_mem.h"
  #include "vpx_ports/mem.h"
  #include "vpx_ports/vpx_once.h"
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl

index ee99d7a..883733f 100644 (file)
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -54,12 +54,6 @@ if ($opts{arch} eq "x86_64") {
    $avx2_x86_64 = 'avx2';
  }
  
-# optimizations which depend on multiple features
-$avx2_ssse3 = '';
-if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) {
-  $avx2_ssse3 = 'avx2';
-}
-
  #
  # post proc
  #
@@ -88,33 +82,6 @@ specialize qw/vp9_filter_by_weight8x8 sse2 msa/;
  }
  
  #
-# Sub Pixel Filters
-#
-add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve_copy neon dspr2 msa/, "$sse2_x86inc";
-
-add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve_avg neon dspr2 msa/, "$sse2_x86inc";
-
-add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8 sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
-
-add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_horiz sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
-
-add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_vert sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
-
-add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2 msa/;
-
-add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2 msa/;
-
-add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2 msa/;
-
-#
  # dct
  #
  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
diff --git a/vp9/common/vp9_scale.c b/vp9/common/vp9_scale.c

index 6db8f9c..8f5c72e 100644 (file)
--- a/vp9/common/vp9_scale.c
+++ b/vp9/common/vp9_scale.c
@@ -8,9 +8,10 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
  #include "vp9/common/vp9_filter.h"
  #include "vp9/common/vp9_scale.h"
+#include "vpx_dsp/vpx_filter.h"
  
  static INLINE int scaled_x(int val, const struct scale_factors *sf) {
    return (int)((int64_t)val * sf->x_scale_fp >> REF_SCALE_SHIFT);
@@ -81,85 +82,85 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *sf,
    if (sf->x_step_q4 == 16) {
      if (sf->y_step_q4 == 16) {
        // No scaling in either direction.
-      sf->predict[0][0][0] = vp9_convolve_copy;
-      sf->predict[0][0][1] = vp9_convolve_avg;
-      sf->predict[0][1][0] = vp9_convolve8_vert;
-      sf->predict[0][1][1] = vp9_convolve8_avg_vert;
-      sf->predict[1][0][0] = vp9_convolve8_horiz;
-      sf->predict[1][0][1] = vp9_convolve8_avg_horiz;
+      sf->predict[0][0][0] = vpx_convolve_copy;
+      sf->predict[0][0][1] = vpx_convolve_avg;
+      sf->predict[0][1][0] = vpx_convolve8_vert;
+      sf->predict[0][1][1] = vpx_convolve8_avg_vert;
+      sf->predict[1][0][0] = vpx_convolve8_horiz;
+      sf->predict[1][0][1] = vpx_convolve8_avg_horiz;
      } else {
        // No scaling in x direction. Must always scale in the y direction.
-      sf->predict[0][0][0] = vp9_convolve8_vert;
-      sf->predict[0][0][1] = vp9_convolve8_avg_vert;
-      sf->predict[0][1][0] = vp9_convolve8_vert;
-      sf->predict[0][1][1] = vp9_convolve8_avg_vert;
-      sf->predict[1][0][0] = vp9_convolve8;
-      sf->predict[1][0][1] = vp9_convolve8_avg;
+      sf->predict[0][0][0] = vpx_convolve8_vert;
+      sf->predict[0][0][1] = vpx_convolve8_avg_vert;
+      sf->predict[0][1][0] = vpx_convolve8_vert;
+      sf->predict[0][1][1] = vpx_convolve8_avg_vert;
+      sf->predict[1][0][0] = vpx_convolve8;
+      sf->predict[1][0][1] = vpx_convolve8_avg;
      }
    } else {
      if (sf->y_step_q4 == 16) {
        // No scaling in the y direction. Must always scale in the x direction.
-      sf->predict[0][0][0] = vp9_convolve8_horiz;
-      sf->predict[0][0][1] = vp9_convolve8_avg_horiz;
-      sf->predict[0][1][0] = vp9_convolve8;
-      sf->predict[0][1][1] = vp9_convolve8_avg;
-      sf->predict[1][0][0] = vp9_convolve8_horiz;
-      sf->predict[1][0][1] = vp9_convolve8_avg_horiz;
+      sf->predict[0][0][0] = vpx_convolve8_horiz;
+      sf->predict[0][0][1] = vpx_convolve8_avg_horiz;
+      sf->predict[0][1][0] = vpx_convolve8;
+      sf->predict[0][1][1] = vpx_convolve8_avg;
+      sf->predict[1][0][0] = vpx_convolve8_horiz;
+      sf->predict[1][0][1] = vpx_convolve8_avg_horiz;
      } else {
        // Must always scale in both directions.
-      sf->predict[0][0][0] = vp9_convolve8;
-      sf->predict[0][0][1] = vp9_convolve8_avg;
-      sf->predict[0][1][0] = vp9_convolve8;
-      sf->predict[0][1][1] = vp9_convolve8_avg;
-      sf->predict[1][0][0] = vp9_convolve8;
-      sf->predict[1][0][1] = vp9_convolve8_avg;
+      sf->predict[0][0][0] = vpx_convolve8;
+      sf->predict[0][0][1] = vpx_convolve8_avg;
+      sf->predict[0][1][0] = vpx_convolve8;
+      sf->predict[0][1][1] = vpx_convolve8_avg;
+      sf->predict[1][0][0] = vpx_convolve8;
+      sf->predict[1][0][1] = vpx_convolve8_avg;
      }
    }
    // 2D subpel motion always gets filtered in both directions
-  sf->predict[1][1][0] = vp9_convolve8;
-  sf->predict[1][1][1] = vp9_convolve8_avg;
+  sf->predict[1][1][0] = vpx_convolve8;
+  sf->predict[1][1][1] = vpx_convolve8_avg;
  #if CONFIG_VP9_HIGHBITDEPTH
    if (use_highbd) {
      if (sf->x_step_q4 == 16) {
        if (sf->y_step_q4 == 16) {
          // No scaling in either direction.
-        sf->highbd_predict[0][0][0] = vp9_highbd_convolve_copy;
-        sf->highbd_predict[0][0][1] = vp9_highbd_convolve_avg;
-        sf->highbd_predict[0][1][0] = vp9_highbd_convolve8_vert;
-        sf->highbd_predict[0][1][1] = vp9_highbd_convolve8_avg_vert;
-        sf->highbd_predict[1][0][0] = vp9_highbd_convolve8_horiz;
-        sf->highbd_predict[1][0][1] = vp9_highbd_convolve8_avg_horiz;
+        sf->highbd_predict[0][0][0] = vpx_highbd_convolve_copy;
+        sf->highbd_predict[0][0][1] = vpx_highbd_convolve_avg;
+        sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert;
+        sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert;
+        sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz;
+        sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz;
        } else {
          // No scaling in x direction. Must always scale in the y direction.
-        sf->highbd_predict[0][0][0] = vp9_highbd_convolve8_vert;
-        sf->highbd_predict[0][0][1] = vp9_highbd_convolve8_avg_vert;
-        sf->highbd_predict[0][1][0] = vp9_highbd_convolve8_vert;
-        sf->highbd_predict[0][1][1] = vp9_highbd_convolve8_avg_vert;
-        sf->highbd_predict[1][0][0] = vp9_highbd_convolve8;
-        sf->highbd_predict[1][0][1] = vp9_highbd_convolve8_avg;
+        sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_vert;
+        sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_vert;
+        sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert;
+        sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert;
+        sf->highbd_predict[1][0][0] = vpx_highbd_convolve8;
+        sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg;
        }
      } else {
        if (sf->y_step_q4 == 16) {
          // No scaling in the y direction. Must always scale in the x direction.
-        sf->highbd_predict[0][0][0] = vp9_highbd_convolve8_horiz;
-        sf->highbd_predict[0][0][1] = vp9_highbd_convolve8_avg_horiz;
-        sf->highbd_predict[0][1][0] = vp9_highbd_convolve8;
-        sf->highbd_predict[0][1][1] = vp9_highbd_convolve8_avg;
-        sf->highbd_predict[1][0][0] = vp9_highbd_convolve8_horiz;
-        sf->highbd_predict[1][0][1] = vp9_highbd_convolve8_avg_horiz;
+        sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_horiz;
+        sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_horiz;
+        sf->highbd_predict[0][1][0] = vpx_highbd_convolve8;
+        sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg;
+        sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz;
+        sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz;
        } else {
          // Must always scale in both directions.
-        sf->highbd_predict[0][0][0] = vp9_highbd_convolve8;
-        sf->highbd_predict[0][0][1] = vp9_highbd_convolve8_avg;
-        sf->highbd_predict[0][1][0] = vp9_highbd_convolve8;
-        sf->highbd_predict[0][1][1] = vp9_highbd_convolve8_avg;
-        sf->highbd_predict[1][0][0] = vp9_highbd_convolve8;
-        sf->highbd_predict[1][0][1] = vp9_highbd_convolve8_avg;
+        sf->highbd_predict[0][0][0] = vpx_highbd_convolve8;
+        sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg;
+        sf->highbd_predict[0][1][0] = vpx_highbd_convolve8;
+        sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg;
+        sf->highbd_predict[1][0][0] = vpx_highbd_convolve8;
+        sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg;
        }
      }
      // 2D subpel motion always gets filtered in both directions.
-    sf->highbd_predict[1][1][0] = vp9_highbd_convolve8;
-    sf->highbd_predict[1][1][1] = vp9_highbd_convolve8_avg;
+    sf->highbd_predict[1][1][0] = vpx_highbd_convolve8;
+    sf->highbd_predict[1][1][1] = vpx_highbd_convolve8_avg;
    }
  #endif
  }
diff --git a/vp9/common/vp9_scale.h b/vp9/common/vp9_scale.h

index a1601a7..5e91041 100644 (file)
--- a/vp9/common/vp9_scale.h
+++ b/vp9/common/vp9_scale.h
@@ -12,7 +12,7 @@
  #define VP9_COMMON_VP9_SCALE_H_
  
  #include "vp9/common/vp9_mv.h"
-#include "vp9/common/vp9_convolve.h"
+#include "vpx_dsp/vpx_convolve.h"
  
  #ifdef __cplusplus
  extern "C" {
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c

index c6d3bf1..ecebe1e 100644 (file)
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -12,6 +12,7 @@
  #include <stdlib.h>  // qsort()
  
  #include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
  #include "./vpx_scale_rtcd.h"
  
  #include "vpx_dsp/bitreader_buffer.h"
diff --git a/vp9/encoder/vp9_blockiness.c b/vp9/encoder/vp9_blockiness.c

index b8629bd..fc3eac6 100644 (file)
--- a/vp9/encoder/vp9_blockiness.c
+++ b/vp9/encoder/vp9_blockiness.c
@@ -8,12 +8,14 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
-#include "./vpx_config.h"
  #include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
  #include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_convolve.h"
  #include "vp9/common/vp9_filter.h"
  #include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_convolve.h"
+#include "vpx_dsp/vpx_filter.h"
  #include "vpx_ports/mem.h"
  
  static int horizontal_filter(const uint8_t *s) {
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c

index 08134e1..f1d7379 100644 (file)
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -10,6 +10,7 @@
  
  #include <assert.h>
  #include <limits.h>
+#include "./vpx_dsp_rtcd.h"
  #include "vpx_scale/yv12config.h"
  #include "vpx/vpx_integer.h"
  #include "vp9/common/vp9_reconinter.h"
@@ -336,12 +337,12 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
    }
  
    if (decision == FILTER_BLOCK) {
-    vp9_convolve_copy(avg_start, avg.y_stride, src.buf, src.stride,
+    vpx_convolve_copy(avg_start, avg.y_stride, src.buf, src.stride,
                        NULL, 0, NULL, 0,
                        num_4x4_blocks_wide_lookup[bs] << 2,
                        num_4x4_blocks_high_lookup[bs] << 2);
    } else {  // COPY_BLOCK
-    vp9_convolve_copy(src.buf, src.stride, avg_start, avg.y_stride,
+    vpx_convolve_copy(src.buf, src.stride, avg_start, avg.y_stride,
                        NULL, 0, NULL, 0,
                        num_4x4_blocks_wide_lookup[bs] << 2,
                        num_4x4_blocks_high_lookup[bs] << 2);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c

index 53e747c..8aee227 100644 (file)
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -12,11 +12,12 @@
  #include <stdio.h>
  #include <limits.h>
  
-#include "./vpx_config.h"
  #include "./vp9_rtcd.h"
+#include "./vpx_config.h"
  #include "./vpx_dsp_rtcd.h"
  #include "./vpx_scale_rtcd.h"
  #include "vpx/internal/vpx_psnr.h"
+#include "vpx_dsp/vpx_filter.h"
  #include "vpx_ports/mem.h"
  #include "vpx_ports/vpx_timer.h"
  #include "vpx_scale/vpx_scale.h"
@@ -2580,18 +2581,18 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
  
  #if CONFIG_VP9_HIGHBITDEPTH
          if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
-          vp9_highbd_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
+          vpx_highbd_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
                                 kernel[x_q4 & 0xf], 16 * src_w / dst_w,
                                 kernel[y_q4 & 0xf], 16 * src_h / dst_h,
                                 16 / factor, 16 / factor, bd);
          } else {
-          vp9_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
+          vpx_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
                          kernel[x_q4 & 0xf], 16 * src_w / dst_w,
                          kernel[y_q4 & 0xf], 16 * src_h / dst_h,
                          16 / factor, 16 / factor);
          }
  #else
-        vp9_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
+        vpx_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
                        kernel[x_q4 & 0xf], 16 * src_w / dst_w,
                        kernel[y_q4 & 0xf], 16 * src_h / dst_h,
                        16 / factor, 16 / factor);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c

index 34bf8a6..00f7dcd 100644 (file)
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1504,15 +1504,15 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
          this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
  #if CONFIG_VP9_HIGHBITDEPTH
          if (cm->use_highbitdepth)
-          vp9_highbd_convolve_copy(best_pred->data, best_pred->stride,
+          vpx_highbd_convolve_copy(best_pred->data, best_pred->stride,
                                     this_mode_pred->data, this_mode_pred->stride,
                                     NULL, 0, NULL, 0, bw, bh, xd->bd);
          else
-          vp9_convolve_copy(best_pred->data, best_pred->stride,
+          vpx_convolve_copy(best_pred->data, best_pred->stride,
                            this_mode_pred->data, this_mode_pred->stride,
                            NULL, 0, NULL, 0, bw, bh);
  #else
-        vp9_convolve_copy(best_pred->data, best_pred->stride,
+        vpx_convolve_copy(best_pred->data, best_pred->stride,
                            this_mode_pred->data, this_mode_pred->stride,
                            NULL, 0, NULL, 0, bw, bh);
  #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -1577,15 +1577,15 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
      if (best_pred->data != orig_dst.buf && is_inter_mode(mbmi->mode)) {
  #if CONFIG_VP9_HIGHBITDEPTH
        if (cm->use_highbitdepth)
-        vp9_highbd_convolve_copy(best_pred->data, best_pred->stride,
+        vpx_highbd_convolve_copy(best_pred->data, best_pred->stride,
                                   pd->dst.buf, pd->dst.stride, NULL, 0,
                                   NULL, 0, bw, bh, xd->bd);
        else
-        vp9_convolve_copy(best_pred->data, best_pred->stride,
+        vpx_convolve_copy(best_pred->data, best_pred->stride,
                            pd->dst.buf, pd->dst.stride, NULL, 0,
                            NULL, 0, bw, bh);
  #else
-      vp9_convolve_copy(best_pred->data, best_pred->stride,
+      vpx_convolve_copy(best_pred->data, best_pred->stride,
                          pd->dst.buf, pd->dst.stride, NULL, 0,
                          NULL, 0, bw, bh);
  #endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp9/encoder/vp9_resize.c b/vp9/encoder/vp9_resize.c

index f46cad8..e5ae959 100644 (file)
--- a/vp9/encoder/vp9_resize.c
+++ b/vp9/encoder/vp9_resize.c
@@ -15,6 +15,9 @@
  #include <stdlib.h>
  #include <string.h>
  
+#if CONFIG_VP9_HIGHBITDEPTH
+#include "vpx_dsp/vpx_dsp_common.h"
+#endif  // CONFIG_VP9_HIGHBITDEPTH
  #include "vpx_ports/mem.h"
  #include "vp9/common/vp9_common.h"
  #include "vp9/encoder/vp9_resize.h"
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk

index 78ea63f..339bc8b 100644 (file)
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -13,14 +13,10 @@ VP9_COMMON_SRCS-yes += vp9_iface_common.h
  VP9_COMMON_SRCS-yes += common/vp9_ppflags.h
  VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c
  VP9_COMMON_SRCS-yes += common/vp9_blockd.c
-VP9_COMMON_SRCS-yes += common/vp9_convolve.c
-VP9_COMMON_SRCS-yes += common/vp9_convolve.h
  VP9_COMMON_SRCS-yes += common/vp9_debugmodes.c
  VP9_COMMON_SRCS-yes += common/vp9_entropy.c
  VP9_COMMON_SRCS-yes += common/vp9_entropymode.c
  VP9_COMMON_SRCS-yes += common/vp9_entropymv.c
-VP9_COMMON_SRCS-yes += common/vp9_filter.c
-VP9_COMMON_SRCS-yes += common/vp9_filter.h
  VP9_COMMON_SRCS-yes += common/vp9_frame_buffers.c
  VP9_COMMON_SRCS-yes += common/vp9_frame_buffers.h
  VP9_COMMON_SRCS-yes += common/vp9_idct.c
@@ -31,6 +27,8 @@ VP9_COMMON_SRCS-yes += common/vp9_entropy.h
  VP9_COMMON_SRCS-yes += common/vp9_entropymode.h
  VP9_COMMON_SRCS-yes += common/vp9_entropymv.h
  VP9_COMMON_SRCS-yes += common/vp9_enums.h
+VP9_COMMON_SRCS-yes += common/vp9_filter.h
+VP9_COMMON_SRCS-yes += common/vp9_filter.c
  VP9_COMMON_SRCS-yes += common/vp9_idct.h
  VP9_COMMON_SRCS-yes += common/vp9_loopfilter.h
  VP9_COMMON_SRCS-yes += common/vp9_thread_common.h
@@ -64,33 +62,16 @@ VP9_COMMON_SRCS-yes += common/vp9_common_data.h
  VP9_COMMON_SRCS-yes += common/vp9_scan.c
  VP9_COMMON_SRCS-yes += common/vp9_scan.h
  
-VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/convolve.h
-VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
  VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.h
  VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
  VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h
  VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c
  VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_sse2.asm
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_8t_sse2.asm
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_bilinear_sse2.asm
-VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
-VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_bilinear_ssse3.asm
-VP9_COMMON_SRCS-$(HAVE_AVX2) += common/x86/vp9_subpixel_8t_intrin_avx2.c
-VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_intrin_ssse3.c
  ifeq ($(CONFIG_VP9_POSTPROC),yes)
  VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_mfqe_sse2.asm
  VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm
  endif
  
-ifeq ($(CONFIG_USE_X86INC),yes)
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_copy_sse2.asm
-endif
-
-ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_8t_sse2.asm
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_bilinear_sse2.asm
-endif
-
  # common (c)
  VP9_COMMON_SRCS-$(HAVE_DSPR2)  += common/mips/dspr2/vp9_common_dspr2.h
  VP9_COMMON_SRCS-$(HAVE_DSPR2)  += common/mips/dspr2/vp9_convolve2_avg_dspr2.c
@@ -113,15 +94,6 @@ VP9_COMMON_SRCS-$(HAVE_DSPR2)  += common/mips/dspr2/vp9_itrans32_dspr2.c
  endif
  
  # common (msa)
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_avg_horiz_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_avg_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_avg_vert_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_horiz_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_vert_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_avg_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_copy_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_msa.h
  VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c
  VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c
  VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
@@ -151,11 +123,6 @@ endif
  # neon with assembly and intrinsics implementations. If both are available
  # prefer assembly.
  ifeq ($(HAVE_NEON_ASM), yes)
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_avg_neon_asm$(ASM)
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_neon_asm$(ASM)
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_avg_neon_asm$(ASM)
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_neon.c
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_copy_neon_asm$(ASM)
  VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_1_add_neon_asm$(ASM)
  VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_add_neon_asm$(ASM)
  VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_neon.c
@@ -167,11 +134,6 @@ VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_1_add_neon_asm$(ASM)
  VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_add_neon_asm$(ASM)
  else
  ifeq ($(HAVE_NEON), yes)
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_avg_neon.c
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_neon.c
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_avg_neon.c
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_neon.c
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_copy_neon.c
  VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_1_add_neon.c
  VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_add_neon.c
  VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_neon.c
diff --git a/vpx_dsp/arm/vpx_convolve8_avg_neon.c b/vpx_dsp/arm/vpx_convolve8_avg_neon.c

new file mode 100644 (file)

index 0000000..5464250
--- /dev/null
+++ b/vpx_dsp/arm/vpx_convolve8_avg_neon.c
@@ -0,0 +1,393 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+
+void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
+                               uint8_t *dst, ptrdiff_t dst_stride,
+                               const int16_t *filter_x, int x_step_q4,
+                               const int16_t *filter_y, int y_step_q4,
+                               int w, int h);
+void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
+                               uint8_t *dst, ptrdiff_t dst_stride,
+                               const int16_t *filter_x, int x_step_q4,
+                               const int16_t *filter_y, int y_step_q4,
+                               int w, int h);
+
+static INLINE int32x4_t MULTIPLY_BY_Q0(
+    int16x4_t dsrc0,
+    int16x4_t dsrc1,
+    int16x4_t dsrc2,
+    int16x4_t dsrc3,
+    int16x4_t dsrc4,
+    int16x4_t dsrc5,
+    int16x4_t dsrc6,
+    int16x4_t dsrc7,
+    int16x8_t q0s16) {
+  int32x4_t qdst;
+  int16x4_t d0s16, d1s16;
+
+  d0s16 = vget_low_s16(q0s16);
+  d1s16 = vget_high_s16(q0s16);
+
+  qdst = vmull_lane_s16(dsrc0, d0s16, 0);
+  qdst = vmlal_lane_s16(qdst, dsrc1, d0s16, 1);
+  qdst = vmlal_lane_s16(qdst, dsrc2, d0s16, 2);
+  qdst = vmlal_lane_s16(qdst, dsrc3, d0s16, 3);
+  qdst = vmlal_lane_s16(qdst, dsrc4, d1s16, 0);
+  qdst = vmlal_lane_s16(qdst, dsrc5, d1s16, 1);
+  qdst = vmlal_lane_s16(qdst, dsrc6, d1s16, 2);
+  qdst = vmlal_lane_s16(qdst, dsrc7, d1s16, 3);
+  return qdst;
+}
+
+void vpx_convolve8_avg_horiz_neon(
+    const uint8_t *src,
+    ptrdiff_t src_stride,
+    uint8_t *dst,
+    ptrdiff_t dst_stride,
+    const int16_t *filter_x,
+    int x_step_q4,
+    const int16_t *filter_y,  // unused
+    int y_step_q4,            // unused
+    int w,
+    int h) {
+  int width;
+  const uint8_t *s;
+  uint8_t *d;
+  uint8x8_t d2u8, d3u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8;
+  uint32x2_t d2u32, d3u32, d6u32, d7u32, d28u32, d29u32, d30u32, d31u32;
+  uint8x16_t q1u8, q3u8, q12u8, q13u8, q14u8, q15u8;
+  int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d22s16, d23s16;
+  int16x4_t d24s16, d25s16, d26s16, d27s16;
+  uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16;
+  int16x8_t q0s16;
+  uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
+  int32x4_t q1s32, q2s32, q14s32, q15s32;
+  uint16x8x2_t q0x2u16;
+  uint8x8x2_t d0x2u8, d1x2u8;
+  uint32x2x2_t d0x2u32;
+  uint16x4x2_t d0x2u16, d1x2u16;
+  uint32x4x2_t q0x2u32;
+
+  if (x_step_q4 != 16) {
+    vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+                              filter_x, x_step_q4,
+                              filter_y, y_step_q4, w, h);
+    return;
+}
+
+  q0s16 = vld1q_s16(filter_x);
+
+  src -= 3;  // adjust for taps
+  for (; h > 0; h -= 4) {  // loop_horiz_v
+    s = src;
+    d24u8 = vld1_u8(s);
+    s += src_stride;
+    d25u8 = vld1_u8(s);
+    s += src_stride;
+    d26u8 = vld1_u8(s);
+    s += src_stride;
+    d27u8 = vld1_u8(s);
+
+    q12u8 = vcombine_u8(d24u8, d25u8);
+    q13u8 = vcombine_u8(d26u8, d27u8);
+
+    q0x2u16 = vtrnq_u16(vreinterpretq_u16_u8(q12u8),
+                        vreinterpretq_u16_u8(q13u8));
+    d24u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[0]));
+    d25u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[0]));
+    d26u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[1]));
+    d27u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[1]));
+    d0x2u8 = vtrn_u8(d24u8, d25u8);
+    d1x2u8 = vtrn_u8(d26u8, d27u8);
+
+    __builtin_prefetch(src + src_stride * 4);
+    __builtin_prefetch(src + src_stride * 5);
+
+    q8u16 = vmovl_u8(d0x2u8.val[0]);
+    q9u16 = vmovl_u8(d0x2u8.val[1]);
+    q10u16 = vmovl_u8(d1x2u8.val[0]);
+    q11u16 = vmovl_u8(d1x2u8.val[1]);
+
+    src += 7;
+    d16u16 = vget_low_u16(q8u16);
+    d17u16 = vget_high_u16(q8u16);
+    d18u16 = vget_low_u16(q9u16);
+    d19u16 = vget_high_u16(q9u16);
+    q8u16 = vcombine_u16(d16u16, d18u16);  // vswp 17 18
+    q9u16 = vcombine_u16(d17u16, d19u16);
+
+    d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
+    d23s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));  // vmov 23 21
+    for (width = w;
+         width > 0;
+         width -= 4, src += 4, dst += 4) {  // loop_horiz
+      s = src;
+      d28u32 = vld1_dup_u32((const uint32_t *)s);
+      s += src_stride;
+      d29u32 = vld1_dup_u32((const uint32_t *)s);
+      s += src_stride;
+      d31u32 = vld1_dup_u32((const uint32_t *)s);
+      s += src_stride;
+      d30u32 = vld1_dup_u32((const uint32_t *)s);
+
+      __builtin_prefetch(src + 64);
+
+      d0x2u16 = vtrn_u16(vreinterpret_u16_u32(d28u32),
+                         vreinterpret_u16_u32(d31u32));
+      d1x2u16 = vtrn_u16(vreinterpret_u16_u32(d29u32),
+                         vreinterpret_u16_u32(d30u32));
+      d0x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[0]),   // d28
+                       vreinterpret_u8_u16(d1x2u16.val[0]));  // d29
+      d1x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[1]),   // d31
+                       vreinterpret_u8_u16(d1x2u16.val[1]));  // d30
+
+      __builtin_prefetch(src + 64 + src_stride);
+
+      q14u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]);
+      q15u8 = vcombine_u8(d1x2u8.val[1], d1x2u8.val[0]);
+      q0x2u32 = vtrnq_u32(vreinterpretq_u32_u8(q14u8),
+                          vreinterpretq_u32_u8(q15u8));
+
+      d28u8 = vreinterpret_u8_u32(vget_low_u32(q0x2u32.val[0]));
+      d29u8 = vreinterpret_u8_u32(vget_high_u32(q0x2u32.val[0]));
+      q12u16 = vmovl_u8(d28u8);
+      q13u16 = vmovl_u8(d29u8);
+
+      __builtin_prefetch(src + 64 + src_stride * 2);
+
+      d = dst;
+      d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 0);
+      d += dst_stride;
+      d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 0);
+      d += dst_stride;
+      d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 1);
+      d += dst_stride;
+      d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 1);
+
+      d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
+      d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
+      d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
+      d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
+      d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+      d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+      d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+      d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+      d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+
+      q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d20s16, d22s16,
+                              d18s16, d19s16, d23s16, d24s16, q0s16);
+      q2s32  = MULTIPLY_BY_Q0(d17s16, d20s16, d22s16, d18s16,
+                              d19s16, d23s16, d24s16, d26s16, q0s16);
+      q14s32 = MULTIPLY_BY_Q0(d20s16, d22s16, d18s16, d19s16,
+                              d23s16, d24s16, d26s16, d27s16, q0s16);
+      q15s32 = MULTIPLY_BY_Q0(d22s16, d18s16, d19s16, d23s16,
+                              d24s16, d26s16, d27s16, d25s16, q0s16);
+
+      __builtin_prefetch(src + 64 + src_stride * 3);
+
+      d2u16 = vqrshrun_n_s32(q1s32, 7);
+      d3u16 = vqrshrun_n_s32(q2s32, 7);
+      d4u16 = vqrshrun_n_s32(q14s32, 7);
+      d5u16 = vqrshrun_n_s32(q15s32, 7);
+
+      q1u16 = vcombine_u16(d2u16, d3u16);
+      q2u16 = vcombine_u16(d4u16, d5u16);
+
+      d2u8 = vqmovn_u16(q1u16);
+      d3u8 = vqmovn_u16(q2u16);
+
+      d0x2u16 = vtrn_u16(vreinterpret_u16_u8(d2u8),
+                         vreinterpret_u16_u8(d3u8));
+      d0x2u32 = vtrn_u32(vreinterpret_u32_u16(d0x2u16.val[0]),
+                         vreinterpret_u32_u16(d0x2u16.val[1]));
+      d0x2u8 = vtrn_u8(vreinterpret_u8_u32(d0x2u32.val[0]),
+                       vreinterpret_u8_u32(d0x2u32.val[1]));
+
+      q1u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]);
+      q3u8 = vreinterpretq_u8_u32(vcombine_u32(d6u32, d7u32));
+
+      q1u8 = vrhaddq_u8(q1u8, q3u8);
+
+      d2u32 = vreinterpret_u32_u8(vget_low_u8(q1u8));
+      d3u32 = vreinterpret_u32_u8(vget_high_u8(q1u8));
+
+      d = dst;
+      vst1_lane_u32((uint32_t *)d, d2u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d2u32, 1);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 1);
+
+      q8u16 = q9u16;
+      d20s16 = d23s16;
+      q11u16 = q12u16;
+      q9u16 = q13u16;
+      d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
+    }
+    src += src_stride * 4 - w - 7;
+    dst += dst_stride * 4 - w;
+  }
+  return;
+}
+
+void vpx_convolve8_avg_vert_neon(
+    const uint8_t *src,
+    ptrdiff_t src_stride,
+    uint8_t *dst,
+    ptrdiff_t dst_stride,
+    const int16_t *filter_x,  // unused
+    int x_step_q4,            // unused
+    const int16_t *filter_y,
+    int y_step_q4,
+    int w,
+    int h) {
+  int height;
+  const uint8_t *s;
+  uint8_t *d;
+  uint8x8_t d2u8, d3u8;
+  uint32x2_t d2u32, d3u32, d6u32, d7u32;
+  uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32;
+  uint8x16_t q1u8, q3u8;
+  int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16;
+  int16x4_t d24s16, d25s16, d26s16, d27s16;
+  uint16x4_t d2u16, d3u16, d4u16, d5u16;
+  int16x8_t q0s16;
+  uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
+  int32x4_t q1s32, q2s32, q14s32, q15s32;
+
+  if (y_step_q4 != 16) {
+    vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+                             filter_x, x_step_q4,
+                             filter_y, y_step_q4, w, h);
+    return;
+  }
+
+  src -= src_stride * 3;
+  q0s16 = vld1q_s16(filter_y);
+  for (; w > 0; w -= 4, src += 4, dst += 4) {  // loop_vert_h
+    s = src;
+    d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0);
+    s += src_stride;
+    d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1);
+    s += src_stride;
+    d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0);
+    s += src_stride;
+    d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 1);
+    s += src_stride;
+    d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 0);
+    s += src_stride;
+    d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 1);
+    s += src_stride;
+    d22u32 = vld1_lane_u32((const uint32_t *)s, d22u32, 0);
+    s += src_stride;
+
+    q8u16  = vmovl_u8(vreinterpret_u8_u32(d16u32));
+    q9u16  = vmovl_u8(vreinterpret_u8_u32(d18u32));
+    q10u16 = vmovl_u8(vreinterpret_u8_u32(d20u32));
+    q11u16 = vmovl_u8(vreinterpret_u8_u32(d22u32));
+
+    d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
+    d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
+    d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+    d = dst;
+    for (height = h; height > 0; height -= 4) {  // loop_vert
+      d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 0);
+      s += src_stride;
+      d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 0);
+      s += src_stride;
+      d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 1);
+      s += src_stride;
+      d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 1);
+      s += src_stride;
+
+      q12u16 = vmovl_u8(vreinterpret_u8_u32(d24u32));
+      q13u16 = vmovl_u8(vreinterpret_u8_u32(d26u32));
+
+      d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 0);
+      d += dst_stride;
+      d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 1);
+      d += dst_stride;
+      d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 0);
+      d += dst_stride;
+      d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 1);
+      d -= dst_stride * 3;
+
+      d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
+      d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
+      d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
+      d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));
+      d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+      d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+      d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+      d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+
+      __builtin_prefetch(s);
+      __builtin_prefetch(s + src_stride);
+      q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d18s16, d19s16,
+                              d20s16, d21s16, d22s16, d24s16, q0s16);
+      __builtin_prefetch(s + src_stride * 2);
+      __builtin_prefetch(s + src_stride * 3);
+      q2s32  = MULTIPLY_BY_Q0(d17s16, d18s16, d19s16, d20s16,
+                              d21s16, d22s16, d24s16, d26s16, q0s16);
+      __builtin_prefetch(d);
+      __builtin_prefetch(d + dst_stride);
+      q14s32 = MULTIPLY_BY_Q0(d18s16, d19s16, d20s16, d21s16,
+                              d22s16, d24s16, d26s16, d27s16, q0s16);
+      __builtin_prefetch(d + dst_stride * 2);
+      __builtin_prefetch(d + dst_stride * 3);
+      q15s32 = MULTIPLY_BY_Q0(d19s16, d20s16, d21s16, d22s16,
+                              d24s16, d26s16, d27s16, d25s16, q0s16);
+
+      d2u16 = vqrshrun_n_s32(q1s32, 7);
+      d3u16 = vqrshrun_n_s32(q2s32, 7);
+      d4u16 = vqrshrun_n_s32(q14s32, 7);
+      d5u16 = vqrshrun_n_s32(q15s32, 7);
+
+      q1u16 = vcombine_u16(d2u16, d3u16);
+      q2u16 = vcombine_u16(d4u16, d5u16);
+
+      d2u8 = vqmovn_u16(q1u16);
+      d3u8 = vqmovn_u16(q2u16);
+
+      q1u8 = vcombine_u8(d2u8, d3u8);
+      q3u8 = vreinterpretq_u8_u32(vcombine_u32(d6u32, d7u32));
+
+      q1u8 = vrhaddq_u8(q1u8, q3u8);
+
+      d2u32 = vreinterpret_u32_u8(vget_low_u8(q1u8));
+      d3u32 = vreinterpret_u32_u8(vget_high_u8(q1u8));
+
+      vst1_lane_u32((uint32_t *)d, d2u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d2u32, 1);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 1);
+      d += dst_stride;
+
+      q8u16 = q10u16;
+      d18s16 = d22s16;
+      d19s16 = d24s16;
+      q10u16 = q13u16;
+      d22s16 = d25s16;
+    }
+  }
+  return;
+}
diff --git a/vp9/common/arm/neon/vp9_convolve8_avg_neon_asm.asm b/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm

similarity index 92%

rename from vp9/common/arm/neon/vp9_convolve8_avg_neon_asm.asm

rename to vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm

index 4d85846..a19f97d 100644 (file)
--- a/vp9/common/arm/neon/vp9_convolve8_avg_neon_asm.asm
+++ b/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm
@@ -17,10 +17,10 @@
      ; VP9_FILTER_WEIGHT == 128
      ; VP9_FILTER_SHIFT == 7
  
-    EXPORT  |vp9_convolve8_avg_horiz_neon|
-    EXPORT  |vp9_convolve8_avg_vert_neon|
-    IMPORT  |vp9_convolve8_avg_horiz_c|
-    IMPORT  |vp9_convolve8_avg_vert_c|
+    EXPORT  |vpx_convolve8_avg_horiz_neon|
+    EXPORT  |vpx_convolve8_avg_vert_neon|
+    IMPORT  |vpx_convolve8_avg_horiz_c|
+    IMPORT  |vpx_convolve8_avg_vert_c|
      ARM
      REQUIRE8
      PRESERVE8
@@ -51,10 +51,10 @@
  ; sp[]int w
  ; sp[]int h
  
-|vp9_convolve8_avg_horiz_neon| PROC
+|vpx_convolve8_avg_horiz_neon| PROC
      ldr             r12, [sp, #4]           ; x_step_q4
      cmp             r12, #16
-    bne             vp9_convolve8_avg_horiz_c
+    bne             vpx_convolve8_avg_horiz_c
  
      push            {r4-r10, lr}
  
@@ -78,7 +78,7 @@
  
      mov             r10, r6                 ; w loop counter
  
-vp9_convolve8_avg_loop_horiz_v
+vpx_convolve8_avg_loop_horiz_v
      vld1.8          {d24}, [r0], r1
      vld1.8          {d25}, [r0], r1
      vld1.8          {d26}, [r0], r1
@@ -101,7 +101,7 @@ vp9_convolve8_avg_loop_horiz_v
  
      add             r0, r0, #3
  
-vp9_convolve8_avg_loop_horiz
+vpx_convolve8_avg_loop_horiz
      add             r5, r0, #64
  
      vld1.32         {d28[]}, [r0], r1
@@ -170,23 +170,23 @@ vp9_convolve8_avg_loop_horiz
      vmov            q9,  q13
  
      subs            r6, r6, #4              ; w -= 4
-    bgt             vp9_convolve8_avg_loop_horiz
+    bgt             vpx_convolve8_avg_loop_horiz
  
      ; outer loop
      mov             r6, r10                 ; restore w counter
      add             r0, r0, r9              ; src += src_stride * 4 - w
      add             r2, r2, r12             ; dst += dst_stride * 4 - w
      subs            r7, r7, #4              ; h -= 4
-    bgt vp9_convolve8_avg_loop_horiz_v
+    bgt vpx_convolve8_avg_loop_horiz_v
  
      pop             {r4-r10, pc}
  
      ENDP
  
-|vp9_convolve8_avg_vert_neon| PROC
+|vpx_convolve8_avg_vert_neon| PROC
      ldr             r12, [sp, #12]
      cmp             r12, #16
-    bne             vp9_convolve8_avg_vert_c
+    bne             vpx_convolve8_avg_vert_c
  
      push            {r4-r8, lr}
  
@@ -203,7 +203,7 @@ vp9_convolve8_avg_loop_horiz
      lsl             r1, r1, #1
      lsl             r3, r3, #1
  
-vp9_convolve8_avg_loop_vert_h
+vpx_convolve8_avg_loop_vert_h
      mov             r4, r0
      add             r7, r0, r1, asr #1
      mov             r5, r2
@@ -223,7 +223,7 @@ vp9_convolve8_avg_loop_vert_h
      vmovl.u8        q10, d20
      vmovl.u8        q11, d22
  
-vp9_convolve8_avg_loop_vert
+vpx_convolve8_avg_loop_vert
      ; always process a 4x4 block at a time
      vld1.u32        {d24[0]}, [r7], r1
      vld1.u32        {d26[0]}, [r4], r1
@@ -288,13 +288,13 @@ vp9_convolve8_avg_loop_vert
      vmov            d22, d25
  
      subs            r12, r12, #4            ; h -= 4
-    bgt             vp9_convolve8_avg_loop_vert
+    bgt             vpx_convolve8_avg_loop_vert
  
      ; outer loop
      add             r0, r0, #4
      add             r2, r2, #4
      subs            r6, r6, #4              ; w -= 4
-    bgt             vp9_convolve8_avg_loop_vert_h
+    bgt             vpx_convolve8_avg_loop_vert_h
  
      pop             {r4-r8, pc}
  
diff --git a/vpx_dsp/arm/vpx_convolve8_neon.c b/vpx_dsp/arm/vpx_convolve8_neon.c

new file mode 100644 (file)

index 0000000..6f634b3
--- /dev/null
+++ b/vpx_dsp/arm/vpx_convolve8_neon.c
@@ -0,0 +1,360 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+
+void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
+                           uint8_t *dst, ptrdiff_t dst_stride,
+                           const int16_t *filter_x, int x_step_q4,
+                           const int16_t *filter_y, int y_step_q4,
+                           int w, int h);
+void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
+                           uint8_t *dst, ptrdiff_t dst_stride,
+                           const int16_t *filter_x, int x_step_q4,
+                           const int16_t *filter_y, int y_step_q4,
+                           int w, int h);
+
+static INLINE int32x4_t MULTIPLY_BY_Q0(
+    int16x4_t dsrc0,
+    int16x4_t dsrc1,
+    int16x4_t dsrc2,
+    int16x4_t dsrc3,
+    int16x4_t dsrc4,
+    int16x4_t dsrc5,
+    int16x4_t dsrc6,
+    int16x4_t dsrc7,
+    int16x8_t q0s16) {
+  int32x4_t qdst;
+  int16x4_t d0s16, d1s16;
+
+  d0s16 = vget_low_s16(q0s16);
+  d1s16 = vget_high_s16(q0s16);
+
+  qdst = vmull_lane_s16(dsrc0, d0s16, 0);
+  qdst = vmlal_lane_s16(qdst, dsrc1, d0s16, 1);
+  qdst = vmlal_lane_s16(qdst, dsrc2, d0s16, 2);
+  qdst = vmlal_lane_s16(qdst, dsrc3, d0s16, 3);
+  qdst = vmlal_lane_s16(qdst, dsrc4, d1s16, 0);
+  qdst = vmlal_lane_s16(qdst, dsrc5, d1s16, 1);
+  qdst = vmlal_lane_s16(qdst, dsrc6, d1s16, 2);
+  qdst = vmlal_lane_s16(qdst, dsrc7, d1s16, 3);
+  return qdst;
+}
+
+void vpx_convolve8_horiz_neon(
+    const uint8_t *src,
+    ptrdiff_t src_stride,
+    uint8_t *dst,
+    ptrdiff_t dst_stride,
+    const int16_t *filter_x,
+    int x_step_q4,
+    const int16_t *filter_y,  // unused
+    int y_step_q4,            // unused
+    int w,
+    int h) {
+  int width;
+  const uint8_t *s, *psrc;
+  uint8_t *d, *pdst;
+  uint8x8_t d2u8, d3u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8;
+  uint32x2_t d2u32, d3u32, d28u32, d29u32, d30u32, d31u32;
+  uint8x16_t q12u8, q13u8, q14u8, q15u8;
+  int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d22s16, d23s16;
+  int16x4_t d24s16, d25s16, d26s16, d27s16;
+  uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16;
+  int16x8_t q0s16;
+  uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
+  int32x4_t q1s32, q2s32, q14s32, q15s32;
+  uint16x8x2_t q0x2u16;
+  uint8x8x2_t d0x2u8, d1x2u8;
+  uint32x2x2_t d0x2u32;
+  uint16x4x2_t d0x2u16, d1x2u16;
+  uint32x4x2_t q0x2u32;
+
+  if (x_step_q4 != 16) {
+    vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+                          filter_x, x_step_q4,
+                          filter_y, y_step_q4, w, h);
+    return;
+  }
+
+  q0s16 = vld1q_s16(filter_x);
+
+  src -= 3;  // adjust for taps
+  for (; h > 0; h -= 4,
+    src += src_stride * 4,
+    dst += dst_stride * 4) {  // loop_horiz_v
+    s = src;
+    d24u8 = vld1_u8(s);
+    s += src_stride;
+    d25u8 = vld1_u8(s);
+    s += src_stride;
+    d26u8 = vld1_u8(s);
+    s += src_stride;
+    d27u8 = vld1_u8(s);
+
+    q12u8 = vcombine_u8(d24u8, d25u8);
+    q13u8 = vcombine_u8(d26u8, d27u8);
+
+    q0x2u16 = vtrnq_u16(vreinterpretq_u16_u8(q12u8),
+                        vreinterpretq_u16_u8(q13u8));
+    d24u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[0]));
+    d25u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[0]));
+    d26u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[1]));
+    d27u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[1]));
+    d0x2u8 = vtrn_u8(d24u8, d25u8);
+    d1x2u8 = vtrn_u8(d26u8, d27u8);
+
+    __builtin_prefetch(src + src_stride * 4);
+    __builtin_prefetch(src + src_stride * 5);
+    __builtin_prefetch(src + src_stride * 6);
+
+    q8u16  = vmovl_u8(d0x2u8.val[0]);
+    q9u16  = vmovl_u8(d0x2u8.val[1]);
+    q10u16 = vmovl_u8(d1x2u8.val[0]);
+    q11u16 = vmovl_u8(d1x2u8.val[1]);
+
+    d16u16 = vget_low_u16(q8u16);
+    d17u16 = vget_high_u16(q8u16);
+    d18u16 = vget_low_u16(q9u16);
+    d19u16 = vget_high_u16(q9u16);
+    q8u16 = vcombine_u16(d16u16, d18u16);  // vswp 17 18
+    q9u16 = vcombine_u16(d17u16, d19u16);
+
+    d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
+    d23s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));  // vmov 23 21
+    for (width = w, psrc = src + 7, pdst = dst;
+         width > 0;
+         width -= 4, psrc += 4, pdst += 4) {  // loop_horiz
+      s = psrc;
+      d28u32 = vld1_dup_u32((const uint32_t *)s);
+      s += src_stride;
+      d29u32 = vld1_dup_u32((const uint32_t *)s);
+      s += src_stride;
+      d31u32 = vld1_dup_u32((const uint32_t *)s);
+      s += src_stride;
+      d30u32 = vld1_dup_u32((const uint32_t *)s);
+
+      __builtin_prefetch(psrc + 64);
+
+      d0x2u16 = vtrn_u16(vreinterpret_u16_u32(d28u32),
+                         vreinterpret_u16_u32(d31u32));
+      d1x2u16 = vtrn_u16(vreinterpret_u16_u32(d29u32),
+                         vreinterpret_u16_u32(d30u32));
+      d0x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[0]),   // d28
+                       vreinterpret_u8_u16(d1x2u16.val[0]));  // d29
+      d1x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[1]),   // d31
+                       vreinterpret_u8_u16(d1x2u16.val[1]));  // d30
+
+      __builtin_prefetch(psrc + 64 + src_stride);
+
+      q14u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]);
+      q15u8 = vcombine_u8(d1x2u8.val[1], d1x2u8.val[0]);
+      q0x2u32 = vtrnq_u32(vreinterpretq_u32_u8(q14u8),
+                          vreinterpretq_u32_u8(q15u8));
+
+      d28u8 = vreinterpret_u8_u32(vget_low_u32(q0x2u32.val[0]));
+      d29u8 = vreinterpret_u8_u32(vget_high_u32(q0x2u32.val[0]));
+      q12u16 = vmovl_u8(d28u8);
+      q13u16 = vmovl_u8(d29u8);
+
+      __builtin_prefetch(psrc + 64 + src_stride * 2);
+
+      d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
+      d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
+      d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
+      d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
+      d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+      d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+      d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+      d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+      d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+
+      q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d20s16, d22s16,
+                              d18s16, d19s16, d23s16, d24s16, q0s16);
+      q2s32  = MULTIPLY_BY_Q0(d17s16, d20s16, d22s16, d18s16,
+                              d19s16, d23s16, d24s16, d26s16, q0s16);
+      q14s32 = MULTIPLY_BY_Q0(d20s16, d22s16, d18s16, d19s16,
+                              d23s16, d24s16, d26s16, d27s16, q0s16);
+      q15s32 = MULTIPLY_BY_Q0(d22s16, d18s16, d19s16, d23s16,
+                              d24s16, d26s16, d27s16, d25s16, q0s16);
+
+      __builtin_prefetch(psrc + 60 + src_stride * 3);
+
+      d2u16 = vqrshrun_n_s32(q1s32, 7);
+      d3u16 = vqrshrun_n_s32(q2s32, 7);
+      d4u16 = vqrshrun_n_s32(q14s32, 7);
+      d5u16 = vqrshrun_n_s32(q15s32, 7);
+
+      q1u16 = vcombine_u16(d2u16, d3u16);
+      q2u16 = vcombine_u16(d4u16, d5u16);
+
+      d2u8 = vqmovn_u16(q1u16);
+      d3u8 = vqmovn_u16(q2u16);
+
+      d0x2u16 = vtrn_u16(vreinterpret_u16_u8(d2u8),
+                         vreinterpret_u16_u8(d3u8));
+      d0x2u32 = vtrn_u32(vreinterpret_u32_u16(d0x2u16.val[0]),
+                         vreinterpret_u32_u16(d0x2u16.val[1]));
+      d0x2u8 = vtrn_u8(vreinterpret_u8_u32(d0x2u32.val[0]),
+                       vreinterpret_u8_u32(d0x2u32.val[1]));
+
+      d2u32 = vreinterpret_u32_u8(d0x2u8.val[0]);
+      d3u32 = vreinterpret_u32_u8(d0x2u8.val[1]);
+
+      d = pdst;
+      vst1_lane_u32((uint32_t *)d, d2u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d2u32, 1);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 1);
+
+      q8u16 = q9u16;
+      d20s16 = d23s16;
+      q11u16 = q12u16;
+      q9u16 = q13u16;
+      d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
+    }
+  }
+  return;
+}
+
+void vpx_convolve8_vert_neon(
+    const uint8_t *src,
+    ptrdiff_t src_stride,
+    uint8_t *dst,
+    ptrdiff_t dst_stride,
+    const int16_t *filter_x,  // unused
+    int x_step_q4,            // unused
+    const int16_t *filter_y,
+    int y_step_q4,
+    int w,
+    int h) {
+  int height;
+  const uint8_t *s;
+  uint8_t *d;
+  uint32x2_t d2u32, d3u32;
+  uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32;
+  int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16;
+  int16x4_t d24s16, d25s16, d26s16, d27s16;
+  uint16x4_t d2u16, d3u16, d4u16, d5u16;
+  int16x8_t q0s16;
+  uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
+  int32x4_t q1s32, q2s32, q14s32, q15s32;
+
+  if (y_step_q4 != 16) {
+    vpx_convolve8_vert_c(src, src_stride, dst, dst_stride,
+                         filter_x, x_step_q4,
+                         filter_y, y_step_q4, w, h);
+    return;
+  }
+
+  src -= src_stride * 3;
+  q0s16 = vld1q_s16(filter_y);
+  for (; w > 0; w -= 4, src += 4, dst += 4) {  // loop_vert_h
+    s = src;
+    d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0);
+    s += src_stride;
+    d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1);
+    s += src_stride;
+    d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0);
+    s += src_stride;
+    d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 1);
+    s += src_stride;
+    d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 0);
+    s += src_stride;
+    d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 1);
+    s += src_stride;
+    d22u32 = vld1_lane_u32((const uint32_t *)s, d22u32, 0);
+    s += src_stride;
+
+    q8u16  = vmovl_u8(vreinterpret_u8_u32(d16u32));
+    q9u16  = vmovl_u8(vreinterpret_u8_u32(d18u32));
+    q10u16 = vmovl_u8(vreinterpret_u8_u32(d20u32));
+    q11u16 = vmovl_u8(vreinterpret_u8_u32(d22u32));
+
+    d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16));
+    d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16));
+    d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+    d = dst;
+    for (height = h; height > 0; height -= 4) {  // loop_vert
+      d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 0);
+      s += src_stride;
+      d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 0);
+      s += src_stride;
+      d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 1);
+      s += src_stride;
+      d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 1);
+      s += src_stride;
+
+      q12u16 = vmovl_u8(vreinterpret_u8_u32(d24u32));
+      q13u16 = vmovl_u8(vreinterpret_u8_u32(d26u32));
+
+      d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16));
+      d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16));
+      d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16));
+      d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16));
+      d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+      d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+      d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+      d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+
+      __builtin_prefetch(d);
+      __builtin_prefetch(d + dst_stride);
+      q1s32  = MULTIPLY_BY_Q0(d16s16, d17s16, d18s16, d19s16,
+                              d20s16, d21s16, d22s16, d24s16, q0s16);
+      __builtin_prefetch(d + dst_stride * 2);
+      __builtin_prefetch(d + dst_stride * 3);
+      q2s32  = MULTIPLY_BY_Q0(d17s16, d18s16, d19s16, d20s16,
+                              d21s16, d22s16, d24s16, d26s16, q0s16);
+      __builtin_prefetch(s);
+      __builtin_prefetch(s + src_stride);
+      q14s32 = MULTIPLY_BY_Q0(d18s16, d19s16, d20s16, d21s16,
+                              d22s16, d24s16, d26s16, d27s16, q0s16);
+      __builtin_prefetch(s + src_stride * 2);
+      __builtin_prefetch(s + src_stride * 3);
+      q15s32 = MULTIPLY_BY_Q0(d19s16, d20s16, d21s16, d22s16,
+                              d24s16, d26s16, d27s16, d25s16, q0s16);
+
+      d2u16 = vqrshrun_n_s32(q1s32, 7);
+      d3u16 = vqrshrun_n_s32(q2s32, 7);
+      d4u16 = vqrshrun_n_s32(q14s32, 7);
+      d5u16 = vqrshrun_n_s32(q15s32, 7);
+
+      q1u16 = vcombine_u16(d2u16, d3u16);
+      q2u16 = vcombine_u16(d4u16, d5u16);
+
+      d2u32 = vreinterpret_u32_u8(vqmovn_u16(q1u16));
+      d3u32 = vreinterpret_u32_u8(vqmovn_u16(q2u16));
+
+      vst1_lane_u32((uint32_t *)d, d2u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d2u32, 1);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 0);
+      d += dst_stride;
+      vst1_lane_u32((uint32_t *)d, d3u32, 1);
+      d += dst_stride;
+
+      q8u16 = q10u16;
+      d18s16 = d22s16;
+      d19s16 = d24s16;
+      q10u16 = q13u16;
+      d22s16 = d25s16;
+    }
+  }
+  return;
+}
diff --git a/vp9/common/arm/neon/vp9_convolve8_neon_asm.asm b/vpx_dsp/arm/vpx_convolve8_neon_asm.asm

similarity index 92%

rename from vp9/common/arm/neon/vp9_convolve8_neon_asm.asm

rename to vpx_dsp/arm/vpx_convolve8_neon_asm.asm

index 184c3ad..bc530de 100644 (file)
--- a/vp9/common/arm/neon/vp9_convolve8_neon_asm.asm
+++ b/vpx_dsp/arm/vpx_convolve8_neon_asm.asm
@@ -17,10 +17,10 @@
      ; VP9_FILTER_WEIGHT == 128
      ; VP9_FILTER_SHIFT == 7
  
-    EXPORT  |vp9_convolve8_horiz_neon|
-    EXPORT  |vp9_convolve8_vert_neon|
-    IMPORT  |vp9_convolve8_horiz_c|
-    IMPORT  |vp9_convolve8_vert_c|
+    EXPORT  |vpx_convolve8_horiz_neon|
+    EXPORT  |vpx_convolve8_vert_neon|
+    IMPORT  |vpx_convolve8_horiz_c|
+    IMPORT  |vpx_convolve8_vert_c|
      ARM
      REQUIRE8
      PRESERVE8
@@ -51,10 +51,10 @@
  ; sp[]int w
  ; sp[]int h
  
-|vp9_convolve8_horiz_neon| PROC
+|vpx_convolve8_horiz_neon| PROC
      ldr             r12, [sp, #4]           ; x_step_q4
      cmp             r12, #16
-    bne             vp9_convolve8_horiz_c
+    bne             vpx_convolve8_horiz_c
  
      push            {r4-r10, lr}
  
@@ -78,7 +78,7 @@
  
      mov             r10, r6                 ; w loop counter
  
-vp9_convolve8_loop_horiz_v
+vpx_convolve8_loop_horiz_v
      vld1.8          {d24}, [r0], r1
      vld1.8          {d25}, [r0], r1
      vld1.8          {d26}, [r0], r1
@@ -101,7 +101,7 @@ vp9_convolve8_loop_horiz_v
  
      add             r0, r0, #3
  
-vp9_convolve8_loop_horiz
+vpx_convolve8_loop_horiz
      add             r5, r0, #64
  
      vld1.32         {d28[]}, [r0], r1
@@ -159,23 +159,23 @@ vp9_convolve8_loop_horiz
      vmov            q9,  q13
  
      subs            r6, r6, #4              ; w -= 4
-    bgt             vp9_convolve8_loop_horiz
+    bgt             vpx_convolve8_loop_horiz
  
      ; outer loop
      mov             r6, r10                 ; restore w counter
      add             r0, r0, r9              ; src += src_stride * 4 - w
      add             r2, r2, r12             ; dst += dst_stride * 4 - w
      subs            r7, r7, #4              ; h -= 4
-    bgt vp9_convolve8_loop_horiz_v
+    bgt vpx_convolve8_loop_horiz_v
  
      pop             {r4-r10, pc}
  
      ENDP
  
-|vp9_convolve8_vert_neon| PROC
+|vpx_convolve8_vert_neon| PROC
      ldr             r12, [sp, #12]
      cmp             r12, #16
-    bne             vp9_convolve8_vert_c
+    bne             vpx_convolve8_vert_c
  
      push            {r4-r8, lr}
  
@@ -192,7 +192,7 @@ vp9_convolve8_loop_horiz
      lsl             r1, r1, #1
      lsl             r3, r3, #1
  
-vp9_convolve8_loop_vert_h
+vpx_convolve8_loop_vert_h
      mov             r4, r0
      add             r7, r0, r1, asr #1
      mov             r5, r2
@@ -212,7 +212,7 @@ vp9_convolve8_loop_vert_h
      vmovl.u8        q10, d20
      vmovl.u8        q11, d22
  
-vp9_convolve8_loop_vert
+vpx_convolve8_loop_vert
      ; always process a 4x4 block at a time
      vld1.u32        {d24[0]}, [r7], r1
      vld1.u32        {d26[0]}, [r4], r1
@@ -266,13 +266,13 @@ vp9_convolve8_loop_vert
      vmov            d22, d25
  
      subs            r12, r12, #4            ; h -= 4
-    bgt             vp9_convolve8_loop_vert
+    bgt             vpx_convolve8_loop_vert
  
      ; outer loop
      add             r0, r0, #4
      add             r2, r2, #4
      subs            r6, r6, #4              ; w -= 4
-    bgt             vp9_convolve8_loop_vert_h
+    bgt             vpx_convolve8_loop_vert_h
  
      pop             {r4-r8, pc}
  
diff --git a/vpx_dsp/arm/vpx_convolve_avg_neon.c b/vpx_dsp/arm/vpx_convolve_avg_neon.c

new file mode 100644 (file)

index 0000000..dc58a33
--- /dev/null
+++ b/vpx_dsp/arm/vpx_convolve_avg_neon.c
@@ -0,0 +1,147 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+
+void vpx_convolve_avg_neon(
+    const uint8_t *src,    // r0
+    ptrdiff_t src_stride,  // r1
+    uint8_t *dst,          // r2
+    ptrdiff_t dst_stride,  // r3
+    const int16_t *filter_x,
+    int filter_x_stride,
+    const int16_t *filter_y,
+    int filter_y_stride,
+    int w,
+    int h) {
+  uint8_t *d;
+  uint8x8_t d0u8, d1u8, d2u8, d3u8;
+  uint32x2_t d0u32, d2u32;
+  uint8x16_t q0u8, q1u8, q2u8, q3u8, q8u8, q9u8, q10u8, q11u8;
+  (void)filter_x;  (void)filter_x_stride;
+  (void)filter_y;  (void)filter_y_stride;
+
+  d = dst;
+  if (w > 32) {  // avg64
+    for (; h > 0; h -= 1) {
+      q0u8  = vld1q_u8(src);
+      q1u8  = vld1q_u8(src + 16);
+      q2u8  = vld1q_u8(src + 32);
+      q3u8  = vld1q_u8(src + 48);
+      src += src_stride;
+      q8u8  = vld1q_u8(d);
+      q9u8  = vld1q_u8(d + 16);
+      q10u8 = vld1q_u8(d + 32);
+      q11u8 = vld1q_u8(d + 48);
+      d += dst_stride;
+
+      q0u8 = vrhaddq_u8(q0u8, q8u8);
+      q1u8 = vrhaddq_u8(q1u8, q9u8);
+      q2u8 = vrhaddq_u8(q2u8, q10u8);
+      q3u8 = vrhaddq_u8(q3u8, q11u8);
+
+      vst1q_u8(dst, q0u8);
+      vst1q_u8(dst + 16, q1u8);
+      vst1q_u8(dst + 32, q2u8);
+      vst1q_u8(dst + 48, q3u8);
+      dst += dst_stride;
+    }
+  } else if (w == 32) {  // avg32
+    for (; h > 0; h -= 2) {
+      q0u8 = vld1q_u8(src);
+      q1u8 = vld1q_u8(src + 16);
+      src += src_stride;
+      q2u8 = vld1q_u8(src);
+      q3u8 = vld1q_u8(src + 16);
+      src += src_stride;
+      q8u8 = vld1q_u8(d);
+      q9u8 = vld1q_u8(d + 16);
+      d += dst_stride;
+      q10u8 = vld1q_u8(d);
+      q11u8 = vld1q_u8(d + 16);
+      d += dst_stride;
+
+      q0u8 = vrhaddq_u8(q0u8, q8u8);
+      q1u8 = vrhaddq_u8(q1u8, q9u8);
+      q2u8 = vrhaddq_u8(q2u8, q10u8);
+      q3u8 = vrhaddq_u8(q3u8, q11u8);
+
+      vst1q_u8(dst, q0u8);
+      vst1q_u8(dst + 16, q1u8);
+      dst += dst_stride;
+      vst1q_u8(dst, q2u8);
+      vst1q_u8(dst + 16, q3u8);
+      dst += dst_stride;
+    }
+  } else if (w > 8) {  // avg16
+    for (; h > 0; h -= 2) {
+      q0u8 = vld1q_u8(src);
+      src += src_stride;
+      q1u8 = vld1q_u8(src);
+      src += src_stride;
+      q2u8 = vld1q_u8(d);
+      d += dst_stride;
+      q3u8 = vld1q_u8(d);
+      d += dst_stride;
+
+      q0u8 = vrhaddq_u8(q0u8, q2u8);
+      q1u8 = vrhaddq_u8(q1u8, q3u8);
+
+      vst1q_u8(dst, q0u8);
+      dst += dst_stride;
+      vst1q_u8(dst, q1u8);
+      dst += dst_stride;
+    }
+  } else if (w == 8) {  // avg8
+    for (; h > 0; h -= 2) {
+      d0u8 = vld1_u8(src);
+      src += src_stride;
+      d1u8 = vld1_u8(src);
+      src += src_stride;
+      d2u8 = vld1_u8(d);
+      d += dst_stride;
+      d3u8 = vld1_u8(d);
+      d += dst_stride;
+
+      q0u8 = vcombine_u8(d0u8, d1u8);
+      q1u8 = vcombine_u8(d2u8, d3u8);
+      q0u8 = vrhaddq_u8(q0u8, q1u8);
+
+      vst1_u8(dst, vget_low_u8(q0u8));
+      dst += dst_stride;
+      vst1_u8(dst, vget_high_u8(q0u8));
+      dst += dst_stride;
+    }
+  } else {  // avg4
+    for (; h > 0; h -= 2) {
+      d0u32 = vld1_lane_u32((const uint32_t *)src, d0u32, 0);
+      src += src_stride;
+      d0u32 = vld1_lane_u32((const uint32_t *)src, d0u32, 1);
+      src += src_stride;
+      d2u32 = vld1_lane_u32((const uint32_t *)d, d2u32, 0);
+      d += dst_stride;
+      d2u32 = vld1_lane_u32((const uint32_t *)d, d2u32, 1);
+      d += dst_stride;
+
+      d0u8 = vrhadd_u8(vreinterpret_u8_u32(d0u32),
+                       vreinterpret_u8_u32(d2u32));
+
+      d0u32 = vreinterpret_u32_u8(d0u8);
+      vst1_lane_u32((uint32_t *)dst, d0u32, 0);
+      dst += dst_stride;
+      vst1_lane_u32((uint32_t *)dst, d0u32, 1);
+      dst += dst_stride;
+    }
+  }
+  return;
+}
diff --git a/vp9/common/arm/neon/vp9_convolve_avg_neon_asm.asm b/vpx_dsp/arm/vpx_convolve_avg_neon_asm.asm

similarity index 98%

rename from vp9/common/arm/neon/vp9_convolve_avg_neon_asm.asm

rename to vpx_dsp/arm/vpx_convolve_avg_neon_asm.asm

index 7d24530..97e6189 100644 (file)
--- a/vp9/common/arm/neon/vp9_convolve_avg_neon_asm.asm
+++ b/vpx_dsp/arm/vpx_convolve_avg_neon_asm.asm
@@ -8,14 +8,14 @@
  ;  be found in the AUTHORS file in the root of the source tree.
  ;
  
-    EXPORT  |vp9_convolve_avg_neon|
+    EXPORT  |vpx_convolve_avg_neon|
      ARM
      REQUIRE8
      PRESERVE8
  
      AREA ||.text||, CODE, READONLY, ALIGN=2
  
-|vp9_convolve_avg_neon| PROC
+|vpx_convolve_avg_neon| PROC
      push                {r4-r6, lr}
      ldrd                r4, r5, [sp, #32]
      mov                 r6, r2
diff --git a/vpx_dsp/arm/vpx_convolve_copy_neon.c b/vpx_dsp/arm/vpx_convolve_copy_neon.c

new file mode 100644 (file)

index 0000000..d8fb97a
--- /dev/null
+++ b/vpx_dsp/arm/vpx_convolve_copy_neon.c
@@ -0,0 +1,94 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+
+void vpx_convolve_copy_neon(
+    const uint8_t *src,    // r0
+    ptrdiff_t src_stride,  // r1
+    uint8_t *dst,          // r2
+    ptrdiff_t dst_stride,  // r3
+    const int16_t *filter_x,
+    int filter_x_stride,
+    const int16_t *filter_y,
+    int filter_y_stride,
+    int w,
+    int h) {
+  uint8x8_t d0u8, d2u8;
+  uint8x16_t q0u8, q1u8, q2u8, q3u8;
+  (void)filter_x;  (void)filter_x_stride;
+  (void)filter_y;  (void)filter_y_stride;
+
+  if (w > 32) {  // copy64
+    for (; h > 0; h--) {
+      q0u8 = vld1q_u8(src);
+      q1u8 = vld1q_u8(src + 16);
+      q2u8 = vld1q_u8(src + 32);
+      q3u8 = vld1q_u8(src + 48);
+      src += src_stride;
+
+      vst1q_u8(dst, q0u8);
+      vst1q_u8(dst + 16, q1u8);
+      vst1q_u8(dst + 32, q2u8);
+      vst1q_u8(dst + 48, q3u8);
+      dst += dst_stride;
+    }
+  } else if (w == 32) {  // copy32
+    for (; h > 0; h -= 2) {
+      q0u8 = vld1q_u8(src);
+      q1u8 = vld1q_u8(src + 16);
+      src += src_stride;
+      q2u8 = vld1q_u8(src);
+      q3u8 = vld1q_u8(src + 16);
+      src += src_stride;
+
+      vst1q_u8(dst, q0u8);
+      vst1q_u8(dst + 16, q1u8);
+      dst += dst_stride;
+      vst1q_u8(dst, q2u8);
+      vst1q_u8(dst + 16, q3u8);
+      dst += dst_stride;
+    }
+  } else if (w > 8) {  // copy16
+    for (; h > 0; h -= 2) {
+      q0u8 = vld1q_u8(src);
+      src += src_stride;
+      q1u8 = vld1q_u8(src);
+      src += src_stride;
+
+      vst1q_u8(dst, q0u8);
+      dst += dst_stride;
+      vst1q_u8(dst, q1u8);
+      dst += dst_stride;
+    }
+  } else if (w == 8) {  // copy8
+    for (; h > 0; h -= 2) {
+      d0u8 = vld1_u8(src);
+      src += src_stride;
+      d2u8 = vld1_u8(src);
+      src += src_stride;
+
+      vst1_u8(dst, d0u8);
+      dst += dst_stride;
+      vst1_u8(dst, d2u8);
+      dst += dst_stride;
+    }
+  } else {  // copy4
+    for (; h > 0; h--) {
+      *(uint32_t *)dst = *(const uint32_t *)src;
+      src += src_stride;
+      dst += dst_stride;
+    }
+  }
+  return;
+}
diff --git a/vp9/common/arm/neon/vp9_copy_neon_asm.asm b/vpx_dsp/arm/vpx_convolve_copy_neon_asm.asm

similarity index 97%

rename from vp9/common/arm/neon/vp9_copy_neon_asm.asm

rename to vpx_dsp/arm/vpx_convolve_copy_neon_asm.asm

index a0bd04a..89164ad 100644 (file)
--- a/vp9/common/arm/neon/vp9_copy_neon_asm.asm
+++ b/vpx_dsp/arm/vpx_convolve_copy_neon_asm.asm
@@ -8,14 +8,14 @@
  ;  be found in the AUTHORS file in the root of the source tree.
  ;
  
-    EXPORT  |vp9_convolve_copy_neon|
+    EXPORT  |vpx_convolve_copy_neon|
      ARM
      REQUIRE8
      PRESERVE8
  
      AREA ||.text||, CODE, READONLY, ALIGN=2
  
-|vp9_convolve_copy_neon| PROC
+|vpx_convolve_copy_neon| PROC
      push                {r4-r5, lr}
      ldrd                r4, r5, [sp, #28]
  
diff --git a/vp9/common/arm/neon/vp9_convolve_neon.c b/vpx_dsp/arm/vpx_convolve_neon.c

similarity index 85%

rename from vp9/common/arm/neon/vp9_convolve_neon.c

rename to vpx_dsp/arm/vpx_convolve_neon.c

index 2e28cb2..2c0f7e9 100644 (file)
--- a/vp9/common/arm/neon/vp9_convolve_neon.c
+++ b/vpx_dsp/arm/vpx_convolve_neon.c
@@ -8,11 +8,11 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/vpx_dsp_common.h"
  #include "vpx_ports/mem.h"
  
-void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4,
@@ -26,7 +26,7 @@ void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
    int intermediate_height = h + 7;
  
    if (x_step_q4 != 16 || y_step_q4 != 16) {
-    vp9_convolve8_c(src, src_stride,
+    vpx_convolve8_c(src, src_stride,
                      dst, dst_stride,
                      filter_x, x_step_q4,
                      filter_y, y_step_q4,
@@ -39,19 +39,19 @@ void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
     * the temp buffer which has lots of extra room and is subsequently discarded
     * this is safe if somewhat less than ideal.
     */
-  vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
+  vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride,
                             temp, 64,
                             filter_x, x_step_q4, filter_y, y_step_q4,
                             w, intermediate_height);
  
    /* Step into the temp buffer 3 lines to get the actual frame data */
-  vp9_convolve8_vert_neon(temp + 64 * 3, 64,
+  vpx_convolve8_vert_neon(temp + 64 * 3, 64,
                            dst, dst_stride,
                            filter_x, x_step_q4, filter_y, y_step_q4,
                            w, h);
  }
  
-void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
@@ -60,7 +60,7 @@ void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
    int intermediate_height = h + 7;
  
    if (x_step_q4 != 16 || y_step_q4 != 16) {
-    vp9_convolve8_avg_c(src, src_stride,
+    vpx_convolve8_avg_c(src, src_stride,
                          dst, dst_stride,
                          filter_x, x_step_q4,
                          filter_y, y_step_q4,
@@ -71,11 +71,11 @@ void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
    /* This implementation has the same issues as above. In addition, we only want
     * to average the values after both passes.
     */
-  vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
+  vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride,
                             temp, 64,
                             filter_x, x_step_q4, filter_y, y_step_q4,
                             w, intermediate_height);
-  vp9_convolve8_avg_vert_neon(temp + 64 * 3,
+  vpx_convolve8_avg_vert_neon(temp + 64 * 3,
                                64, dst, dst_stride,
                                filter_x, x_step_q4, filter_y, y_step_q4,
                                w, h);
diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c

index dc8aca5..66f4d95 100644 (file)
--- a/vpx_dsp/loopfilter.c
+++ b/vpx_dsp/loopfilter.c
@@ -8,6 +8,8 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
+#include <stdlib.h>
+
  #include "./vpx_config.h"
  #include "vpx_dsp/vpx_dsp_common.h"
  #include "vpx_ports/mem.h"
diff --git a/vp9/common/mips/msa/vp9_convolve8_avg_horiz_msa.c b/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c

similarity index 98%

rename from vp9/common/mips/msa/vp9_convolve8_avg_horiz_msa.c

rename to vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c

index 89364cb..c5ad08c 100644 (file)
--- a/vp9/common/mips/msa/vp9_convolve8_avg_horiz_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c
@@ -8,8 +8,8 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
-#include "./vp9_rtcd.h"
-#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/mips/vpx_convolve_msa.h"
  
  static void common_hz_8t_and_aver_dst_4x4_msa(const uint8_t *src,
                                                int32_t src_stride,
@@ -687,7 +687,7 @@ static void common_hz_2t_and_aver_dst_64w_msa(const uint8_t *src,
    }
  }
  
-void vp9_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                                   uint8_t *dst, ptrdiff_t dst_stride,
                                   const int16_t *filter_x, int x_step_q4,
                                   const int16_t *filter_y, int y_step_q4,
@@ -695,14 +695,14 @@ void vp9_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
    int8_t cnt, filt_hor[8];
  
    if (16 != x_step_q4) {
-    vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                filter_x, x_step_q4, filter_y, y_step_q4,
                                w, h);
      return;
    }
  
    if (((const int32_t *)filter_x)[1] == 0x800000) {
-    vp9_convolve_avg(src, src_stride, dst, dst_stride,
+    vpx_convolve_avg(src, src_stride, dst, dst_stride,
                       filter_x, x_step_q4, filter_y, y_step_q4,
                       w, h);
      return;
@@ -740,7 +740,7 @@ void vp9_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                                            &filt_hor[3], h);
          break;
        default:
-        vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                    filter_x, x_step_q4, filter_y, y_step_q4,
                                    w, h);
          break;
@@ -773,7 +773,7 @@ void vp9_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                                            filt_hor, h);
          break;
        default:
-        vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                    filter_x, x_step_q4, filter_y, y_step_q4,
                                    w, h);
          break;
diff --git a/vp9/common/mips/msa/vp9_convolve8_avg_msa.c b/vpx_dsp/mips/vpx_convolve8_avg_msa.c

similarity index 98%

rename from vp9/common/mips/msa/vp9_convolve8_avg_msa.c

rename to vpx_dsp/mips/vpx_convolve8_avg_msa.c

index e9f3a9d..f46df67 100644 (file)
--- a/vp9/common/mips/msa/vp9_convolve8_avg_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_msa.c
@@ -8,8 +8,8 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
-#include "./vp9_rtcd.h"
-#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/mips/vpx_convolve_msa.h"
  
  static void common_hv_8ht_8vt_and_aver_dst_4w_msa(const uint8_t *src,
                                                    int32_t src_stride,
@@ -576,7 +576,7 @@ static void common_hv_2ht_2vt_and_aver_dst_64w_msa(const uint8_t *src,
    }
  }
  
-void vp9_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int x_step_q4,
                             const int16_t *filter_y, int y_step_q4,
@@ -584,7 +584,7 @@ void vp9_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
    int8_t cnt, filt_hor[8], filt_ver[8];
  
    if (16 != x_step_q4 || 16 != y_step_q4) {
-    vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_avg_c(src, src_stride, dst, dst_stride,
                          filter_x, x_step_q4, filter_y, y_step_q4,
                          w, h);
      return;
@@ -592,7 +592,7 @@ void vp9_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
  
    if (((const int32_t *)filter_x)[1] == 0x800000 &&
        ((const int32_t *)filter_y)[1] == 0x800000) {
-    vp9_convolve_avg(src, src_stride, dst, dst_stride,
+    vpx_convolve_avg(src, src_stride, dst, dst_stride,
                       filter_x, x_step_q4, filter_y, y_step_q4,
                       w, h);
      return;
@@ -632,14 +632,14 @@ void vp9_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
                                                 &filt_hor[3], &filt_ver[3], h);
          break;
        default:
-        vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_avg_c(src, src_stride, dst, dst_stride,
                              filter_x, x_step_q4, filter_y, y_step_q4,
                              w, h);
          break;
      }
    } else if (((const int32_t *)filter_x)[0] == 0 ||
               ((const int32_t *)filter_y)[0] == 0) {
-    vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_avg_c(src, src_stride, dst, dst_stride,
                          filter_x, x_step_q4, filter_y, y_step_q4,
                          w, h);
    } else {
@@ -670,7 +670,7 @@ void vp9_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
                                                 filt_hor, filt_ver, h);
          break;
        default:
-        vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_avg_c(src, src_stride, dst, dst_stride,
                              filter_x, x_step_q4, filter_y, y_step_q4,
                              w, h);
          break;
diff --git a/vp9/common/mips/msa/vp9_convolve8_avg_vert_msa.c b/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c

similarity index 98%

rename from vp9/common/mips/msa/vp9_convolve8_avg_vert_msa.c

rename to vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c

index 85dfd30..4d4b3d9 100644 (file)
--- a/vp9/common/mips/msa/vp9_convolve8_avg_vert_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
@@ -8,8 +8,8 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
-#include "./vp9_rtcd.h"
-#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/mips/vpx_convolve_msa.h"
  
  static void common_vt_8t_and_aver_dst_4w_msa(const uint8_t *src,
                                               int32_t src_stride,
@@ -657,7 +657,7 @@ static void common_vt_2t_and_aver_dst_64w_msa(const uint8_t *src,
    }
  }
  
-void vp9_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const int16_t *filter_x, int x_step_q4,
                                  const int16_t *filter_y, int y_step_q4,
@@ -665,14 +665,14 @@ void vp9_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
    int8_t cnt, filt_ver[8];
  
    if (16 != y_step_q4) {
-    vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                               filter_x, x_step_q4, filter_y, y_step_q4,
                               w, h);
      return;
    }
  
    if (((const int32_t *)filter_y)[1] == 0x800000) {
-    vp9_convolve_avg(src, src_stride, dst, dst_stride,
+    vpx_convolve_avg(src, src_stride, dst, dst_stride,
                       filter_x, x_step_q4, filter_y, y_step_q4,
                       w, h);
      return;
@@ -710,7 +710,7 @@ void vp9_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                                            &filt_ver[3], h);
          break;
        default:
-        vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                                   filter_x, x_step_q4, filter_y, y_step_q4,
                                   w, h);
          break;
@@ -744,7 +744,7 @@ void vp9_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                                            filt_ver, h);
          break;
        default:
-        vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
                                   filter_x, x_step_q4, filter_y, y_step_q4,
                                   w, h);
          break;
diff --git a/vp9/common/mips/msa/vp9_convolve8_horiz_msa.c b/vpx_dsp/mips/vpx_convolve8_horiz_msa.c

similarity index 98%

rename from vp9/common/mips/msa/vp9_convolve8_horiz_msa.c

rename to vpx_dsp/mips/vpx_convolve8_horiz_msa.c

index f175bf9..2bb314d 100644 (file)
--- a/vp9/common/mips/msa/vp9_convolve8_horiz_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_horiz_msa.c
@@ -8,8 +8,8 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
-#include "./vp9_rtcd.h"
-#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/mips/vpx_convolve_msa.h"
  
  static void common_hz_8t_4x4_msa(const uint8_t *src, int32_t src_stride,
                                   uint8_t *dst, int32_t dst_stride,
@@ -647,7 +647,7 @@ static void common_hz_2t_64w_msa(const uint8_t *src, int32_t src_stride,
    }
  }
  
-void vp9_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
@@ -655,14 +655,14 @@ void vp9_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
    int8_t cnt, filt_hor[8];
  
    if (16 != x_step_q4) {
-    vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                            filter_x, x_step_q4, filter_y, y_step_q4,
                            w, h);
      return;
    }
  
    if (((const int32_t *)filter_x)[1] == 0x800000) {
-    vp9_convolve_copy(src, src_stride, dst, dst_stride,
+    vpx_convolve_copy(src, src_stride, dst, dst_stride,
                        filter_x, x_step_q4, filter_y, y_step_q4,
                        w, h);
      return;
@@ -700,7 +700,7 @@ void vp9_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                               &filt_hor[3], h);
          break;
        default:
-        vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                                filter_x, x_step_q4, filter_y, y_step_q4,
                                w, h);
          break;
@@ -733,7 +733,7 @@ void vp9_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                               filt_hor, h);
          break;
        default:
-        vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride,
                                filter_x, x_step_q4, filter_y, y_step_q4,
                                w, h);
          break;
diff --git a/vp9/common/mips/msa/vp9_convolve8_msa.c b/vpx_dsp/mips/vpx_convolve8_msa.c

similarity index 98%

rename from vp9/common/mips/msa/vp9_convolve8_msa.c

rename to vpx_dsp/mips/vpx_convolve8_msa.c

index b1279d9..52f2028 100644 (file)
--- a/vp9/common/mips/msa/vp9_convolve8_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_msa.c
@@ -8,8 +8,8 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
-#include "./vp9_rtcd.h"
-#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/mips/vpx_convolve_msa.h"
  
  const uint8_t mc_filt_mask_arr[16 * 3] = {
    /* 8 width cases */
@@ -551,7 +551,7 @@ static void common_hv_2ht_2vt_64w_msa(const uint8_t *src, int32_t src_stride,
    }
  }
  
-void vp9_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int32_t x_step_q4,
                         const int16_t *filter_y, int32_t y_step_q4,
@@ -559,7 +559,7 @@ void vp9_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
    int8_t cnt, filt_hor[8], filt_ver[8];
  
    if (16 != x_step_q4 || 16 != y_step_q4) {
-    vp9_convolve8_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_c(src, src_stride, dst, dst_stride,
                      filter_x, x_step_q4, filter_y, y_step_q4,
                      w, h);
      return;
@@ -567,7 +567,7 @@ void vp9_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
  
    if (((const int32_t *)filter_x)[1] == 0x800000 &&
        ((const int32_t *)filter_y)[1] == 0x800000) {
-    vp9_convolve_copy(src, src_stride, dst, dst_stride,
+    vpx_convolve_copy(src, src_stride, dst, dst_stride,
                        filter_x, x_step_q4, filter_y, y_step_q4,
                        w, h);
      return;
@@ -607,14 +607,14 @@ void vp9_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
                                    &filt_hor[3], &filt_ver[3], (int32_t)h);
          break;
        default:
-        vp9_convolve8_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_c(src, src_stride, dst, dst_stride,
                          filter_x, x_step_q4, filter_y, y_step_q4,
                          w, h);
          break;
      }
    } else if (((const int32_t *)filter_x)[0] == 0 ||
               ((const int32_t *)filter_y)[0] == 0) {
-    vp9_convolve8_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_c(src, src_stride, dst, dst_stride,
                      filter_x, x_step_q4, filter_y, y_step_q4,
                      w, h);
    } else {
@@ -645,7 +645,7 @@ void vp9_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
                                    filt_hor, filt_ver, (int32_t)h);
          break;
        default:
-        vp9_convolve8_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_c(src, src_stride, dst, dst_stride,
                          filter_x, x_step_q4, filter_y, y_step_q4,
                          w, h);
          break;
diff --git a/vp9/common/mips/msa/vp9_convolve8_vert_msa.c b/vpx_dsp/mips/vpx_convolve8_vert_msa.c

similarity index 98%

rename from vp9/common/mips/msa/vp9_convolve8_vert_msa.c

rename to vpx_dsp/mips/vpx_convolve8_vert_msa.c

index e9ec250..85f1757 100644 (file)
--- a/vp9/common/mips/msa/vp9_convolve8_vert_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_vert_msa.c
@@ -8,8 +8,8 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
-#include "./vp9_rtcd.h"
-#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/mips/vpx_convolve_msa.h"
  
  static void common_vt_8t_4w_msa(const uint8_t *src, int32_t src_stride,
                                  uint8_t *dst, int32_t dst_stride,
@@ -650,7 +650,7 @@ static void common_vt_2t_64w_msa(const uint8_t *src, int32_t src_stride,
    }
  }
  
-void vp9_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
@@ -658,14 +658,14 @@ void vp9_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
    int8_t cnt, filt_ver[8];
  
    if (16 != y_step_q4) {
-    vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
+    vpx_convolve8_vert_c(src, src_stride, dst, dst_stride,
                           filter_x, x_step_q4, filter_y, y_step_q4,
                           w, h);
      return;
    }
  
    if (((const int32_t *)filter_y)[1] == 0x800000) {
-    vp9_convolve_copy(src, src_stride, dst, dst_stride,
+    vpx_convolve_copy(src, src_stride, dst, dst_stride,
                        filter_x, x_step_q4, filter_y, y_step_q4,
                        w, h);
      return;
@@ -703,7 +703,7 @@ void vp9_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                               &filt_ver[3], h);
          break;
        default:
-        vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_vert_c(src, src_stride, dst, dst_stride,
                               filter_x, x_step_q4, filter_y, y_step_q4,
                               w, h);
          break;
@@ -736,7 +736,7 @@ void vp9_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                               filt_ver, h);
          break;
        default:
-        vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
+        vpx_convolve8_vert_c(src, src_stride, dst, dst_stride,
                               filter_x, x_step_q4, filter_y, y_step_q4,
                               w, h);
          break;
diff --git a/vp9/common/mips/msa/vp9_convolve_avg_msa.c b/vpx_dsp/mips/vpx_convolve_avg_msa.c

similarity index 99%

rename from vp9/common/mips/msa/vp9_convolve_avg_msa.c

rename to vpx_dsp/mips/vpx_convolve_avg_msa.c

index 7c11e40..4c3d978 100644 (file)
--- a/vp9/common/mips/msa/vp9_convolve_avg_msa.c
+++ b/vpx_dsp/mips/vpx_convolve_avg_msa.c
@@ -186,7 +186,7 @@ static void avg_width64_msa(const uint8_t *src, int32_t src_stride,
    }
  }
  
-void vp9_convolve_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int32_t filter_x_stride,
                            const int16_t *filter_y, int32_t filter_y_stride,
diff --git a/vp9/common/mips/msa/vp9_convolve_copy_msa.c b/vpx_dsp/mips/vpx_convolve_copy_msa.c

similarity index 99%

rename from vp9/common/mips/msa/vp9_convolve_copy_msa.c

rename to vpx_dsp/mips/vpx_convolve_copy_msa.c

index 39a0b24..ba40122 100644 (file)
--- a/vp9/common/mips/msa/vp9_convolve_copy_msa.c
+++ b/vpx_dsp/mips/vpx_convolve_copy_msa.c
@@ -196,7 +196,7 @@ static void copy_width64_msa(const uint8_t *src, int32_t src_stride,
    copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 64);
  }
  
-void vp9_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int32_t filter_x_stride,
                             const int16_t *filter_y, int32_t filter_y_stride,
diff --git a/vp9/common/mips/msa/vp9_convolve_msa.h b/vpx_dsp/mips/vpx_convolve_msa.h

similarity index 97%

rename from vp9/common/mips/msa/vp9_convolve_msa.h

rename to vpx_dsp/mips/vpx_convolve_msa.h

index 71c616b..e001398 100644 (file)
--- a/vp9/common/mips/msa/vp9_convolve_msa.h
+++ b/vpx_dsp/mips/vpx_convolve_msa.h
@@ -8,11 +8,11 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
-#ifndef VP9_COMMON_MIPS_MSA_VP9_CONVOLVE_MSA_H_
-#define VP9_COMMON_MIPS_MSA_VP9_CONVOLVE_MSA_H_
+#ifndef VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_
+#define VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_
  
-#include "vp9/common/vp9_filter.h"
  #include "vpx_dsp/mips/macros_msa.h"
+#include "vpx_dsp/vpx_filter.h"
  
  extern const uint8_t mc_filt_mask_arr[16 * 3];
  
@@ -116,4 +116,4 @@ extern const uint8_t mc_filt_mask_arr[16 * 3];
    AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m);          \
    ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride);                             \
  }
-#endif  /* VP9_COMMON_MIPS_MSA_VP9_CONVOLVE_MSA_H_ */
+#endif  /* VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_ */
diff --git a/vp9/common/vp9_convolve.c b/vpx_dsp/vpx_convolve.c

similarity index 93%

rename from vp9/common/vp9_convolve.c

rename to vpx_dsp/vpx_convolve.c

index 90e337f..f06da3d 100644 (file)
--- a/vp9/common/vp9_convolve.c
+++ b/vpx_dsp/vpx_convolve.c
@@ -9,13 +9,14 @@
   */
  
  #include <assert.h>
+#include <string.h>
  
  #include "./vpx_config.h"
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_convolve.h"
-#include "vp9/common/vp9_filter.h"
+#include "./vpx_dsp_rtcd.h"
  #include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_convolve.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/vpx_filter.h"
  #include "vpx_ports/mem.h"
  
  static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
@@ -154,7 +155,7 @@ static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
    return (int)((const InterpKernel *)(intptr_t)f - base);
  }
  
-void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int x_step_q4,
                             const int16_t *filter_y, int y_step_q4,
@@ -169,7 +170,7 @@ void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                   x0_q4, x_step_q4, w, h);
  }
  
-void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x, int x_step_q4,
                                 const int16_t *filter_y, int y_step_q4,
@@ -184,7 +185,7 @@ void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                       x0_q4, x_step_q4, w, h);
  }
  
-void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4,
@@ -199,7 +200,7 @@ void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                  y0_q4, y_step_q4, w, h);
  }
  
-void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                                uint8_t *dst, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int x_step_q4,
                                const int16_t *filter_y, int y_step_q4,
@@ -214,7 +215,7 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                      y0_q4, y_step_q4, w, h);
  }
  
-void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                       uint8_t *dst, ptrdiff_t dst_stride,
                       const int16_t *filter_x, int x_step_q4,
                       const int16_t *filter_y, int y_step_q4,
@@ -230,7 +231,7 @@ void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
             filters_y, y0_q4, y_step_q4, w, h);
  }
  
-void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
@@ -240,12 +241,12 @@ void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
    assert(w <= 64);
    assert(h <= 64);
  
-  vp9_convolve8_c(src, src_stride, temp, 64,
+  vpx_convolve8_c(src, src_stride, temp, 64,
                    filter_x, x_step_q4, filter_y, y_step_q4, w, h);
-  vp9_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
+  vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
  }
  
-void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int filter_x_stride,
                           const int16_t *filter_y, int filter_y_stride,
@@ -262,7 +263,7 @@ void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
    }
  }
  
-void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int filter_x_stride,
                          const int16_t *filter_y, int filter_y_stride,
@@ -423,7 +424,7 @@ static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
  }
  
  
-void vp9_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                                    uint8_t *dst, ptrdiff_t dst_stride,
                                    const int16_t *filter_x, int x_step_q4,
                                    const int16_t *filter_y, int y_step_q4,
@@ -437,7 +438,7 @@ void vp9_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                          x0_q4, x_step_q4, w, h, bd);
  }
  
-void vp9_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                                        uint8_t *dst, ptrdiff_t dst_stride,
                                        const int16_t *filter_x, int x_step_q4,
                                        const int16_t *filter_y, int y_step_q4,
@@ -451,7 +452,7 @@ void vp9_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                              x0_q4, x_step_q4, w, h, bd);
  }
  
-void vp9_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                                   uint8_t *dst, ptrdiff_t dst_stride,
                                   const int16_t *filter_x, int x_step_q4,
                                   const int16_t *filter_y, int y_step_q4,
@@ -465,7 +466,7 @@ void vp9_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                         y0_q4, y_step_q4, w, h, bd);
  }
  
-void vp9_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                                       uint8_t *dst, ptrdiff_t dst_stride,
                                       const int16_t *filter_x, int x_step_q4,
                                       const int16_t *filter_y, int y_step_q4,
@@ -479,7 +480,7 @@ void vp9_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                             y0_q4, y_step_q4, w, h, bd);
  }
  
-void vp9_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
@@ -495,7 +496,7 @@ void vp9_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                    filters_y, y0_q4, y_step_q4, w, h, bd);
  }
  
-void vp9_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
+void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const int16_t *filter_x, int x_step_q4,
                                  const int16_t *filter_y, int y_step_q4,
@@ -505,13 +506,13 @@ void vp9_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
    assert(w <= 64);
    assert(h <= 64);
  
-  vp9_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
+  vpx_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
                           filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
-  vp9_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride,
+  vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride,
                              NULL, 0, NULL, 0, w, h, bd);
  }
  
-void vp9_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
+void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
                                  uint8_t *dst8, ptrdiff_t dst_stride,
                                  const int16_t *filter_x, int filter_x_stride,
                                  const int16_t *filter_y, int filter_y_stride,
@@ -532,7 +533,7 @@ void vp9_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
    }
  }
  
-void vp9_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
+void vpx_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
                                 uint8_t *dst8, ptrdiff_t dst_stride,
                                 const int16_t *filter_x, int filter_x_stride,
                                 const int16_t *filter_y, int filter_y_stride,
diff --git a/vp9/common/vp9_convolve.h b/vpx_dsp/vpx_convolve.h

similarity index 92%

rename from vp9/common/vp9_convolve.h

rename to vpx_dsp/vpx_convolve.h

index 8b044c8..9ed3f17 100644 (file)
--- a/vp9/common/vp9_convolve.h
+++ b/vpx_dsp/vpx_convolve.h
@@ -7,8 +7,8 @@
   *  in the file PATENTS.  All contributing project authors may
   *  be found in the AUTHORS file in the root of the source tree.
   */
-#ifndef VP9_COMMON_VP9_CONVOLVE_H_
-#define VP9_COMMON_VP9_CONVOLVE_H_
+#ifndef VPX_DSP_VPX_CONVOLVE_H_
+#define VPX_DSP_VPX_CONVOLVE_H_
  
  #include "./vpx_config.h"
  #include "vpx/vpx_integer.h"
@@ -35,4 +35,4 @@ typedef void (*highbd_convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
  }  // extern "C"
  #endif
  
-#endif  // VP9_COMMON_VP9_CONVOLVE_H_
+#endif  // VPX_DSP_VPX_CONVOLVE_H_
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk

index ff2bb52..de45782 100644 (file)
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -54,6 +54,54 @@ DSP_SRCS-$(HAVE_DSPR2)  += mips/intrapred8_dspr2.c
  DSP_SRCS-$(HAVE_DSPR2)  += mips/intrapred16_dspr2.c
  endif  # CONFIG_VP9
  
+# interpolation filters
+DSP_SRCS-yes += vpx_convolve.c
+DSP_SRCS-yes += vpx_convolve.h
+DSP_SRCS-yes += vpx_filter.h
+
+DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/convolve.h
+DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/vpx_asm_stubs.c
+DSP_SRCS-$(HAVE_SSE2)  += x86/vpx_subpixel_8t_sse2.asm
+DSP_SRCS-$(HAVE_SSE2)  += x86/vpx_subpixel_bilinear_sse2.asm
+DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_8t_ssse3.asm
+DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_bilinear_ssse3.asm
+DSP_SRCS-$(HAVE_AVX2)  += x86/vpx_subpixel_8t_intrin_avx2.c
+DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_8t_intrin_ssse3.c
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+DSP_SRCS-$(HAVE_SSE2)  += x86/vpx_high_subpixel_8t_sse2.asm
+DSP_SRCS-$(HAVE_SSE2)  += x86/vpx_high_subpixel_bilinear_sse2.asm
+endif
+ifeq ($(CONFIG_USE_X86INC),yes)
+DSP_SRCS-$(HAVE_SSE2)  += x86/vpx_convolve_copy_sse2.asm
+endif
+
+ifeq ($(HAVE_NEON_ASM),yes)
+DSP_SRCS-yes += arm/vpx_convolve_copy_neon_asm$(ASM)
+DSP_SRCS-yes += arm/vpx_convolve8_avg_neon_asm$(ASM)
+DSP_SRCS-yes += arm/vpx_convolve8_neon_asm$(ASM)
+DSP_SRCS-yes += arm/vpx_convolve_avg_neon_asm$(ASM)
+DSP_SRCS-yes += arm/vpx_convolve_neon.c
+else
+ifeq ($(HAVE_NEON),yes)
+DSP_SRCS-yes += arm/vpx_convolve_copy_neon.c
+DSP_SRCS-yes += arm/vpx_convolve8_avg_neon.c
+DSP_SRCS-yes += arm/vpx_convolve8_neon.c
+DSP_SRCS-yes += arm/vpx_convolve_avg_neon.c
+DSP_SRCS-yes += arm/vpx_convolve_neon.c
+endif  # HAVE_NEON
+endif  # HAVE_NEON_ASM
+
+# common (msa)
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_avg_horiz_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_avg_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_avg_vert_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_horiz_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_vert_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve_avg_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve_copy_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve_msa.h
+
  # loop filters
  DSP_SRCS-yes += loopfilter.c
  
diff --git a/vpx_dsp/vpx_dsp_common.h b/vpx_dsp/vpx_dsp_common.h

index 1fd7c15..ccb8189 100644 (file)
--- a/vpx_dsp/vpx_dsp_common.h
+++ b/vpx_dsp/vpx_dsp_common.h
@@ -11,8 +11,6 @@
  #ifndef VPX_DSP_COMMON_H_
  #define VPX_DSP_COMMON_H_
  
-#include <stdlib.h>
-
  #include "./vpx_config.h"
  #include "vpx/vpx_integer.h"
  #include "vpx_ports/mem.h"
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl

index 85b2522..6e63271 100644 (file)
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -34,6 +34,12 @@ if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
    }
  }
  
+# optimizations which depend on multiple features
+$avx2_ssse3 = '';
+if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) {
+  $avx2_ssse3 = 'avx2';
+}
+
  # functions that are 64 bit only.
  $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
  if ($opts{arch} eq "x86_64") {
@@ -366,6 +372,62 @@ if (vpx_config("CONFIG_VP9") eq "yes") {
  }  # CONFIG_VP9
  
  #
+# Sub Pixel Filters
+#
+add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve_copy neon msa/, "$sse2_x86inc";
+
+add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve_avg neon msa/, "$sse2_x86inc";
+
+add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve8 sse2 ssse3 neon msa/, "$avx2_ssse3";
+
+add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve8_horiz sse2 ssse3 neon msa/, "$avx2_ssse3";
+
+add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve8_vert sse2 ssse3 neon msa/, "$avx2_ssse3";
+
+add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve8_avg sse2 ssse3 neon msa/;
+
+add_proto qw/void vpx_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 neon msa/;
+
+add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+specialize qw/vpx_convolve8_avg_vert sse2 ssse3 neon msa/;
+
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+  #
+  # Sub Pixel Filters
+  #
+  add_proto qw/void vpx_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve_copy/;
+
+  add_proto qw/void vpx_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve_avg/;
+
+  add_proto qw/void vpx_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve8/, "$sse2_x86_64";
+
+  add_proto qw/void vpx_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve8_horiz/, "$sse2_x86_64";
+
+  add_proto qw/void vpx_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve8_vert/, "$sse2_x86_64";
+
+  add_proto qw/void vpx_highbd_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve8_avg/, "$sse2_x86_64";
+
+  add_proto qw/void vpx_highbd_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve8_avg_horiz/, "$sse2_x86_64";
+
+  add_proto qw/void vpx_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+  specialize qw/vpx_highbd_convolve8_avg_vert/, "$sse2_x86_64";
+}  # CONFIG_VP9_HIGHBITDEPTH
+
+#
  # Loopfilter
  #
  add_proto qw/void vpx_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
diff --git a/vpx_dsp/vpx_filter.h b/vpx_dsp/vpx_filter.h

new file mode 100644 (file)

index 0000000..2617feb
--- /dev/null
+++ b/vpx_dsp/vpx_filter.h
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_VPX_FILTER_H_
+#define VPX_DSP_VPX_FILTER_H_
+
+#include "vpx/vpx_integer.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FILTER_BITS 7
+
+#define SUBPEL_BITS 4
+#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
+#define SUBPEL_SHIFTS (1 << SUBPEL_BITS)
+#define SUBPEL_TAPS 8
+
+typedef int16_t InterpKernel[SUBPEL_TAPS];
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VPX_DSP_VPX_FILTER_H_
diff --git a/vp9/common/x86/convolve.h b/vpx_dsp/x86/convolve.h

similarity index 85%

rename from vp9/common/x86/convolve.h

rename to vpx_dsp/x86/convolve.h

index de2df47..c014498 100644 (file)
--- a/vp9/common/x86/convolve.h
+++ b/vpx_dsp/x86/convolve.h
@@ -7,8 +7,8 @@
   *  in the file PATENTS.  All contributing project authors may
   *  be found in the AUTHORS file in the root of the source tree.
   */
-#ifndef VP9_COMMON_X86_CONVOLVE_H_
-#define VP9_COMMON_X86_CONVOLVE_H_
+#ifndef VPX_DSP_X86_CONVOLVE_H_
+#define VPX_DSP_X86_CONVOLVE_H_
  
  #include <assert.h>
  
@@ -26,7 +26,7 @@ typedef void filter8_1dfunction (
  );
  
  #define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
-  void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
+  void vpx_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                      uint8_t *dst, ptrdiff_t dst_stride, \
                                      const int16_t *filter_x, int x_step_q4, \
                                      const int16_t *filter_y, int y_step_q4, \
@@ -34,7 +34,7 @@ typedef void filter8_1dfunction (
    if (step_q4 == 16 && filter[3] != 128) { \
      if (filter[0] || filter[1] || filter[2]) { \
        while (w >= 16) { \
-        vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \
+        vpx_filter_block1d16_##dir##8_##avg##opt(src_start, \
                                                   src_stride, \
                                                   dst, \
                                                   dst_stride, \
@@ -45,7 +45,7 @@ typedef void filter8_1dfunction (
          w -= 16; \
        } \
        while (w >= 8) { \
-        vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \
+        vpx_filter_block1d8_##dir##8_##avg##opt(src_start, \
                                                  src_stride, \
                                                  dst, \
                                                  dst_stride, \
@@ -56,7 +56,7 @@ typedef void filter8_1dfunction (
          w -= 8; \
        } \
        while (w >= 4) { \
-        vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \
+        vpx_filter_block1d4_##dir##8_##avg##opt(src_start, \
                                                  src_stride, \
                                                  dst, \
                                                  dst_stride, \
@@ -68,7 +68,7 @@ typedef void filter8_1dfunction (
        } \
      } else { \
        while (w >= 16) { \
-        vp9_filter_block1d16_##dir##2_##avg##opt(src, \
+        vpx_filter_block1d16_##dir##2_##avg##opt(src, \
                                                   src_stride, \
                                                   dst, \
                                                   dst_stride, \
@@ -79,7 +79,7 @@ typedef void filter8_1dfunction (
          w -= 16; \
        } \
        while (w >= 8) { \
-        vp9_filter_block1d8_##dir##2_##avg##opt(src, \
+        vpx_filter_block1d8_##dir##2_##avg##opt(src, \
                                                  src_stride, \
                                                  dst, \
                                                  dst_stride, \
@@ -90,7 +90,7 @@ typedef void filter8_1dfunction (
          w -= 8; \
        } \
        while (w >= 4) { \
-        vp9_filter_block1d4_##dir##2_##avg##opt(src, \
+        vpx_filter_block1d4_##dir##2_##avg##opt(src, \
                                                  src_stride, \
                                                  dst, \
                                                  dst_stride, \
@@ -103,14 +103,14 @@ typedef void filter8_1dfunction (
      } \
    } \
    if (w) { \
-    vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
+    vpx_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
                               filter_x, x_step_q4, filter_y, y_step_q4, \
                               w, h); \
    } \
  }
  
  #define FUN_CONV_2D(avg, opt) \
-void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
+void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                uint8_t *dst, ptrdiff_t dst_stride, \
                                const int16_t *filter_x, int x_step_q4, \
                                const int16_t *filter_y, int y_step_q4, \
@@ -121,23 +121,23 @@ void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
      if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
          filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
        DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
-      vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
+      vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
                                  filter_x, x_step_q4, filter_y, y_step_q4, \
                                  w, h + 7); \
-      vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
+      vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
                                        filter_x, x_step_q4, filter_y, \
                                        y_step_q4, w, h); \
      } else { \
        DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \
-      vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
+      vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
                                  filter_x, x_step_q4, filter_y, y_step_q4, \
                                  w, h + 1); \
-      vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
+      vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
                                        filter_x, x_step_q4, filter_y, \
                                        y_step_q4, w, h); \
      } \
    } else { \
-    vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
+    vpx_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
                             filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
    } \
  }
@@ -155,7 +155,7 @@ typedef void highbd_filter8_1dfunction (
  );
  
  #define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
-  void vp9_highbd_convolve8_##name##_##opt(const uint8_t *src8, \
+  void vpx_highbd_convolve8_##name##_##opt(const uint8_t *src8, \
                                             ptrdiff_t src_stride, \
                                             uint8_t *dst8, \
                                             ptrdiff_t dst_stride, \
@@ -169,7 +169,7 @@ typedef void highbd_filter8_1dfunction (
      uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
      if (filter[0] || filter[1] || filter[2]) { \
        while (w >= 16) { \
-        vp9_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \
+        vpx_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \
                                                          src_stride, \
                                                          dst, \
                                                          dst_stride, \
@@ -181,7 +181,7 @@ typedef void highbd_filter8_1dfunction (
          w -= 16; \
        } \
        while (w >= 8) { \
-        vp9_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \
+        vpx_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \
                                                         src_stride, \
                                                         dst, \
                                                         dst_stride, \
@@ -193,7 +193,7 @@ typedef void highbd_filter8_1dfunction (
          w -= 8; \
        } \
        while (w >= 4) { \
-        vp9_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \
+        vpx_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \
                                                         src_stride, \
                                                         dst, \
                                                         dst_stride, \
@@ -206,7 +206,7 @@ typedef void highbd_filter8_1dfunction (
        } \
      } else { \
        while (w >= 16) { \
-        vp9_highbd_filter_block1d16_##dir##2_##avg##opt(src, \
+        vpx_highbd_filter_block1d16_##dir##2_##avg##opt(src, \
                                                          src_stride, \
                                                          dst, \
                                                          dst_stride, \
@@ -218,7 +218,7 @@ typedef void highbd_filter8_1dfunction (
          w -= 16; \
        } \
        while (w >= 8) { \
-        vp9_highbd_filter_block1d8_##dir##2_##avg##opt(src, \
+        vpx_highbd_filter_block1d8_##dir##2_##avg##opt(src, \
                                                         src_stride, \
                                                         dst, \
                                                         dst_stride, \
@@ -230,7 +230,7 @@ typedef void highbd_filter8_1dfunction (
          w -= 8; \
        } \
        while (w >= 4) { \
-        vp9_highbd_filter_block1d4_##dir##2_##avg##opt(src, \
+        vpx_highbd_filter_block1d4_##dir##2_##avg##opt(src, \
                                                         src_stride, \
                                                         dst, \
                                                         dst_stride, \
@@ -244,14 +244,14 @@ typedef void highbd_filter8_1dfunction (
      } \
    } \
    if (w) { \
-    vp9_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \
+    vpx_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \
                                      filter_x, x_step_q4, filter_y, y_step_q4, \
                                      w, h, bd); \
    } \
  }
  
  #define HIGH_FUN_CONV_2D(avg, opt) \
-void vp9_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
+void vpx_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                       uint8_t *dst, ptrdiff_t dst_stride, \
                                       const int16_t *filter_x, int x_step_q4, \
                                       const int16_t *filter_y, int y_step_q4, \
@@ -262,35 +262,35 @@ void vp9_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
      if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
          filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
-      vp9_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
+      vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
                                         CONVERT_TO_BYTEPTR(fdata2), 64, \
                                         filter_x, x_step_q4, \
                                         filter_y, y_step_q4, \
                                         w, h + 7, bd); \
-      vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \
+      vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \
                                               64, dst, dst_stride, \
                                               filter_x, x_step_q4, \
                                               filter_y, y_step_q4, \
                                               w, h, bd); \
      } else { \
        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
-      vp9_highbd_convolve8_horiz_##opt(src, src_stride, \
+      vpx_highbd_convolve8_horiz_##opt(src, src_stride, \
                                         CONVERT_TO_BYTEPTR(fdata2), 64, \
                                         filter_x, x_step_q4, \
                                         filter_y, y_step_q4, \
                                         w, h + 1, bd); \
-      vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \
+      vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \
                                               dst, dst_stride, \
                                               filter_x, x_step_q4, \
                                               filter_y, y_step_q4, \
                                               w, h, bd); \
      } \
    } else { \
-    vp9_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
+    vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
                                    filter_x, x_step_q4, filter_y, y_step_q4, w, \
                                    h, bd); \
    } \
  }
  #endif  // CONFIG_VP9_HIGHBITDEPTH
  
-#endif  // VP9_COMMON_X86_CONVOLVE_H_
+#endif  // VPX_DSP_X86_CONVOLVE_H_
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vpx_dsp/x86/vpx_asm_stubs.c

similarity index 60%

rename from vp9/common/x86/vp9_asm_stubs.c

rename to vpx_dsp/x86/vpx_asm_stubs.c

index fd55fb8..422b0fc 100644 (file)
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vpx_dsp/x86/vpx_asm_stubs.c
@@ -8,53 +8,53 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
-#include "./vp9_rtcd.h"
  #include "./vpx_config.h"
-#include "vp9/common/x86/convolve.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/x86/convolve.h"
  
  #if HAVE_SSE2
-filter8_1dfunction vp9_filter_block1d16_v8_sse2;
-filter8_1dfunction vp9_filter_block1d16_h8_sse2;
-filter8_1dfunction vp9_filter_block1d8_v8_sse2;
-filter8_1dfunction vp9_filter_block1d8_h8_sse2;
-filter8_1dfunction vp9_filter_block1d4_v8_sse2;
-filter8_1dfunction vp9_filter_block1d4_h8_sse2;
-filter8_1dfunction vp9_filter_block1d16_v8_avg_sse2;
-filter8_1dfunction vp9_filter_block1d16_h8_avg_sse2;
-filter8_1dfunction vp9_filter_block1d8_v8_avg_sse2;
-filter8_1dfunction vp9_filter_block1d8_h8_avg_sse2;
-filter8_1dfunction vp9_filter_block1d4_v8_avg_sse2;
-filter8_1dfunction vp9_filter_block1d4_h8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d16_v8_sse2;
+filter8_1dfunction vpx_filter_block1d16_h8_sse2;
+filter8_1dfunction vpx_filter_block1d8_v8_sse2;
+filter8_1dfunction vpx_filter_block1d8_h8_sse2;
+filter8_1dfunction vpx_filter_block1d4_v8_sse2;
+filter8_1dfunction vpx_filter_block1d4_h8_sse2;
+filter8_1dfunction vpx_filter_block1d16_v8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d16_h8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d8_v8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d8_h8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d4_v8_avg_sse2;
+filter8_1dfunction vpx_filter_block1d4_h8_avg_sse2;
  
-filter8_1dfunction vp9_filter_block1d16_v2_sse2;
-filter8_1dfunction vp9_filter_block1d16_h2_sse2;
-filter8_1dfunction vp9_filter_block1d8_v2_sse2;
-filter8_1dfunction vp9_filter_block1d8_h2_sse2;
-filter8_1dfunction vp9_filter_block1d4_v2_sse2;
-filter8_1dfunction vp9_filter_block1d4_h2_sse2;
-filter8_1dfunction vp9_filter_block1d16_v2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d16_h2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d8_v2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d8_h2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d4_v2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d16_v2_sse2;
+filter8_1dfunction vpx_filter_block1d16_h2_sse2;
+filter8_1dfunction vpx_filter_block1d8_v2_sse2;
+filter8_1dfunction vpx_filter_block1d8_h2_sse2;
+filter8_1dfunction vpx_filter_block1d4_v2_sse2;
+filter8_1dfunction vpx_filter_block1d4_h2_sse2;
+filter8_1dfunction vpx_filter_block1d16_v2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d16_h2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d8_v2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d8_h2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d4_v2_avg_sse2;
+filter8_1dfunction vpx_filter_block1d4_h2_avg_sse2;
  
-// void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
  //                               uint8_t *dst, ptrdiff_t dst_stride,
  //                               const int16_t *filter_x, int x_step_q4,
  //                               const int16_t *filter_y, int y_step_q4,
  //                               int w, int h);
-// void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
  //                              uint8_t *dst, ptrdiff_t dst_stride,
  //                              const int16_t *filter_x, int x_step_q4,
  //                              const int16_t *filter_y, int y_step_q4,
  //                              int w, int h);
-// void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
  //                                   uint8_t *dst, ptrdiff_t dst_stride,
  //                                   const int16_t *filter_x, int x_step_q4,
  //                                   const int16_t *filter_y, int y_step_q4,
  //                                   int w, int h);
-// void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
  //                                  uint8_t *dst, ptrdiff_t dst_stride,
  //                                  const int16_t *filter_x, int x_step_q4,
  //                                  const int16_t *filter_y, int y_step_q4,
@@ -64,12 +64,12 @@ FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
  FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
  FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
  
-// void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
  //                         uint8_t *dst, ptrdiff_t dst_stride,
  //                         const int16_t *filter_x, int x_step_q4,
  //                         const int16_t *filter_y, int y_step_q4,
  //                         int w, int h);
-// void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
  //                             uint8_t *dst, ptrdiff_t dst_stride,
  //                             const int16_t *filter_x, int x_step_q4,
  //                             const int16_t *filter_y, int y_step_q4,
@@ -78,33 +78,33 @@ FUN_CONV_2D(, sse2);
  FUN_CONV_2D(avg_ , sse2);
  
  #if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_v8_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_h8_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_v8_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_h8_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_v8_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_h8_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_v8_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_h8_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_v8_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_h8_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_v8_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_h8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_avg_sse2;
  
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_v2_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_h2_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_v2_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_h2_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_v2_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_h2_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_v2_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d16_h2_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_v2_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d8_h2_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_v2_avg_sse2;
-highbd_filter8_1dfunction vp9_highbd_filter_block1d4_h2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_avg_sse2;
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2;
  
-// void vp9_highbd_convolve8_horiz_sse2(const uint8_t *src,
+// void vpx_highbd_convolve8_horiz_sse2(const uint8_t *src,
  //                                      ptrdiff_t src_stride,
  //                                      uint8_t *dst,
  //                                      ptrdiff_t dst_stride,
@@ -113,7 +113,7 @@ highbd_filter8_1dfunction vp9_highbd_filter_block1d4_h2_avg_sse2;
  //                                      const int16_t *filter_y,
  //                                      int y_step_q4,
  //                                      int w, int h, int bd);
-// void vp9_highbd_convolve8_vert_sse2(const uint8_t *src,
+// void vpx_highbd_convolve8_vert_sse2(const uint8_t *src,
  //                                     ptrdiff_t src_stride,
  //                                     uint8_t *dst,
  //                                     ptrdiff_t dst_stride,
@@ -122,7 +122,7 @@ highbd_filter8_1dfunction vp9_highbd_filter_block1d4_h2_avg_sse2;
  //                                     const int16_t *filter_y,
  //                                     int y_step_q4,
  //                                     int w, int h, int bd);
-// void vp9_highbd_convolve8_avg_horiz_sse2(const uint8_t *src,
+// void vpx_highbd_convolve8_avg_horiz_sse2(const uint8_t *src,
  //                                          ptrdiff_t src_stride,
  //                                          uint8_t *dst,
  //                                          ptrdiff_t dst_stride,
@@ -131,7 +131,7 @@ highbd_filter8_1dfunction vp9_highbd_filter_block1d4_h2_avg_sse2;
  //                                          const int16_t *filter_y,
  //                                          int y_step_q4,
  //                                          int w, int h, int bd);
-// void vp9_highbd_convolve8_avg_vert_sse2(const uint8_t *src,
+// void vpx_highbd_convolve8_avg_vert_sse2(const uint8_t *src,
  //                                         ptrdiff_t src_stride,
  //                                         uint8_t *dst,
  //                                         ptrdiff_t dst_stride,
@@ -146,12 +146,12 @@ HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
  HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
                   sse2);
  
-// void vp9_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
  //                                uint8_t *dst, ptrdiff_t dst_stride,
  //                                const int16_t *filter_x, int x_step_q4,
  //                                const int16_t *filter_y, int y_step_q4,
  //                                int w, int h, int bd);
-// void vp9_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
  //                                    uint8_t *dst, ptrdiff_t dst_stride,
  //                                    const int16_t *filter_x, int x_step_q4,
  //                                    const int16_t *filter_y, int y_step_q4,
diff --git a/vp9/common/x86/vp9_copy_sse2.asm b/vpx_dsp/x86/vpx_convolve_copy_sse2.asm

similarity index 99%

rename from vp9/common/x86/vp9_copy_sse2.asm

rename to vpx_dsp/x86/vpx_convolve_copy_sse2.asm

index b263837..6cd620a 100644 (file)
--- a/vp9/common/x86/vp9_copy_sse2.asm
+++ b/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
@@ -8,6 +8,8 @@
  ;  be found in the AUTHORS file in the root of the source tree.
  ;
  
+%define program_name vpx
+
  %include "third_party/x86inc/x86inc.asm"
  
  SECTION .text
diff --git a/vp9/common/x86/vp9_high_subpixel_8t_sse2.asm b/vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm

similarity index 94%

rename from vp9/common/x86/vp9_high_subpixel_8t_sse2.asm

rename to vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm

index 29ec151..bfc816f 100644 (file)
--- a/vp9/common/x86/vp9_high_subpixel_8t_sse2.asm
+++ b/vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm
@@ -197,7 +197,7 @@
      movdqu      [rdi + %2], xmm0
  %endm
  
-;void vp9_filter_block1d4_v8_sse2
+;void vpx_filter_block1d4_v8_sse2
  ;(
  ;    unsigned char *src_ptr,
  ;    unsigned int   src_pitch,
@@ -206,8 +206,8 @@
  ;    unsigned int   output_height,
  ;    short *filter
  ;)
-global sym(vp9_highbd_filter_block1d4_v8_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_v8_sse2):
+global sym(vpx_highbd_filter_block1d4_v8_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_v8_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -267,7 +267,7 @@ sym(vp9_highbd_filter_block1d4_v8_sse2):
      pop         rbp
      ret
  
-;void vp9_filter_block1d8_v8_sse2
+;void vpx_filter_block1d8_v8_sse2
  ;(
  ;    unsigned char *src_ptr,
  ;    unsigned int   src_pitch,
@@ -276,8 +276,8 @@ sym(vp9_highbd_filter_block1d4_v8_sse2):
  ;    unsigned int   output_height,
  ;    short *filter
  ;)
-global sym(vp9_highbd_filter_block1d8_v8_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_v8_sse2):
+global sym(vpx_highbd_filter_block1d8_v8_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_v8_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -326,7 +326,7 @@ sym(vp9_highbd_filter_block1d8_v8_sse2):
      pop         rbp
      ret
  
-;void vp9_filter_block1d16_v8_sse2
+;void vpx_filter_block1d16_v8_sse2
  ;(
  ;    unsigned char *src_ptr,
  ;    unsigned int   src_pitch,
@@ -335,8 +335,8 @@ sym(vp9_highbd_filter_block1d8_v8_sse2):
  ;    unsigned int   output_height,
  ;    short *filter
  ;)
-global sym(vp9_highbd_filter_block1d16_v8_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_v8_sse2):
+global sym(vpx_highbd_filter_block1d16_v8_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_v8_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -389,8 +389,8 @@ sym(vp9_highbd_filter_block1d16_v8_sse2):
      pop         rbp
      ret
  
-global sym(vp9_highbd_filter_block1d4_v8_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_v8_avg_sse2):
+global sym(vpx_highbd_filter_block1d4_v8_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_v8_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -450,8 +450,8 @@ sym(vp9_highbd_filter_block1d4_v8_avg_sse2):
      pop         rbp
      ret
  
-global sym(vp9_highbd_filter_block1d8_v8_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_v8_avg_sse2):
+global sym(vpx_highbd_filter_block1d8_v8_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_v8_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -499,8 +499,8 @@ sym(vp9_highbd_filter_block1d8_v8_avg_sse2):
      pop         rbp
      ret
  
-global sym(vp9_highbd_filter_block1d16_v8_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_v8_avg_sse2):
+global sym(vpx_highbd_filter_block1d16_v8_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_v8_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -552,7 +552,7 @@ sym(vp9_highbd_filter_block1d16_v8_avg_sse2):
      pop         rbp
      ret
  
-;void vp9_filter_block1d4_h8_sse2
+;void vpx_filter_block1d4_h8_sse2
  ;(
  ;    unsigned char  *src_ptr,
  ;    unsigned int    src_pixels_per_line,
@@ -561,8 +561,8 @@ sym(vp9_highbd_filter_block1d16_v8_avg_sse2):
  ;    unsigned int    output_height,
  ;    short *filter
  ;)
-global sym(vp9_highbd_filter_block1d4_h8_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_h8_sse2):
+global sym(vpx_highbd_filter_block1d4_h8_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_h8_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -627,7 +627,7 @@ sym(vp9_highbd_filter_block1d4_h8_sse2):
      pop         rbp
      ret
  
-;void vp9_filter_block1d8_h8_sse2
+;void vpx_filter_block1d8_h8_sse2
  ;(
  ;    unsigned char  *src_ptr,
  ;    unsigned int    src_pixels_per_line,
@@ -636,8 +636,8 @@ sym(vp9_highbd_filter_block1d4_h8_sse2):
  ;    unsigned int    output_height,
  ;    short *filter
  ;)
-global sym(vp9_highbd_filter_block1d8_h8_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_h8_sse2):
+global sym(vpx_highbd_filter_block1d8_h8_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_h8_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -693,7 +693,7 @@ sym(vp9_highbd_filter_block1d8_h8_sse2):
      pop         rbp
      ret
  
-;void vp9_filter_block1d16_h8_sse2
+;void vpx_filter_block1d16_h8_sse2
  ;(
  ;    unsigned char  *src_ptr,
  ;    unsigned int    src_pixels_per_line,
@@ -702,8 +702,8 @@ sym(vp9_highbd_filter_block1d8_h8_sse2):
  ;    unsigned int    output_height,
  ;    short *filter
  ;)
-global sym(vp9_highbd_filter_block1d16_h8_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_h8_sse2):
+global sym(vpx_highbd_filter_block1d16_h8_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_h8_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -770,8 +770,8 @@ sym(vp9_highbd_filter_block1d16_h8_sse2):
      pop         rbp
      ret
  
-global sym(vp9_highbd_filter_block1d4_h8_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_h8_avg_sse2):
+global sym(vpx_highbd_filter_block1d4_h8_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_h8_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -836,8 +836,8 @@ sym(vp9_highbd_filter_block1d4_h8_avg_sse2):
      pop         rbp
      ret
  
-global sym(vp9_highbd_filter_block1d8_h8_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_h8_avg_sse2):
+global sym(vpx_highbd_filter_block1d8_h8_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_h8_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -893,8 +893,8 @@ sym(vp9_highbd_filter_block1d8_h8_avg_sse2):
      pop         rbp
      ret
  
-global sym(vp9_highbd_filter_block1d16_h8_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_h8_avg_sse2):
+global sym(vpx_highbd_filter_block1d16_h8_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_h8_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
diff --git a/vp9/common/x86/vp9_high_subpixel_bilinear_sse2.asm b/vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm

similarity index 89%

rename from vp9/common/x86/vp9_high_subpixel_bilinear_sse2.asm

rename to vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm

index 9378412..72f2ff7 100644 (file)
--- a/vp9/common/x86/vp9_high_subpixel_bilinear_sse2.asm
+++ b/vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm
@@ -171,8 +171,8 @@
  %endm
  %endif
  
-global sym(vp9_highbd_filter_block1d4_v2_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_v2_sse2):
+global sym(vpx_highbd_filter_block1d4_v2_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_v2_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -196,8 +196,8 @@ sym(vp9_highbd_filter_block1d4_v2_sse2):
      ret
  
  %if ARCH_X86_64
-global sym(vp9_highbd_filter_block1d8_v2_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_v2_sse2):
+global sym(vpx_highbd_filter_block1d8_v2_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_v2_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -222,8 +222,8 @@ sym(vp9_highbd_filter_block1d8_v2_sse2):
      pop         rbp
      ret
  
-global sym(vp9_highbd_filter_block1d16_v2_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_v2_sse2):
+global sym(vpx_highbd_filter_block1d16_v2_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_v2_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -251,8 +251,8 @@ sym(vp9_highbd_filter_block1d16_v2_sse2):
      ret
  %endif
  
-global sym(vp9_highbd_filter_block1d4_v2_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_v2_avg_sse2):
+global sym(vpx_highbd_filter_block1d4_v2_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_v2_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -276,8 +276,8 @@ sym(vp9_highbd_filter_block1d4_v2_avg_sse2):
      ret
  
  %if ARCH_X86_64
-global sym(vp9_highbd_filter_block1d8_v2_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_v2_avg_sse2):
+global sym(vpx_highbd_filter_block1d8_v2_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_v2_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -302,8 +302,8 @@ sym(vp9_highbd_filter_block1d8_v2_avg_sse2):
      pop         rbp
      ret
  
-global sym(vp9_highbd_filter_block1d16_v2_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_v2_avg_sse2):
+global sym(vpx_highbd_filter_block1d16_v2_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_v2_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -331,8 +331,8 @@ sym(vp9_highbd_filter_block1d16_v2_avg_sse2):
      ret
  %endif
  
-global sym(vp9_highbd_filter_block1d4_h2_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_h2_sse2):
+global sym(vpx_highbd_filter_block1d4_h2_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_h2_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -357,8 +357,8 @@ sym(vp9_highbd_filter_block1d4_h2_sse2):
      ret
  
  %if ARCH_X86_64
-global sym(vp9_highbd_filter_block1d8_h2_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_h2_sse2):
+global sym(vpx_highbd_filter_block1d8_h2_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_h2_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -383,8 +383,8 @@ sym(vp9_highbd_filter_block1d8_h2_sse2):
      pop         rbp
      ret
  
-global sym(vp9_highbd_filter_block1d16_h2_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_h2_sse2):
+global sym(vpx_highbd_filter_block1d16_h2_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_h2_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -412,8 +412,8 @@ sym(vp9_highbd_filter_block1d16_h2_sse2):
      ret
  %endif
  
-global sym(vp9_highbd_filter_block1d4_h2_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d4_h2_avg_sse2):
+global sym(vpx_highbd_filter_block1d4_h2_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d4_h2_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -438,8 +438,8 @@ sym(vp9_highbd_filter_block1d4_h2_avg_sse2):
      ret
  
  %if ARCH_X86_64
-global sym(vp9_highbd_filter_block1d8_h2_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d8_h2_avg_sse2):
+global sym(vpx_highbd_filter_block1d8_h2_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d8_h2_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
@@ -464,8 +464,8 @@ sym(vp9_highbd_filter_block1d8_h2_avg_sse2):
      pop         rbp
      ret
  
-global sym(vp9_highbd_filter_block1d16_h2_avg_sse2) PRIVATE
-sym(vp9_highbd_filter_block1d16_h2_avg_sse2):
+global sym(vpx_highbd_filter_block1d16_h2_avg_sse2) PRIVATE
+sym(vpx_highbd_filter_block1d16_h2_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c

similarity index 92%

rename from vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c

rename to vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c

index cee8d1e..29ede19 100644 (file)
--- a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
+++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c
@@ -11,11 +11,11 @@
  // Due to a header conflict between math.h and intrinsics includes with ceil()
  // in certain configurations under vs9 this include needs to precede
  // immintrin.h.
-#include "./vp9_rtcd.h"
  
  #include <immintrin.h>
  
-#include "vp9/common/x86/convolve.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/x86/convolve.h"
  #include "vpx_ports/mem.h"
  
  // filters for 16_h8 and 16_v8
@@ -60,7 +60,7 @@ DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = {
  # define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x)
  #endif  // __clang__
  
-static void vp9_filter_block1d16_h8_avx2(const uint8_t *src_ptr,
+static void vpx_filter_block1d16_h8_avx2(const uint8_t *src_ptr,
                                           ptrdiff_t src_pixels_per_line,
                                           uint8_t *output_ptr,
                                           ptrdiff_t output_pitch,
@@ -304,7 +304,7 @@ static void vp9_filter_block1d16_h8_avx2(const uint8_t *src_ptr,
    }
  }
  
-static void vp9_filter_block1d16_v8_avx2(const uint8_t *src_ptr,
+static void vpx_filter_block1d16_v8_avx2(const uint8_t *src_ptr,
                                           ptrdiff_t src_pitch,
                                           uint8_t *output_ptr,
                                           ptrdiff_t out_pitch,
@@ -551,41 +551,41 @@ static void vp9_filter_block1d16_v8_avx2(const uint8_t *src_ptr,
  }
  
  #if HAVE_AVX2 && HAVE_SSSE3
-filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
+filter8_1dfunction vpx_filter_block1d4_v8_ssse3;
  #if ARCH_X86_64
-filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
-#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3
-#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3
-#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3
+filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3;
+#define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_intrin_ssse3
+#define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_intrin_ssse3
+#define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_intrin_ssse3
  #else  // ARCH_X86
-filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
-#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3
-#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3
-#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3
+filter8_1dfunction vpx_filter_block1d8_v8_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h8_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h8_ssse3;
+#define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_ssse3
+#define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_ssse3
+#define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_ssse3
  #endif  // ARCH_X86_64
-filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
-#define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3
-#define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3
-#define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3
-#define vp9_filter_block1d8_v2_avx2  vp9_filter_block1d8_v2_ssse3
-#define vp9_filter_block1d8_h2_avx2  vp9_filter_block1d8_h2_ssse3
-#define vp9_filter_block1d4_v2_avx2  vp9_filter_block1d4_v2_ssse3
-#define vp9_filter_block1d4_h2_avx2  vp9_filter_block1d4_h2_ssse3
-// void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
+filter8_1dfunction vpx_filter_block1d16_v2_ssse3;
+filter8_1dfunction vpx_filter_block1d16_h2_ssse3;
+filter8_1dfunction vpx_filter_block1d8_v2_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h2_ssse3;
+filter8_1dfunction vpx_filter_block1d4_v2_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h2_ssse3;
+#define vpx_filter_block1d4_v8_avx2 vpx_filter_block1d4_v8_ssse3
+#define vpx_filter_block1d16_v2_avx2 vpx_filter_block1d16_v2_ssse3
+#define vpx_filter_block1d16_h2_avx2 vpx_filter_block1d16_h2_ssse3
+#define vpx_filter_block1d8_v2_avx2  vpx_filter_block1d8_v2_ssse3
+#define vpx_filter_block1d8_h2_avx2  vpx_filter_block1d8_h2_ssse3
+#define vpx_filter_block1d4_v2_avx2  vpx_filter_block1d4_v2_ssse3
+#define vpx_filter_block1d4_h2_avx2  vpx_filter_block1d4_h2_ssse3
+// void vpx_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
  //                                uint8_t *dst, ptrdiff_t dst_stride,
  //                                const int16_t *filter_x, int x_step_q4,
  //                                const int16_t *filter_y, int y_step_q4,
  //                                int w, int h);
-// void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
  //                               uint8_t *dst, ptrdiff_t dst_stride,
  //                               const int16_t *filter_x, int x_step_q4,
  //                               const int16_t *filter_y, int y_step_q4,
@@ -593,7 +593,7 @@ filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
  FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2);
  FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2);
  
-// void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
  //                          uint8_t *dst, ptrdiff_t dst_stride,
  //                          const int16_t *filter_x, int x_step_q4,
  //                          const int16_t *filter_y, int y_step_q4,
diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c

similarity index 89%

rename from vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c

rename to vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c

index 5fd2857..01771de 100644 (file)
--- a/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
+++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
@@ -11,11 +11,11 @@
  // Due to a header conflict between math.h and intrinsics includes with ceil()
  // in certain configurations under vs9 this include needs to precede
  // tmmintrin.h.
-#include "./vp9_rtcd.h"
  
  #include <tmmintrin.h>
  
-#include "vp9/common/x86/convolve.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/x86/convolve.h"
  #include "vpx_ports/mem.h"
  #include "vpx_ports/emmintrin_compat.h"
  
@@ -46,11 +46,11 @@ DECLARE_ALIGNED(16, static const uint8_t, filt4_global[16]) = {
  };
  
  // These are reused by the avx2 intrinsics.
-filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3;
  
-void vp9_filter_block1d4_h8_intrin_ssse3(const uint8_t *src_ptr,
+void vpx_filter_block1d4_h8_intrin_ssse3(const uint8_t *src_ptr,
                                           ptrdiff_t src_pixels_per_line,
                                           uint8_t *output_ptr,
                                           ptrdiff_t output_pitch,
@@ -121,7 +121,7 @@ void vp9_filter_block1d4_h8_intrin_ssse3(const uint8_t *src_ptr,
    }
  }
  
-void vp9_filter_block1d8_h8_intrin_ssse3(const uint8_t *src_ptr,
+void vpx_filter_block1d8_h8_intrin_ssse3(const uint8_t *src_ptr,
                                           ptrdiff_t src_pixels_per_line,
                                           uint8_t *output_ptr,
                                           ptrdiff_t output_pitch,
@@ -201,7 +201,7 @@ void vp9_filter_block1d8_h8_intrin_ssse3(const uint8_t *src_ptr,
    }
  }
  
-static void vp9_filter_block1d16_h8_intrin_ssse3(const uint8_t *src_ptr,
+static void vpx_filter_block1d16_h8_intrin_ssse3(const uint8_t *src_ptr,
                                                   ptrdiff_t src_pixels_per_line,
                                                   uint8_t *output_ptr,
                                                   ptrdiff_t output_pitch,
@@ -318,7 +318,7 @@ static void vp9_filter_block1d16_h8_intrin_ssse3(const uint8_t *src_ptr,
    }
  }
  
-void vp9_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr,
+void vpx_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr,
                                           ptrdiff_t src_pitch,
                                           uint8_t *output_ptr,
                                           ptrdiff_t out_pitch,
@@ -406,7 +406,7 @@ void vp9_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr,
    }
  }
  
-static void vp9_filter_block1d16_v8_intrin_ssse3(const uint8_t *src_ptr,
+static void vpx_filter_block1d16_v8_intrin_ssse3(const uint8_t *src_ptr,
                                                   ptrdiff_t src_pitch,
                                                   uint8_t *output_ptr,
                                                   ptrdiff_t out_pitch,
@@ -522,61 +522,61 @@ static void vp9_filter_block1d16_v8_intrin_ssse3(const uint8_t *src_ptr,
  }
  
  #if ARCH_X86_64
-filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
-#define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3
-#define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3
-#define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3
-#define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3
-#define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3
+filter8_1dfunction vpx_filter_block1d16_v8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d16_h8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3;
+filter8_1dfunction vpx_filter_block1d4_v8_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3;
+#define vpx_filter_block1d16_v8_ssse3 vpx_filter_block1d16_v8_intrin_ssse3
+#define vpx_filter_block1d16_h8_ssse3 vpx_filter_block1d16_h8_intrin_ssse3
+#define vpx_filter_block1d8_v8_ssse3 vpx_filter_block1d8_v8_intrin_ssse3
+#define vpx_filter_block1d8_h8_ssse3 vpx_filter_block1d8_h8_intrin_ssse3
+#define vpx_filter_block1d4_h8_ssse3 vpx_filter_block1d4_h8_intrin_ssse3
  #else  // ARCH_X86
-filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
+filter8_1dfunction vpx_filter_block1d16_v8_ssse3;
+filter8_1dfunction vpx_filter_block1d16_h8_ssse3;
+filter8_1dfunction vpx_filter_block1d8_v8_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h8_ssse3;
+filter8_1dfunction vpx_filter_block1d4_v8_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h8_ssse3;
  #endif  // ARCH_X86_64
-filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
-
-filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
-filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3;
-
-// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+filter8_1dfunction vpx_filter_block1d16_v8_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d16_h8_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d8_v8_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h8_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d4_v8_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h8_avg_ssse3;
+
+filter8_1dfunction vpx_filter_block1d16_v2_ssse3;
+filter8_1dfunction vpx_filter_block1d16_h2_ssse3;
+filter8_1dfunction vpx_filter_block1d8_v2_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h2_ssse3;
+filter8_1dfunction vpx_filter_block1d4_v2_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h2_ssse3;
+filter8_1dfunction vpx_filter_block1d16_v2_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d16_h2_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d8_v2_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d8_h2_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d4_v2_avg_ssse3;
+filter8_1dfunction vpx_filter_block1d4_h2_avg_ssse3;
+
+// void vpx_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
  //                                uint8_t *dst, ptrdiff_t dst_stride,
  //                                const int16_t *filter_x, int x_step_q4,
  //                                const int16_t *filter_y, int y_step_q4,
  //                                int w, int h);
-// void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
  //                               uint8_t *dst, ptrdiff_t dst_stride,
  //                               const int16_t *filter_x, int x_step_q4,
  //                               const int16_t *filter_y, int y_step_q4,
  //                               int w, int h);
-// void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
  //                                    uint8_t *dst, ptrdiff_t dst_stride,
  //                                    const int16_t *filter_x, int x_step_q4,
  //                                    const int16_t *filter_y, int y_step_q4,
  //                                    int w, int h);
-// void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
  //                                   uint8_t *dst, ptrdiff_t dst_stride,
  //                                   const int16_t *filter_x, int x_step_q4,
  //                                   const int16_t *filter_y, int y_step_q4,
@@ -587,12 +587,12 @@ FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3);
  FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
              ssse3);
  
-// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
  //                          uint8_t *dst, ptrdiff_t dst_stride,
  //                          const int16_t *filter_x, int x_step_q4,
  //                          const int16_t *filter_y, int y_step_q4,
  //                          int w, int h);
-// void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// void vpx_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
  //                              uint8_t *dst, ptrdiff_t dst_stride,
  //                              const int16_t *filter_x, int x_step_q4,
  //                              const int16_t *filter_y, int y_step_q4,
diff --git a/vp9/common/x86/vp9_subpixel_8t_sse2.asm b/vpx_dsp/x86/vpx_subpixel_8t_sse2.asm

similarity index 94%

rename from vp9/common/x86/vp9_subpixel_8t_sse2.asm

rename to vpx_dsp/x86/vpx_subpixel_8t_sse2.asm

index 9dc8d0a..08f3d6a 100644 (file)
--- a/vp9/common/x86/vp9_subpixel_8t_sse2.asm
+++ b/vpx_dsp/x86/vpx_subpixel_8t_sse2.asm
@@ -176,7 +176,7 @@
      movq        [rdi + %2], xmm0
  %endm
  
-;void vp9_filter_block1d4_v8_sse2
+;void vpx_filter_block1d4_v8_sse2
  ;(
  ;    unsigned char *src_ptr,
  ;    unsigned int   src_pitch,
@@ -185,8 +185,8 @@
  ;    unsigned int   output_height,
  ;    short *filter
  ;)
-global sym(vp9_filter_block1d4_v8_sse2) PRIVATE
-sym(vp9_filter_block1d4_v8_sse2):
+global sym(vpx_filter_block1d4_v8_sse2) PRIVATE
+sym(vpx_filter_block1d4_v8_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -243,7 +243,7 @@ sym(vp9_filter_block1d4_v8_sse2):
      pop         rbp
      ret
  
-;void vp9_filter_block1d8_v8_sse2
+;void vpx_filter_block1d8_v8_sse2
  ;(
  ;    unsigned char *src_ptr,
  ;    unsigned int   src_pitch,
@@ -252,8 +252,8 @@ sym(vp9_filter_block1d4_v8_sse2):
  ;    unsigned int   output_height,
  ;    short *filter
  ;)
-global sym(vp9_filter_block1d8_v8_sse2) PRIVATE
-sym(vp9_filter_block1d8_v8_sse2):
+global sym(vpx_filter_block1d8_v8_sse2) PRIVATE
+sym(vpx_filter_block1d8_v8_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -302,7 +302,7 @@ sym(vp9_filter_block1d8_v8_sse2):
      pop         rbp
      ret
  
-;void vp9_filter_block1d16_v8_sse2
+;void vpx_filter_block1d16_v8_sse2
  ;(
  ;    unsigned char *src_ptr,
  ;    unsigned int   src_pitch,
@@ -311,8 +311,8 @@ sym(vp9_filter_block1d8_v8_sse2):
  ;    unsigned int   output_height,
  ;    short *filter
  ;)
-global sym(vp9_filter_block1d16_v8_sse2) PRIVATE
-sym(vp9_filter_block1d16_v8_sse2):
+global sym(vpx_filter_block1d16_v8_sse2) PRIVATE
+sym(vpx_filter_block1d16_v8_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -365,8 +365,8 @@ sym(vp9_filter_block1d16_v8_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d4_v8_avg_sse2) PRIVATE
-sym(vp9_filter_block1d4_v8_avg_sse2):
+global sym(vpx_filter_block1d4_v8_avg_sse2) PRIVATE
+sym(vpx_filter_block1d4_v8_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -423,8 +423,8 @@ sym(vp9_filter_block1d4_v8_avg_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d8_v8_avg_sse2) PRIVATE
-sym(vp9_filter_block1d8_v8_avg_sse2):
+global sym(vpx_filter_block1d8_v8_avg_sse2) PRIVATE
+sym(vpx_filter_block1d8_v8_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -472,8 +472,8 @@ sym(vp9_filter_block1d8_v8_avg_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d16_v8_avg_sse2) PRIVATE
-sym(vp9_filter_block1d16_v8_avg_sse2):
+global sym(vpx_filter_block1d16_v8_avg_sse2) PRIVATE
+sym(vpx_filter_block1d16_v8_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -525,7 +525,7 @@ sym(vp9_filter_block1d16_v8_avg_sse2):
      pop         rbp
      ret
  
-;void vp9_filter_block1d4_h8_sse2
+;void vpx_filter_block1d4_h8_sse2
  ;(
  ;    unsigned char  *src_ptr,
  ;    unsigned int    src_pixels_per_line,
@@ -534,8 +534,8 @@ sym(vp9_filter_block1d16_v8_avg_sse2):
  ;    unsigned int    output_height,
  ;    short *filter
  ;)
-global sym(vp9_filter_block1d4_h8_sse2) PRIVATE
-sym(vp9_filter_block1d4_h8_sse2):
+global sym(vpx_filter_block1d4_h8_sse2) PRIVATE
+sym(vpx_filter_block1d4_h8_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -599,7 +599,7 @@ sym(vp9_filter_block1d4_h8_sse2):
      pop         rbp
      ret
  
-;void vp9_filter_block1d8_h8_sse2
+;void vpx_filter_block1d8_h8_sse2
  ;(
  ;    unsigned char  *src_ptr,
  ;    unsigned int    src_pixels_per_line,
@@ -608,8 +608,8 @@ sym(vp9_filter_block1d4_h8_sse2):
  ;    unsigned int    output_height,
  ;    short *filter
  ;)
-global sym(vp9_filter_block1d8_h8_sse2) PRIVATE
-sym(vp9_filter_block1d8_h8_sse2):
+global sym(vpx_filter_block1d8_h8_sse2) PRIVATE
+sym(vpx_filter_block1d8_h8_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -674,7 +674,7 @@ sym(vp9_filter_block1d8_h8_sse2):
      pop         rbp
      ret
  
-;void vp9_filter_block1d16_h8_sse2
+;void vpx_filter_block1d16_h8_sse2
  ;(
  ;    unsigned char  *src_ptr,
  ;    unsigned int    src_pixels_per_line,
@@ -683,8 +683,8 @@ sym(vp9_filter_block1d8_h8_sse2):
  ;    unsigned int    output_height,
  ;    short *filter
  ;)
-global sym(vp9_filter_block1d16_h8_sse2) PRIVATE
-sym(vp9_filter_block1d16_h8_sse2):
+global sym(vpx_filter_block1d16_h8_sse2) PRIVATE
+sym(vpx_filter_block1d16_h8_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -769,8 +769,8 @@ sym(vp9_filter_block1d16_h8_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d4_h8_avg_sse2) PRIVATE
-sym(vp9_filter_block1d4_h8_avg_sse2):
+global sym(vpx_filter_block1d4_h8_avg_sse2) PRIVATE
+sym(vpx_filter_block1d4_h8_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -834,8 +834,8 @@ sym(vp9_filter_block1d4_h8_avg_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d8_h8_avg_sse2) PRIVATE
-sym(vp9_filter_block1d8_h8_avg_sse2):
+global sym(vpx_filter_block1d8_h8_avg_sse2) PRIVATE
+sym(vpx_filter_block1d8_h8_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -900,8 +900,8 @@ sym(vp9_filter_block1d8_h8_avg_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d16_h8_avg_sse2) PRIVATE
-sym(vp9_filter_block1d16_h8_avg_sse2):
+global sym(vpx_filter_block1d16_h8_avg_sse2) PRIVATE
+sym(vpx_filter_block1d16_h8_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm

similarity index 95%

rename from vp9/common/x86/vp9_subpixel_8t_ssse3.asm

rename to vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm

index 4a5bf1b..68acc03 100644 (file)
--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
+++ b/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
@@ -310,7 +310,7 @@
      jnz         .loop
  %endm
  
-;void vp9_filter_block1d8_v8_ssse3
+;void vpx_filter_block1d8_v8_ssse3
  ;(
  ;    unsigned char *src_ptr,
  ;    unsigned int   src_pitch,
@@ -319,8 +319,8 @@
  ;    unsigned int   output_height,
  ;    short *filter
  ;)
-global sym(vp9_filter_block1d4_v8_ssse3) PRIVATE
-sym(vp9_filter_block1d4_v8_ssse3):
+global sym(vpx_filter_block1d4_v8_ssse3) PRIVATE
+sym(vpx_filter_block1d4_v8_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -351,7 +351,7 @@ sym(vp9_filter_block1d4_v8_ssse3):
      pop         rbp
      ret
  
-;void vp9_filter_block1d8_v8_ssse3
+;void vpx_filter_block1d8_v8_ssse3
  ;(
  ;    unsigned char *src_ptr,
  ;    unsigned int   src_pitch,
@@ -360,8 +360,8 @@ sym(vp9_filter_block1d4_v8_ssse3):
  ;    unsigned int   output_height,
  ;    short *filter
  ;)
-global sym(vp9_filter_block1d8_v8_ssse3) PRIVATE
-sym(vp9_filter_block1d8_v8_ssse3):
+global sym(vpx_filter_block1d8_v8_ssse3) PRIVATE
+sym(vpx_filter_block1d8_v8_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -392,7 +392,7 @@ sym(vp9_filter_block1d8_v8_ssse3):
      pop         rbp
      ret
  
-;void vp9_filter_block1d16_v8_ssse3
+;void vpx_filter_block1d16_v8_ssse3
  ;(
  ;    unsigned char *src_ptr,
  ;    unsigned int   src_pitch,
@@ -401,8 +401,8 @@ sym(vp9_filter_block1d8_v8_ssse3):
  ;    unsigned int   output_height,
  ;    short *filter
  ;)
-global sym(vp9_filter_block1d16_v8_ssse3) PRIVATE
-sym(vp9_filter_block1d16_v8_ssse3):
+global sym(vpx_filter_block1d16_v8_ssse3) PRIVATE
+sym(vpx_filter_block1d16_v8_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -436,8 +436,8 @@ sym(vp9_filter_block1d16_v8_ssse3):
  ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  
  
-global sym(vp9_filter_block1d4_v8_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d4_v8_avg_ssse3):
+global sym(vpx_filter_block1d4_v8_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d4_v8_avg_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -468,8 +468,8 @@ sym(vp9_filter_block1d4_v8_avg_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d8_v8_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d8_v8_avg_ssse3):
+global sym(vpx_filter_block1d8_v8_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d8_v8_avg_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -500,8 +500,8 @@ sym(vp9_filter_block1d8_v8_avg_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d16_v8_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d16_v8_avg_ssse3):
+global sym(vpx_filter_block1d16_v8_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d16_v8_avg_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -838,7 +838,7 @@ sym(vp9_filter_block1d16_v8_avg_ssse3):
      jnz         .loop
  %endm
  
-;void vp9_filter_block1d4_h8_ssse3
+;void vpx_filter_block1d4_h8_ssse3
  ;(
  ;    unsigned char  *src_ptr,
  ;    unsigned int    src_pixels_per_line,
@@ -847,8 +847,8 @@ sym(vp9_filter_block1d16_v8_avg_ssse3):
  ;    unsigned int    output_height,
  ;    short *filter
  ;)
-global sym(vp9_filter_block1d4_h8_ssse3) PRIVATE
-sym(vp9_filter_block1d4_h8_ssse3):
+global sym(vpx_filter_block1d4_h8_ssse3) PRIVATE
+sym(vpx_filter_block1d4_h8_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -877,7 +877,7 @@ sym(vp9_filter_block1d4_h8_ssse3):
      pop         rbp
      ret
  
-;void vp9_filter_block1d8_h8_ssse3
+;void vpx_filter_block1d8_h8_ssse3
  ;(
  ;    unsigned char  *src_ptr,
  ;    unsigned int    src_pixels_per_line,
@@ -886,8 +886,8 @@ sym(vp9_filter_block1d4_h8_ssse3):
  ;    unsigned int    output_height,
  ;    short *filter
  ;)
-global sym(vp9_filter_block1d8_h8_ssse3) PRIVATE
-sym(vp9_filter_block1d8_h8_ssse3):
+global sym(vpx_filter_block1d8_h8_ssse3) PRIVATE
+sym(vpx_filter_block1d8_h8_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -919,7 +919,7 @@ sym(vp9_filter_block1d8_h8_ssse3):
      pop         rbp
      ret
  
-;void vp9_filter_block1d16_h8_ssse3
+;void vpx_filter_block1d16_h8_ssse3
  ;(
  ;    unsigned char  *src_ptr,
  ;    unsigned int    src_pixels_per_line,
@@ -928,8 +928,8 @@ sym(vp9_filter_block1d8_h8_ssse3):
  ;    unsigned int    output_height,
  ;    short *filter
  ;)
-global sym(vp9_filter_block1d16_h8_ssse3) PRIVATE
-sym(vp9_filter_block1d16_h8_ssse3):
+global sym(vpx_filter_block1d16_h8_ssse3) PRIVATE
+sym(vpx_filter_block1d16_h8_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -961,8 +961,8 @@ sym(vp9_filter_block1d16_h8_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d4_h8_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d4_h8_avg_ssse3):
+global sym(vpx_filter_block1d4_h8_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d4_h8_avg_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -991,8 +991,8 @@ sym(vp9_filter_block1d4_h8_avg_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d8_h8_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d8_h8_avg_ssse3):
+global sym(vpx_filter_block1d8_h8_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d8_h8_avg_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -1024,8 +1024,8 @@ sym(vp9_filter_block1d8_h8_avg_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d16_h8_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d16_h8_avg_ssse3):
+global sym(vpx_filter_block1d16_h8_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d16_h8_avg_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
diff --git a/vp9/common/x86/vp9_subpixel_bilinear_sse2.asm b/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm

similarity index 89%

rename from vp9/common/x86/vp9_subpixel_bilinear_sse2.asm

rename to vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm

index d94ccf2..a378dd0 100644 (file)
--- a/vp9/common/x86/vp9_subpixel_bilinear_sse2.asm
+++ b/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm
@@ -131,8 +131,8 @@
      dec         rcx
  %endm
  
-global sym(vp9_filter_block1d4_v2_sse2) PRIVATE
-sym(vp9_filter_block1d4_v2_sse2):
+global sym(vpx_filter_block1d4_v2_sse2) PRIVATE
+sym(vpx_filter_block1d4_v2_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -155,8 +155,8 @@ sym(vp9_filter_block1d4_v2_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d8_v2_sse2) PRIVATE
-sym(vp9_filter_block1d8_v2_sse2):
+global sym(vpx_filter_block1d8_v2_sse2) PRIVATE
+sym(vpx_filter_block1d8_v2_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -181,8 +181,8 @@ sym(vp9_filter_block1d8_v2_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d16_v2_sse2) PRIVATE
-sym(vp9_filter_block1d16_v2_sse2):
+global sym(vpx_filter_block1d16_v2_sse2) PRIVATE
+sym(vpx_filter_block1d16_v2_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -209,8 +209,8 @@ sym(vp9_filter_block1d16_v2_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d4_v2_avg_sse2) PRIVATE
-sym(vp9_filter_block1d4_v2_avg_sse2):
+global sym(vpx_filter_block1d4_v2_avg_sse2) PRIVATE
+sym(vpx_filter_block1d4_v2_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -233,8 +233,8 @@ sym(vp9_filter_block1d4_v2_avg_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d8_v2_avg_sse2) PRIVATE
-sym(vp9_filter_block1d8_v2_avg_sse2):
+global sym(vpx_filter_block1d8_v2_avg_sse2) PRIVATE
+sym(vpx_filter_block1d8_v2_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -259,8 +259,8 @@ sym(vp9_filter_block1d8_v2_avg_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d16_v2_avg_sse2) PRIVATE
-sym(vp9_filter_block1d16_v2_avg_sse2):
+global sym(vpx_filter_block1d16_v2_avg_sse2) PRIVATE
+sym(vpx_filter_block1d16_v2_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -287,8 +287,8 @@ sym(vp9_filter_block1d16_v2_avg_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d4_h2_sse2) PRIVATE
-sym(vp9_filter_block1d4_h2_sse2):
+global sym(vpx_filter_block1d4_h2_sse2) PRIVATE
+sym(vpx_filter_block1d4_h2_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -312,8 +312,8 @@ sym(vp9_filter_block1d4_h2_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d8_h2_sse2) PRIVATE
-sym(vp9_filter_block1d8_h2_sse2):
+global sym(vpx_filter_block1d8_h2_sse2) PRIVATE
+sym(vpx_filter_block1d8_h2_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -339,8 +339,8 @@ sym(vp9_filter_block1d8_h2_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d16_h2_sse2) PRIVATE
-sym(vp9_filter_block1d16_h2_sse2):
+global sym(vpx_filter_block1d16_h2_sse2) PRIVATE
+sym(vpx_filter_block1d16_h2_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -367,8 +367,8 @@ sym(vp9_filter_block1d16_h2_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d4_h2_avg_sse2) PRIVATE
-sym(vp9_filter_block1d4_h2_avg_sse2):
+global sym(vpx_filter_block1d4_h2_avg_sse2) PRIVATE
+sym(vpx_filter_block1d4_h2_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -392,8 +392,8 @@ sym(vp9_filter_block1d4_h2_avg_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d8_h2_avg_sse2) PRIVATE
-sym(vp9_filter_block1d8_h2_avg_sse2):
+global sym(vpx_filter_block1d8_h2_avg_sse2) PRIVATE
+sym(vpx_filter_block1d8_h2_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -419,8 +419,8 @@ sym(vp9_filter_block1d8_h2_avg_sse2):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d16_h2_avg_sse2) PRIVATE
-sym(vp9_filter_block1d16_h2_avg_sse2):
+global sym(vpx_filter_block1d16_h2_avg_sse2) PRIVATE
+sym(vpx_filter_block1d16_h2_avg_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
diff --git a/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm b/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm

similarity index 88%

rename from vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm

rename to vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm

index b5e18fe..3c8cfd2 100644 (file)
--- a/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm
+++ b/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm
@@ -109,8 +109,8 @@
      dec         rcx
  %endm
  
-global sym(vp9_filter_block1d4_v2_ssse3) PRIVATE
-sym(vp9_filter_block1d4_v2_ssse3):
+global sym(vpx_filter_block1d4_v2_ssse3) PRIVATE
+sym(vpx_filter_block1d4_v2_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -133,8 +133,8 @@ sym(vp9_filter_block1d4_v2_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d8_v2_ssse3) PRIVATE
-sym(vp9_filter_block1d8_v2_ssse3):
+global sym(vpx_filter_block1d8_v2_ssse3) PRIVATE
+sym(vpx_filter_block1d8_v2_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -159,8 +159,8 @@ sym(vp9_filter_block1d8_v2_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d16_v2_ssse3) PRIVATE
-sym(vp9_filter_block1d16_v2_ssse3):
+global sym(vpx_filter_block1d16_v2_ssse3) PRIVATE
+sym(vpx_filter_block1d16_v2_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -186,8 +186,8 @@ sym(vp9_filter_block1d16_v2_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d4_v2_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d4_v2_avg_ssse3):
+global sym(vpx_filter_block1d4_v2_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d4_v2_avg_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -210,8 +210,8 @@ sym(vp9_filter_block1d4_v2_avg_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d8_v2_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d8_v2_avg_ssse3):
+global sym(vpx_filter_block1d8_v2_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d8_v2_avg_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -236,8 +236,8 @@ sym(vp9_filter_block1d8_v2_avg_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d16_v2_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d16_v2_avg_ssse3):
+global sym(vpx_filter_block1d16_v2_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d16_v2_avg_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -263,8 +263,8 @@ sym(vp9_filter_block1d16_v2_avg_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d4_h2_ssse3) PRIVATE
-sym(vp9_filter_block1d4_h2_ssse3):
+global sym(vpx_filter_block1d4_h2_ssse3) PRIVATE
+sym(vpx_filter_block1d4_h2_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -288,8 +288,8 @@ sym(vp9_filter_block1d4_h2_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d8_h2_ssse3) PRIVATE
-sym(vp9_filter_block1d8_h2_ssse3):
+global sym(vpx_filter_block1d8_h2_ssse3) PRIVATE
+sym(vpx_filter_block1d8_h2_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -315,8 +315,8 @@ sym(vp9_filter_block1d8_h2_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d16_h2_ssse3) PRIVATE
-sym(vp9_filter_block1d16_h2_ssse3):
+global sym(vpx_filter_block1d16_h2_ssse3) PRIVATE
+sym(vpx_filter_block1d16_h2_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -342,8 +342,8 @@ sym(vp9_filter_block1d16_h2_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d4_h2_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d4_h2_avg_ssse3):
+global sym(vpx_filter_block1d4_h2_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d4_h2_avg_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -367,8 +367,8 @@ sym(vp9_filter_block1d4_h2_avg_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d8_h2_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d8_h2_avg_ssse3):
+global sym(vpx_filter_block1d8_h2_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d8_h2_avg_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
@@ -394,8 +394,8 @@ sym(vp9_filter_block1d8_h2_avg_ssse3):
      pop         rbp
      ret
  
-global sym(vp9_filter_block1d16_h2_avg_ssse3) PRIVATE
-sym(vp9_filter_block1d16_h2_avg_ssse3):
+global sym(vpx_filter_block1d16_h2_avg_ssse3) PRIVATE
+sym(vpx_filter_block1d16_h2_avg_ssse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 6
author	Zoe Liu <zoeliu@google.com>
	Wed, 22 Jul 2015 17:40:42 +0000 (10:40 -0700)
committer	Zoe Liu <zoeliu@google.com>
	Fri, 31 Jul 2015 17:27:33 +0000 (10:27 -0700)
test/convolve_test.cc		patch \| blob \| history
vp9/common/arm/neon/vp9_convolve8_avg_neon.c	[deleted file]	patch \| blob \| history
vp9/common/arm/neon/vp9_convolve8_neon.c	[deleted file]	patch \| blob \| history
vp9/common/arm/neon/vp9_convolve_avg_neon.c	[deleted file]	patch \| blob \| history
vp9/common/arm/neon/vp9_copy_neon.c	[deleted file]	patch \| blob \| history
vp9/common/vp9_entropymode.h		patch \| blob \| history
vp9/common/vp9_filter.h		patch \| blob \| history
vp9/common/vp9_idct.h		patch \| blob \| history
vp9/common/vp9_reconinter.c		patch \| blob \| history
vp9/common/vp9_reconinter.h		patch \| blob \| history
vp9/common/vp9_reconintra.c		patch \| blob \| history
vp9/common/vp9_rtcd_defs.pl		patch \| blob \| history
vp9/common/vp9_scale.c		patch \| blob \| history
vp9/common/vp9_scale.h		patch \| blob \| history
vp9/decoder/vp9_decodeframe.c		patch \| blob \| history
vp9/encoder/vp9_blockiness.c		patch \| blob \| history
vp9/encoder/vp9_denoiser.c		patch \| blob \| history
vp9/encoder/vp9_encoder.c		patch \| blob \| history
vp9/encoder/vp9_pickmode.c		patch \| blob \| history
vp9/encoder/vp9_resize.c		patch \| blob \| history
vp9/vp9_common.mk		patch \| blob \| history
vpx_dsp/arm/vpx_convolve8_avg_neon.c	[new file with mode: 0644]	patch \| blob
vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm	[moved from vp9/common/arm/neon/vp9_convolve8_avg_neon_asm.asm with 92% similarity]	patch \| blob \| history
vpx_dsp/arm/vpx_convolve8_neon.c	[new file with mode: 0644]	patch \| blob
vpx_dsp/arm/vpx_convolve8_neon_asm.asm	[moved from vp9/common/arm/neon/vp9_convolve8_neon_asm.asm with 92% similarity]	patch \| blob \| history
vpx_dsp/arm/vpx_convolve_avg_neon.c	[new file with mode: 0644]	patch \| blob
vpx_dsp/arm/vpx_convolve_avg_neon_asm.asm	[moved from vp9/common/arm/neon/vp9_convolve_avg_neon_asm.asm with 98% similarity]	patch \| blob \| history
vpx_dsp/arm/vpx_convolve_copy_neon.c	[new file with mode: 0644]	patch \| blob
vpx_dsp/arm/vpx_convolve_copy_neon_asm.asm	[moved from vp9/common/arm/neon/vp9_copy_neon_asm.asm with 97% similarity]	patch \| blob \| history
vpx_dsp/arm/vpx_convolve_neon.c	[moved from vp9/common/arm/neon/vp9_convolve_neon.c with 85% similarity]	patch \| blob \| history
vpx_dsp/loopfilter.c		patch \| blob \| history
vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c	[moved from vp9/common/mips/msa/vp9_convolve8_avg_horiz_msa.c with 98% similarity]	patch \| blob \| history
vpx_dsp/mips/vpx_convolve8_avg_msa.c	[moved from vp9/common/mips/msa/vp9_convolve8_avg_msa.c with 98% similarity]	patch \| blob \| history
vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c	[moved from vp9/common/mips/msa/vp9_convolve8_avg_vert_msa.c with 98% similarity]	patch \| blob \| history
vpx_dsp/mips/vpx_convolve8_horiz_msa.c	[moved from vp9/common/mips/msa/vp9_convolve8_horiz_msa.c with 98% similarity]	patch \| blob \| history
vpx_dsp/mips/vpx_convolve8_msa.c	[moved from vp9/common/mips/msa/vp9_convolve8_msa.c with 98% similarity]	patch \| blob \| history
vpx_dsp/mips/vpx_convolve8_vert_msa.c	[moved from vp9/common/mips/msa/vp9_convolve8_vert_msa.c with 98% similarity]	patch \| blob \| history
vpx_dsp/mips/vpx_convolve_avg_msa.c	[moved from vp9/common/mips/msa/vp9_convolve_avg_msa.c with 99% similarity]	patch \| blob \| history
vpx_dsp/mips/vpx_convolve_copy_msa.c	[moved from vp9/common/mips/msa/vp9_convolve_copy_msa.c with 99% similarity]	patch \| blob \| history
vpx_dsp/mips/vpx_convolve_msa.h	[moved from vp9/common/mips/msa/vp9_convolve_msa.h with 97% similarity]	patch \| blob \| history
vpx_dsp/vpx_convolve.c	[moved from vp9/common/vp9_convolve.c with 93% similarity]	patch \| blob \| history
vpx_dsp/vpx_convolve.h	[moved from vp9/common/vp9_convolve.h with 92% similarity]	patch \| blob \| history
vpx_dsp/vpx_dsp.mk		patch \| blob \| history
vpx_dsp/vpx_dsp_common.h		patch \| blob \| history
vpx_dsp/vpx_dsp_rtcd_defs.pl		patch \| blob \| history
vpx_dsp/vpx_filter.h	[new file with mode: 0644]	patch \| blob
vpx_dsp/x86/convolve.h	[moved from vp9/common/x86/convolve.h with 85% similarity]	patch \| blob \| history
vpx_dsp/x86/vpx_asm_stubs.c	[moved from vp9/common/x86/vp9_asm_stubs.c with 60% similarity]	patch \| blob \| history
vpx_dsp/x86/vpx_convolve_copy_sse2.asm	[moved from vp9/common/x86/vp9_copy_sse2.asm with 99% similarity]	patch \| blob \| history
vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm	[moved from vp9/common/x86/vp9_high_subpixel_8t_sse2.asm with 94% similarity]	patch \| blob \| history
vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm	[moved from vp9/common/x86/vp9_high_subpixel_bilinear_sse2.asm with 89% similarity]	patch \| blob \| history
vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c	[moved from vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c with 92% similarity]	patch \| blob \| history
vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c	[moved from vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c with 89% similarity]	patch \| blob \| history
vpx_dsp/x86/vpx_subpixel_8t_sse2.asm	[moved from vp9/common/x86/vp9_subpixel_8t_sse2.asm with 94% similarity]	patch \| blob \| history
vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm	[moved from vp9/common/x86/vp9_subpixel_8t_ssse3.asm with 95% similarity]	patch \| blob \| history
vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm	[moved from vp9/common/x86/vp9_subpixel_bilinear_sse2.asm with 89% similarity]	patch \| blob \| history
vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm	[moved from vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm with 88% similarity]	patch \| blob \| history