vpx_lpf_vertical_4: remove unused count param

author James Zern <jzern@google.com>
Fri, 12 Feb 2016 03:54:51 +0000 (19:54 -0800)
committer James Zern <jzern@google.com>
Tue, 16 Feb 2016 22:59:00 +0000 (14:59 -0800)
diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc

index 9697c88..5c83f3a 100644 (file)
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -459,7 +459,8 @@ INSTANTIATE_TEST_CASE_P(
      MMX, Loop8Test6Param,
      ::testing::Values(
          make_tuple(&vpx_lpf_horizontal_4_mmx, &vpx_lpf_horizontal_4_c, 8, 1),
-        make_tuple(&vpx_lpf_vertical_4_mmx, &vpx_lpf_vertical_4_c, 8, 1)));
+        make_tuple(&wrapper_nc<vpx_lpf_vertical_4_mmx>,
+                   &wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1)));
  #endif  // HAVE_MMX
  
  #if HAVE_SSE2
@@ -609,8 +610,8 @@ INSTANTIATE_TEST_CASE_P(
                     &wrapper_nc<vpx_lpf_vertical_8_c>, 8, 1),
          make_tuple(&vpx_lpf_horizontal_4_neon,
                     &vpx_lpf_horizontal_4_c, 8, 1),
-        make_tuple(&vpx_lpf_vertical_4_neon,
-                   &vpx_lpf_vertical_4_c, 8, 1)));
+        make_tuple(&wrapper_nc<vpx_lpf_vertical_4_neon>,
+                   &wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1)));
  INSTANTIATE_TEST_CASE_P(
      NEON, Loop8Test9Param,
      ::testing::Values(
@@ -637,7 +638,8 @@ INSTANTIATE_TEST_CASE_P(
                     &vpx_lpf_horizontal_16_c, 8, 1),
          make_tuple(&vpx_lpf_horizontal_16_dspr2,
                     &vpx_lpf_horizontal_16_c, 8, 2),
-        make_tuple(&vpx_lpf_vertical_4_dspr2, &vpx_lpf_vertical_4_c, 8, 1),
+        make_tuple(&wrapper_nc<vpx_lpf_vertical_4_dspr2>,
+                   &wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_vertical_8_dspr2>,
                     &wrapper_nc<vpx_lpf_vertical_8_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_vertical_16_dspr2>,
@@ -666,7 +668,8 @@ INSTANTIATE_TEST_CASE_P(
          make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1),
          make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
          make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2),
-        make_tuple(&vpx_lpf_vertical_4_msa, &vpx_lpf_vertical_4_c, 8, 1),
+        make_tuple(&wrapper_nc<vpx_lpf_vertical_4_msa>,
+                   &wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_vertical_8_msa>,
                     &wrapper_nc<vpx_lpf_vertical_8_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_vertical_16_msa>,
diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c

index 4171c1e..9f55dc2 100644 (file)
--- a/vp10/common/loopfilter.c
+++ b/vp10/common/loopfilter.c
@@ -358,11 +358,10 @@ static void filter_selectively_vert_row2(int subsampling_factor,
                                    lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                    lfi1->hev_thr);
          } else if (mask_4x4_0 & 1) {
-          vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
-                             1);
+          vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
          } else {
            vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
-                             lfi1->hev_thr, 1);
+                             lfi1->hev_thr);
          }
        }
  
@@ -373,10 +372,10 @@ static void filter_selectively_vert_row2(int subsampling_factor,
                                    lfi1->hev_thr);
          } else if (mask_4x4_int_0 & 1) {
            vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
-                             lfi0->hev_thr, 1);
+                             lfi0->hev_thr);
          } else {
            vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
-                             lfi1->hev_thr, 1);
+                             lfi1->hev_thr);
          }
        }
      }
@@ -1128,11 +1127,11 @@ static void filter_selectively_vert(uint8_t *s, int pitch,
        } else if (mask_8x8 & 1) {
          vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
        } else if (mask_4x4 & 1) {
-        vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+        vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
        }
      }
      if (mask_4x4_int & 1)
-      vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+      vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
      s += 8;
      lfl += 1;
      mask_16x16 >>= 1;
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c

index 8c281c2..e892f78 100644 (file)
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -358,11 +358,10 @@ static void filter_selectively_vert_row2(int subsampling_factor,
                                    lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                    lfi1->hev_thr);
          } else if (mask_4x4_0 & 1) {
-          vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
-                             1);
+          vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
          } else {
            vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
-                             lfi1->hev_thr, 1);
+                             lfi1->hev_thr);
          }
        }
  
@@ -373,10 +372,10 @@ static void filter_selectively_vert_row2(int subsampling_factor,
                                    lfi1->hev_thr);
          } else if (mask_4x4_int_0 & 1) {
            vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
-                             lfi0->hev_thr, 1);
+                             lfi0->hev_thr);
          } else {
            vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
-                             lfi1->hev_thr, 1);
+                             lfi1->hev_thr);
          }
        }
      }
@@ -1103,11 +1102,11 @@ static void filter_selectively_vert(uint8_t *s, int pitch,
        } else if (mask_8x8 & 1) {
          vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
        } else if (mask_4x4 & 1) {
-        vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+        vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
        }
      }
      if (mask_4x4_int & 1)
-      vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+      vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
      s += 8;
      lfl += 1;
      mask_16x16 >>= 1;
diff --git a/vpx_dsp/arm/loopfilter_4_neon.asm b/vpx_dsp/arm/loopfilter_4_neon.asm

index e45e34c..d794f55 100644 (file)
--- a/vpx_dsp/arm/loopfilter_4_neon.asm
+++ b/vpx_dsp/arm/loopfilter_4_neon.asm
@@ -79,37 +79,29 @@ end_vpx_lf_h_edge
  
  ; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
  ; works on 16 iterations at a time.
-; TODO(fgalligan): See about removing the count code as this function is only
-; called with a count of 1.
  ;
  ; void vpx_lpf_vertical_4_neon(uint8_t *s,
  ;                              int p /* pitch */,
  ;                              const uint8_t *blimit,
  ;                              const uint8_t *limit,
-;                              const uint8_t *thresh,
-;                              int count)
+;                              const uint8_t *thresh)
  ;
  ; r0    uint8_t *s,
  ; r1    int p, /* pitch */
  ; r2    const uint8_t *blimit,
  ; r3    const uint8_t *limit,
  ; sp    const uint8_t *thresh,
-; sp+4  int count
  |vpx_lpf_vertical_4_neon| PROC
      push        {lr}
  
      vld1.8      {d0[]}, [r2]              ; duplicate *blimit
-    ldr         r12, [sp, #8]             ; load count
      vld1.8      {d1[]}, [r3]              ; duplicate *limit
  
      ldr         r3, [sp, #4]              ; load thresh
      sub         r2, r0, #4                ; move s pointer down by 4 columns
-    cmp         r12, #0
-    beq         end_vpx_lf_v_edge
  
      vld1.8      {d2[]}, [r3]              ; duplicate *thresh
  
-count_lf_v_loop
      vld1.u8     {d3}, [r2], r1             ; load s data
      vld1.u8     {d4}, [r2], r1
      vld1.u8     {d5}, [r2], r1
@@ -149,12 +141,6 @@ count_lf_v_loop
      vst4.8      {d4[6], d5[6], d6[6], d7[6]}, [r0], r1
      vst4.8      {d4[7], d5[7], d6[7], d7[7]}, [r0]
  
-    add         r0, r0, r1, lsl #3         ; s += pitch * 8
-    subs        r12, r12, #1
-    subne       r2, r0, #4                 ; move s pointer down by 4 columns
-    bne         count_lf_v_loop
-
-end_vpx_lf_v_edge
      pop         {pc}
      ENDP        ; |vpx_lpf_vertical_4_neon|
  
diff --git a/vpx_dsp/arm/loopfilter_4_neon.c b/vpx_dsp/arm/loopfilter_4_neon.c

index 7ad411a..db9ea6a 100644 (file)
--- a/vpx_dsp/arm/loopfilter_4_neon.c
+++ b/vpx_dsp/arm/loopfilter_4_neon.c
@@ -170,8 +170,7 @@ void vpx_lpf_vertical_4_neon(
          int pitch,
          const uint8_t *blimit,
          const uint8_t *limit,
-        const uint8_t *thresh,
-        int count) {
+        const uint8_t *thresh) {
      int i, pitch8;
      uint8_t *s;
      uint8x8_t dblimit, dlimit, dthresh;
@@ -181,15 +180,12 @@ void vpx_lpf_vertical_4_neon(
      uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11;
      uint8x8x4_t d4Result;
  
-    if (count == 0)  // end_vpx_lf_h_edge
-        return;
-
      dblimit = vld1_u8(blimit);
      dlimit = vld1_u8(limit);
      dthresh = vld1_u8(thresh);
  
      pitch8 = pitch * 8;
-    for (i = 0; i < count; i++, src += pitch8) {
+    for (i = 0; i < 1; i++, src += pitch8) {
          s = src - (i + 1) * 4;
  
          d3u8 = vld1_u8(s);
diff --git a/vpx_dsp/arm/loopfilter_neon.c b/vpx_dsp/arm/loopfilter_neon.c

index 5814105..b01944e 100644 (file)
--- a/vpx_dsp/arm/loopfilter_neon.c
+++ b/vpx_dsp/arm/loopfilter_neon.c
@@ -21,8 +21,8 @@ void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p,
                                    const uint8_t *blimit1,
                                    const uint8_t *limit1,
                                    const uint8_t *thresh1) {
-  vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1);
-  vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+  vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0);
+  vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1);
  }
  
  #if HAVE_NEON_ASM
diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c

index fdb5dbb..1604fdb 100644 (file)
--- a/vpx_dsp/loopfilter.c
+++ b/vpx_dsp/loopfilter.c
@@ -143,13 +143,12 @@ void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
  }
  
  void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
-                          const uint8_t *limit, const uint8_t *thresh,
-                          int count) {
+                          const uint8_t *limit, const uint8_t *thresh) {
    int i;
  
    // loop filter designed to work using chars so that we can make maximum use
    // of 8 bit simd instructions.
-  for (i = 0; i < 8 * count; ++i) {
+  for (i = 0; i < 8; ++i) {
      const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
      const uint8_t q0 = s[0],  q1 = s[1],  q2 = s[2],  q3 = s[3];
      const int8_t mask = filter_mask(*limit, *blimit,
@@ -163,9 +162,8 @@ void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
                                 const uint8_t *limit0, const uint8_t *thresh0,
                                 const uint8_t *blimit1, const uint8_t *limit1,
                                 const uint8_t *thresh1) {
-  vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1);
-  vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
-                                  thresh1, 1);
+  vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0);
+  vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
  }
  
  static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat,
diff --git a/vpx_dsp/mips/loopfilter_4_msa.c b/vpx_dsp/mips/loopfilter_4_msa.c

index daf5f38..ebeaddd 100644 (file)
--- a/vpx_dsp/mips/loopfilter_4_msa.c
+++ b/vpx_dsp/mips/loopfilter_4_msa.c
@@ -74,14 +74,11 @@ void vpx_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
  void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
                              const uint8_t *b_limit_ptr,
                              const uint8_t *limit_ptr,
-                            const uint8_t *thresh_ptr,
-                            int32_t count) {
+                            const uint8_t *thresh_ptr) {
    v16u8 mask, hev, flat, limit, thresh, b_limit;
    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
    v8i16 vec0, vec1, vec2, vec3;
  
-  (void)count;
-
    LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
  
    thresh = (v16u8)__msa_fill_b(*thresh_ptr);
diff --git a/vpx_dsp/mips/loopfilter_filters_dspr2.c b/vpx_dsp/mips/loopfilter_filters_dspr2.c

index 529df4e..9924982 100644 (file)
--- a/vpx_dsp/mips/loopfilter_filters_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_filters_dspr2.c
@@ -117,8 +117,7 @@ void vpx_lpf_vertical_4_dspr2(unsigned char *s,
                                int pitch,
                                const uint8_t *blimit,
                                const uint8_t *limit,
-                              const uint8_t *thresh,
-                              int count) {
+                              const uint8_t *thresh) {
    uint8_t   i;
    uint32_t  mask, hev;
    uint32_t  pm1, p0, p1, p2, p3, p4, p5, p6;
@@ -335,8 +334,8 @@ void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
                                     const uint8_t *blimit1,
                                     const uint8_t *limit1,
                                     const uint8_t *thresh1) {
-  vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
-  vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+  vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0);
+  vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1);
  }
  
  void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl

index feaf0ae..eeb03b6 100644 (file)
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -542,7 +542,7 @@ add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_
  specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
  $vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon;
  
-add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
  specialize qw/vpx_lpf_vertical_4 mmx neon dspr2 msa/;
  
  add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
diff --git a/vpx_dsp/x86/loopfilter_mmx.asm b/vpx_dsp/x86/loopfilter_mmx.asm

index b9c18b6..dee565c 100644 (file)
--- a/vpx_dsp/x86/loopfilter_mmx.asm
+++ b/vpx_dsp/x86/loopfilter_mmx.asm
@@ -230,14 +230,13 @@ sym(vpx_lpf_horizontal_4_mmx):
  ;    int  src_pixel_step,
  ;    const char *blimit,
  ;    const char *limit,
-;    const char *thresh,
-;    int count
+;    const char *thresh
  ;)
  global sym(vpx_lpf_vertical_4_mmx) PRIVATE
  sym(vpx_lpf_vertical_4_mmx):
      push        rbp
      mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 6
+    SHADOW_ARGS_TO_STACK 5
      GET_GOT     rbx
      push        rsi
      push        rdi
@@ -254,8 +253,6 @@ sym(vpx_lpf_vertical_4_mmx):
  
          lea         rsi,        [rsi + rax*4 - 4]
  
-        movsxd      rcx,        dword ptr arg(5) ;count
-.next8_v:
          mov         rdi,        rsi           ; rdi points to row +1 for indirect addressing
          add         rdi,        rax
  
@@ -579,10 +576,6 @@ sym(vpx_lpf_vertical_4_mmx):
  
          movd        [rdi+rax*2+2], mm5
  
-        lea         rsi,        [rsi+rax*8]
-        dec         rcx
-        jnz         .next8_v
-
      add rsp, 64
      pop rsp
      ; begin epilog
author	James Zern <jzern@google.com>
	Fri, 12 Feb 2016 03:54:51 +0000 (19:54 -0800)
committer	James Zern <jzern@google.com>
	Tue, 16 Feb 2016 22:59:00 +0000 (14:59 -0800)
test/lpf_8_test.cc		patch \| blob \| history
vp10/common/loopfilter.c		patch \| blob \| history
vp9/common/vp9_loopfilter.c		patch \| blob \| history
vpx_dsp/arm/loopfilter_4_neon.asm		patch \| blob \| history
vpx_dsp/arm/loopfilter_4_neon.c		patch \| blob \| history
vpx_dsp/arm/loopfilter_neon.c		patch \| blob \| history
vpx_dsp/loopfilter.c		patch \| blob \| history
vpx_dsp/mips/loopfilter_4_msa.c		patch \| blob \| history
vpx_dsp/mips/loopfilter_filters_dspr2.c		patch \| blob \| history
vpx_dsp/vpx_dsp_rtcd_defs.pl		patch \| blob \| history
vpx_dsp/x86/loopfilter_mmx.asm		patch \| blob \| history