From: James Zern Date: Fri, 12 Feb 2016 03:54:51 +0000 (-0800) Subject: vpx_lpf_vertical_4: remove unused count param X-Git-Tag: v1.6.0~349^2~10 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=109a47b3426d302df201295aeff9cf0e40badf69;p=platform%2Fupstream%2Flibvpx.git vpx_lpf_vertical_4: remove unused count param Change-Id: I43a191cb3d42e51e7bca266adfa11c6239a8064c --- diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index 9697c88..5c83f3a 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -459,7 +459,8 @@ INSTANTIATE_TEST_CASE_P( MMX, Loop8Test6Param, ::testing::Values( make_tuple(&vpx_lpf_horizontal_4_mmx, &vpx_lpf_horizontal_4_c, 8, 1), - make_tuple(&vpx_lpf_vertical_4_mmx, &vpx_lpf_vertical_4_c, 8, 1))); + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1))); #endif // HAVE_MMX #if HAVE_SSE2 @@ -609,8 +610,8 @@ INSTANTIATE_TEST_CASE_P( &wrapper_nc, 8, 1), make_tuple(&vpx_lpf_horizontal_4_neon, &vpx_lpf_horizontal_4_c, 8, 1), - make_tuple(&vpx_lpf_vertical_4_neon, - &vpx_lpf_vertical_4_c, 8, 1))); + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1))); INSTANTIATE_TEST_CASE_P( NEON, Loop8Test9Param, ::testing::Values( @@ -637,7 +638,8 @@ INSTANTIATE_TEST_CASE_P( &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_dspr2, &vpx_lpf_horizontal_16_c, 8, 2), - make_tuple(&vpx_lpf_vertical_4_dspr2, &vpx_lpf_vertical_4_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, @@ -666,7 +668,8 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2), - make_tuple(&vpx_lpf_vertical_4_msa, &vpx_lpf_vertical_4_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 4171c1e..9f55dc2 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -358,11 +358,10 @@ static void filter_selectively_vert_row2(int subsampling_factor, lfi0->hev_thr, lfi1->mblim, lfi1->lim, lfi1->hev_thr); } else if (mask_4x4_0 & 1) { - vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, - 1); + vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); } else { vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + lfi1->hev_thr); } } @@ -373,10 +372,10 @@ static void filter_selectively_vert_row2(int subsampling_factor, lfi1->hev_thr); } else if (mask_4x4_int_0 & 1) { vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1); + lfi0->hev_thr); } else { vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + lfi1->hev_thr); } } } @@ -1128,11 +1127,11 @@ static void filter_selectively_vert(uint8_t *s, int pitch, } else if (mask_8x8 & 1) { vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } else if (mask_4x4 & 1) { - vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } } if (mask_4x4_int & 1) - vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); s += 8; lfl += 1; mask_16x16 >>= 1; diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 8c281c2..e892f78 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -358,11 +358,10 @@ static void filter_selectively_vert_row2(int subsampling_factor, lfi0->hev_thr, lfi1->mblim, lfi1->lim, lfi1->hev_thr); } else if (mask_4x4_0 & 1) { - vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, - 1); + vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); } else { vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + lfi1->hev_thr); } } @@ -373,10 +372,10 @@ static void filter_selectively_vert_row2(int subsampling_factor, lfi1->hev_thr); } else if (mask_4x4_int_0 & 1) { vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1); + lfi0->hev_thr); } else { vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + lfi1->hev_thr); } } } @@ -1103,11 +1102,11 @@ static void filter_selectively_vert(uint8_t *s, int pitch, } else if (mask_8x8 & 1) { vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } else if (mask_4x4 & 1) { - vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } } if (mask_4x4_int & 1) - vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); s += 8; lfl += 1; mask_16x16 >>= 1; diff --git a/vpx_dsp/arm/loopfilter_4_neon.asm b/vpx_dsp/arm/loopfilter_4_neon.asm index e45e34c..d794f55 100644 --- a/vpx_dsp/arm/loopfilter_4_neon.asm +++ b/vpx_dsp/arm/loopfilter_4_neon.asm @@ -79,37 +79,29 @@ end_vpx_lf_h_edge ; Currently vpx only works on iterations 8 at a time. The vp8 loop filter ; works on 16 iterations at a time. -; TODO(fgalligan): See about removing the count code as this function is only -; called with a count of 1. ; ; void vpx_lpf_vertical_4_neon(uint8_t *s, ; int p /* pitch */, ; const uint8_t *blimit, ; const uint8_t *limit, -; const uint8_t *thresh, -; int count) +; const uint8_t *thresh) ; ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -; sp+4 int count |vpx_lpf_vertical_4_neon| PROC push {lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit - ldr r12, [sp, #8] ; load count vld1.8 {d1[]}, [r3] ; duplicate *limit ldr r3, [sp, #4] ; load thresh sub r2, r0, #4 ; move s pointer down by 4 columns - cmp r12, #0 - beq end_vpx_lf_v_edge vld1.8 {d2[]}, [r3] ; duplicate *thresh -count_lf_v_loop vld1.u8 {d3}, [r2], r1 ; load s data vld1.u8 {d4}, [r2], r1 vld1.u8 {d5}, [r2], r1 @@ -149,12 +141,6 @@ count_lf_v_loop vst4.8 {d4[6], d5[6], d6[6], d7[6]}, [r0], r1 vst4.8 {d4[7], d5[7], d6[7], d7[7]}, [r0] - add r0, r0, r1, lsl #3 ; s += pitch * 8 - subs r12, r12, #1 - subne r2, r0, #4 ; move s pointer down by 4 columns - bne count_lf_v_loop - -end_vpx_lf_v_edge pop {pc} ENDP ; |vpx_lpf_vertical_4_neon| diff --git a/vpx_dsp/arm/loopfilter_4_neon.c b/vpx_dsp/arm/loopfilter_4_neon.c index 7ad411a..db9ea6a 100644 --- a/vpx_dsp/arm/loopfilter_4_neon.c +++ b/vpx_dsp/arm/loopfilter_4_neon.c @@ -170,8 +170,7 @@ void vpx_lpf_vertical_4_neon( int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { int i, pitch8; uint8_t *s; uint8x8_t dblimit, dlimit, dthresh; @@ -181,15 +180,12 @@ void vpx_lpf_vertical_4_neon( uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11; uint8x8x4_t d4Result; - if (count == 0) // end_vpx_lf_h_edge - return; - dblimit = vld1_u8(blimit); dlimit = vld1_u8(limit); dthresh = vld1_u8(thresh); pitch8 = pitch * 8; - for (i = 0; i < count; i++, src += pitch8) { + for (i = 0; i < 1; i++, src += pitch8) { s = src - (i + 1) * 4; d3u8 = vld1_u8(s); diff --git a/vpx_dsp/arm/loopfilter_neon.c b/vpx_dsp/arm/loopfilter_neon.c index 5814105..b01944e 100644 --- a/vpx_dsp/arm/loopfilter_neon.c +++ b/vpx_dsp/arm/loopfilter_neon.c @@ -21,8 +21,8 @@ void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); + vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0); + vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1); } #if HAVE_NEON_ASM diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c index fdb5dbb..1604fdb 100644 --- a/vpx_dsp/loopfilter.c +++ b/vpx_dsp/loopfilter.c @@ -143,13 +143,12 @@ void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0, } void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { + const uint8_t *limit, const uint8_t *thresh) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. - for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; const int8_t mask = filter_mask(*limit, *blimit, @@ -163,9 +162,8 @@ void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1); - vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, - thresh1, 1); + vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0); + vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1); } static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat, diff --git a/vpx_dsp/mips/loopfilter_4_msa.c b/vpx_dsp/mips/loopfilter_4_msa.c index daf5f38..ebeaddd 100644 --- a/vpx_dsp/mips/loopfilter_4_msa.c +++ b/vpx_dsp/mips/loopfilter_4_msa.c @@ -74,14 +74,11 @@ void vpx_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch, void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, - const uint8_t *thresh_ptr, - int32_t count) { + const uint8_t *thresh_ptr) { v16u8 mask, hev, flat, limit, thresh, b_limit; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v8i16 vec0, vec1, vec2, vec3; - (void)count; - LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3); thresh = (v16u8)__msa_fill_b(*thresh_ptr); diff --git a/vpx_dsp/mips/loopfilter_filters_dspr2.c b/vpx_dsp/mips/loopfilter_filters_dspr2.c index 529df4e..9924982 100644 --- a/vpx_dsp/mips/loopfilter_filters_dspr2.c +++ b/vpx_dsp/mips/loopfilter_filters_dspr2.c @@ -117,8 +117,7 @@ void vpx_lpf_vertical_4_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { uint8_t i; uint32_t mask, hev; uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; @@ -335,8 +334,8 @@ void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1); + vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0); + vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p, diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index feaf0ae..eeb03b6 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -542,7 +542,7 @@ add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_ specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/; $vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon; -add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; specialize qw/vpx_lpf_vertical_4 mmx neon dspr2 msa/; add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; diff --git a/vpx_dsp/x86/loopfilter_mmx.asm b/vpx_dsp/x86/loopfilter_mmx.asm index b9c18b6..dee565c 100644 --- a/vpx_dsp/x86/loopfilter_mmx.asm +++ b/vpx_dsp/x86/loopfilter_mmx.asm @@ -230,14 +230,13 @@ sym(vpx_lpf_horizontal_4_mmx): ; int src_pixel_step, ; const char *blimit, ; const char *limit, -; const char *thresh, -; int count +; const char *thresh ;) global sym(vpx_lpf_vertical_4_mmx) PRIVATE sym(vpx_lpf_vertical_4_mmx): push rbp mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 + SHADOW_ARGS_TO_STACK 5 GET_GOT rbx push rsi push rdi @@ -254,8 +253,6 @@ sym(vpx_lpf_vertical_4_mmx): lea rsi, [rsi + rax*4 - 4] - movsxd rcx, dword ptr arg(5) ;count -.next8_v: mov rdi, rsi ; rdi points to row +1 for indirect addressing add rdi, rax @@ -579,10 +576,6 @@ sym(vpx_lpf_vertical_4_mmx): movd [rdi+rax*2+2], mm5 - lea rsi, [rsi+rax*8] - dec rcx - jnz .next8_v - add rsp, 64 pop rsp ; begin epilog