From b26232eb1b2e7253687770b9b5b76a2d465e70bb Mon Sep 17 00:00:00 2001 From: Linfeng Zhang Date: Tue, 31 May 2016 10:38:01 -0700 Subject: [PATCH] Update filter_selectively_vert_row2() Reduce operations and jumps. perf shows CPU time reduced from 1.9% to 1.6% when decoding fdJc1_IBKJA.248.webm on Xeon E5. Will apply the changes to vp10 after code review. Change-Id: I9351509922855d8896ddef1ed093b3ca12619a61 --- vp9/common/vp9_loopfilter.c | 414 +++++++++++++++++++------------------------- 1 file changed, 179 insertions(+), 235 deletions(-) diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 4614625..d41d45e 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -298,196 +298,168 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { static void filter_selectively_vert_row2(int subsampling_factor, uint8_t *s, int pitch, - unsigned int mask_16x16_l, - unsigned int mask_8x8_l, - unsigned int mask_4x4_l, - unsigned int mask_4x4_int_l, - const loop_filter_info_n *lfi_n, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, const uint8_t *lfl) { - const int mask_shift = subsampling_factor ? 4 : 8; - const int mask_cutoff = subsampling_factor ? 0xf : 0xff; + const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; const int lfl_forward = subsampling_factor ? 4 : 8; - - unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; - unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; - unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; - unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; - unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; - unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; - unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; - unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; + const unsigned int dual_one = 1 | (1 << lfl_forward); unsigned int mask; - - for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 | - mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1; - mask; mask >>= 1) { - const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; - const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward); - - if (mask & 1) { - if ((mask_16x16_0 | mask_16x16_1) & 1) { - if ((mask_16x16_0 & mask_16x16_1) & 1) { - vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr); - } else if (mask_16x16_0 & 1) { - vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr); + uint8_t *ss[2]; + ss[0] = s; + + for (mask = + (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; + mask; mask = (mask & ~dual_one) >> 1) { + if (mask & dual_one) { + const loop_filter_thresh *lfis[2]; + lfis[0] = lfthr + *lfl; + lfis[1] = lfthr + *(lfl + lfl_forward); + ss[1] = ss[0] + 8 * pitch; + + if (mask_16x16 & dual_one) { + if ((mask_16x16 & dual_one) == dual_one) { + vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr); } else { - vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr); + const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; + vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr); } } - if ((mask_8x8_0 | mask_8x8_1) & 1) { - if ((mask_8x8_0 & mask_8x8_1) & 1) { - vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } else if (mask_8x8_0 & 1) { - vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); + if (mask_8x8 & dual_one) { + if ((mask_8x8 & dual_one) == dual_one) { + vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr, lfis[1]->mblim, + lfis[1]->lim, lfis[1]->hev_thr); } else { - vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); + const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; + vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); } } - if ((mask_4x4_0 | mask_4x4_1) & 1) { - if ((mask_4x4_0 & mask_4x4_1) & 1) { - vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } else if (mask_4x4_0 & 1) { - vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); + if (mask_4x4 & dual_one) { + if ((mask_4x4 & dual_one) == dual_one) { + vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr, lfis[1]->mblim, + lfis[1]->lim, lfis[1]->hev_thr); } else { - vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); + const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; + vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); } } - if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) { - if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) { - vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } else if (mask_4x4_int_0 & 1) { - vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr); + if (mask_4x4_int & dual_one) { + if ((mask_4x4_int & dual_one) == dual_one) { + vpx_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr); } else { - vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); + const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; + vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr); } } } - s += 8; + ss[0] += 8; lfl += 1; - mask_16x16_0 >>= 1; - mask_8x8_0 >>= 1; - mask_4x4_0 >>= 1; - mask_4x4_int_0 >>= 1; - mask_16x16_1 >>= 1; - mask_8x8_1 >>= 1; - mask_4x4_1 >>= 1; - mask_4x4_int_1 >>= 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; } } #if CONFIG_VP9_HIGHBITDEPTH static void highbd_filter_selectively_vert_row2(int subsampling_factor, uint16_t *s, int pitch, - unsigned int mask_16x16_l, - unsigned int mask_8x8_l, - unsigned int mask_4x4_l, - unsigned int mask_4x4_int_l, - const loop_filter_info_n *lfi_n, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) { - const int mask_shift = subsampling_factor ? 4 : 8; - const int mask_cutoff = subsampling_factor ? 0xf : 0xff; + const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; const int lfl_forward = subsampling_factor ? 4 : 8; - - unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; - unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; - unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; - unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; - unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; - unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; - unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; - unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; + const unsigned int dual_one = 1 | (1 << lfl_forward); unsigned int mask; - - for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 | - mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1; - mask; mask >>= 1) { - const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; - const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward); - - if (mask & 1) { - if ((mask_16x16_0 | mask_16x16_1) & 1) { - if ((mask_16x16_0 & mask_16x16_1) & 1) { - vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, bd); - } else if (mask_16x16_0 & 1) { - vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, bd); + uint16_t *ss[2]; + ss[0] = s; + + for (mask = + (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; + mask; mask = (mask & ~dual_one) >> 1) { + if (mask & dual_one) { + const loop_filter_thresh *lfis[2]; + lfis[0] = lfthr + *lfl; + lfis[1] = lfthr + *(lfl + lfl_forward); + ss[1] = ss[0] + 8 * pitch; + + if (mask_16x16 & dual_one) { + if ((mask_16x16 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, bd); } else { - vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, bd); + const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; + vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); } } - if ((mask_8x8_0 | mask_8x8_1) & 1) { - if ((mask_8x8_0 & mask_8x8_1) & 1) { - vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, bd); - } else if (mask_8x8_0 & 1) { - vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, bd); + if (mask_8x8 & dual_one) { + if ((mask_8x8 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr, bd); } else { - vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, bd); + const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; + vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); } } - if ((mask_4x4_0 | mask_4x4_1) & 1) { - if ((mask_4x4_0 & mask_4x4_1) & 1) { - vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, bd); - } else if (mask_4x4_0 & 1) { - vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, bd); + if (mask_4x4 & dual_one) { + if ((mask_4x4 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr, bd); } else { - vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, bd); + const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; + vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); } } - if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) { - if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) { - vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, bd); - } else if (mask_4x4_int_0 & 1) { - vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, bd); + if (mask_4x4_int & dual_one) { + if ((mask_4x4_int & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr, bd); } else { - vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, bd); + const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; + vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, + lfi->mblim, lfi->lim, lfi->hev_thr, bd); } } } - s += 8; + ss[0] += 8; lfl += 1; - mask_16x16_0 >>= 1; - mask_8x8_0 >>= 1; - mask_4x4_0 >>= 1; - mask_4x4_int_0 >>= 1; - mask_16x16_1 >>= 1; - mask_8x8_1 >>= 1; - mask_4x4_1 >>= 1; - mask_4x4_int_1 >>= 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; } } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -497,17 +469,17 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, + const loop_filter_thresh *lfthr, const uint8_t *lfl) { unsigned int mask; int count; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= count) { - const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; - count = 1; if (mask & 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, @@ -520,7 +492,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { // Next block's thresholds. - const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, @@ -549,7 +521,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } else if (mask_4x4 & 1) { if ((mask_4x4 & 3) == 3) { // Next block's thresholds. - const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, @@ -574,7 +546,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } - } else if (mask_4x4_int & 1) { + } else { vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } @@ -594,17 +566,17 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, + const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) { unsigned int mask; int count; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= count) { - const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; - count = 1; if (mask & 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, @@ -617,7 +589,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { // Next block's thresholds. - const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, @@ -650,7 +622,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, } else if (mask_4x4 & 1) { if ((mask_4x4 & 3) == 3) { // Next block's thresholds. - const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, @@ -679,7 +651,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, lfi->lim, lfi->hev_thr, bd); } } - } else if (mask_4x4_int & 1) { + } else { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); } @@ -1079,13 +1051,13 @@ static void filter_selectively_vert(uint8_t *s, int pitch, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, + const loop_filter_thresh *lfthr, const uint8_t *lfl) { unsigned int mask; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= 1) { - const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; + const loop_filter_thresh *lfi = lfthr + *lfl; if (mask & 1) { if (mask_16x16 & 1) { @@ -1113,13 +1085,13 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, + const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) { unsigned int mask; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= 1) { - const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; + const loop_filter_thresh *lfi = lfthr + *lfl; if (mask & 1) { if (mask_16x16 & 1) { @@ -1250,23 +1222,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm, mask_8x8_c & border_mask, mask_4x4_c & border_mask, mask_4x4_int[r], - &cm->lf_info, &lfl[r << 3], + cm->lf_info.lfthr, &lfl[r << 3], (int)cm->bit_depth); } else { +#endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_vert(dst->buf, dst->stride, mask_16x16_c & border_mask, mask_8x8_c & border_mask, mask_4x4_c & border_mask, mask_4x4_int[r], - &cm->lf_info, &lfl[r << 3]); + cm->lf_info.lfthr, &lfl[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH } -#else - filter_selectively_vert(dst->buf, dst->stride, - mask_16x16_c & border_mask, - mask_8x8_c & border_mask, - mask_4x4_c & border_mask, - mask_4x4_int[r], - &cm->lf_info, &lfl[r << 3]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; mi_8x8 += row_step_stride; @@ -1299,23 +1266,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, - &cm->lf_info, &lfl[r << 3], + cm->lf_info.lfthr, &lfl[r << 3], (int)cm->bit_depth); } else { +#endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, - &cm->lf_info, &lfl[r << 3]); + cm->lf_info.lfthr, &lfl[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH } -#else - filter_selectively_horiz(dst->buf, dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - &cm->lf_info, &lfl[r << 3]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; } @@ -1337,27 +1299,20 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm, // Vertical pass: do 2 rows at one time for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { - unsigned int mask_16x16_l = mask_16x16 & 0xffff; - unsigned int mask_8x8_l = mask_8x8 & 0xffff; - unsigned int mask_4x4_l = mask_4x4 & 0xffff; - unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff; - -// Disable filtering on the leftmost column. + // Disable filtering on the leftmost column. #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { highbd_filter_selectively_vert_row2( plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, - mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, + mask_16x16, mask_8x8, mask_4x4, mask_4x4_int, cm->lf_info.lfthr, &lfm->lfl_y[r << 3], (int)cm->bit_depth); } else { +#endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_vert_row2( - plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, - mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); + plane->subsampling_x, dst->buf, dst->stride, mask_16x16, mask_8x8, + mask_4x4, mask_4x4_int, cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH } -#else - filter_selectively_vert_row2( - plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, - mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 16 * dst->stride; mask_16x16 >>= 16; @@ -1390,19 +1345,18 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm, #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { - highbd_filter_selectively_horiz( - CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3], - (int)cm->bit_depth); + highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int & 0xff, + cm->lf_info.lfthr, &lfm->lfl_y[r << 3], + (int)cm->bit_depth); } else { +#endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, - &lfm->lfl_y[r << 3]); + mask_4x4_r, mask_4x4_int & 0xff, + cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH } -#else - filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, - &lfm->lfl_y[r << 3]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; @@ -1436,38 +1390,29 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; } - { - unsigned int mask_16x16_l = mask_16x16 & 0xff; - unsigned int mask_8x8_l = mask_8x8 & 0xff; - unsigned int mask_4x4_l = mask_4x4 & 0xff; - unsigned int mask_4x4_int_l = mask_4x4_int & 0xff; - -// Disable filtering on the leftmost column. + // Disable filtering on the leftmost column. #if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_vert_row2( - plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, - mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfl_uv[r << 1], (int)cm->bit_depth); - } else { - filter_selectively_vert_row2( - plane->subsampling_x, dst->buf, dst->stride, - mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfl_uv[r << 1]); - } -#else - filter_selectively_vert_row2( - plane->subsampling_x, dst->buf, dst->stride, - mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfl_uv[r << 1]); + if (cm->use_highbitdepth) { + highbd_filter_selectively_vert_row2(plane->subsampling_x, + CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, mask_16x16, mask_8x8, + mask_4x4, mask_4x4_int, + cm->lf_info.lfthr, &lfl_uv[r << 1], + (int)cm->bit_depth); + } else { #endif // CONFIG_VP9_HIGHBITDEPTH - - dst->buf += 16 * dst->stride; - mask_16x16 >>= 8; - mask_8x8 >>= 8; - mask_4x4 >>= 8; - mask_4x4_int >>= 8; + filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride, + mask_16x16, mask_8x8, mask_4x4, mask_4x4_int, + cm->lf_info.lfthr, &lfl_uv[r << 1]); +#if CONFIG_VP9_HIGHBITDEPTH } +#endif // CONFIG_VP9_HIGHBITDEPTH + + dst->buf += 16 * dst->stride; + mask_16x16 >>= 8; + mask_8x8 >>= 8; + mask_4x4 >>= 8; + mask_4x4_int >>= 8; } // Horizontal pass @@ -1499,17 +1444,16 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, if (cm->use_highbitdepth) { highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int_r, &cm->lf_info, - &lfl_uv[r << 1], (int)cm->bit_depth); + mask_4x4_r, mask_4x4_int_r, + cm->lf_info.lfthr, &lfl_uv[r << 1], + (int)cm->bit_depth); } else { +#endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int_r, &cm->lf_info, + mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, &lfl_uv[r << 1]); +#if CONFIG_VP9_HIGHBITDEPTH } -#else - filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int_r, &cm->lf_info, - &lfl_uv[r << 1]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; -- 2.7.4