Update filter_selectively_vert_row2()
authorLinfeng Zhang <linfengz@google.com>
Tue, 31 May 2016 17:38:01 +0000 (10:38 -0700)
committerLinfeng Zhang <linfengz@google.com>
Wed, 1 Jun 2016 18:20:47 +0000 (11:20 -0700)
Reduce operations and jumps. perf shows CPU time reduced from 1.9% to
1.6% when decoding fdJc1_IBKJA.248.webm on Xeon E5.
Will apply the changes to vp10 after code review.

Change-Id: I9351509922855d8896ddef1ed093b3ca12619a61

vp9/common/vp9_loopfilter.c

index 4614625..d41d45e 100644 (file)
@@ -298,196 +298,168 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
 
 static void filter_selectively_vert_row2(int subsampling_factor,
                                          uint8_t *s, int pitch,
-                                         unsigned int mask_16x16_l,
-                                         unsigned int mask_8x8_l,
-                                         unsigned int mask_4x4_l,
-                                         unsigned int mask_4x4_int_l,
-                                         const loop_filter_info_n *lfi_n,
+                                         unsigned int mask_16x16,
+                                         unsigned int mask_8x8,
+                                         unsigned int mask_4x4,
+                                         unsigned int mask_4x4_int,
+                                         const loop_filter_thresh *lfthr,
                                          const uint8_t *lfl) {
-  const int mask_shift = subsampling_factor ? 4 : 8;
-  const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
+  const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
   const int lfl_forward = subsampling_factor ? 4 : 8;
-
-  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
-  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
-  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
-  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
-  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
-  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
-  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
-  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
+  const unsigned int dual_one = 1 | (1 << lfl_forward);
   unsigned int mask;
-
-  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
-              mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
-       mask; mask >>= 1) {
-    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
-    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
-
-    if (mask & 1) {
-      if ((mask_16x16_0 | mask_16x16_1) & 1) {
-        if ((mask_16x16_0 & mask_16x16_1) & 1) {
-          vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
-                                   lfi0->hev_thr);
-        } else if (mask_16x16_0 & 1) {
-          vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
-                              lfi0->hev_thr);
+  uint8_t *ss[2];
+  ss[0] = s;
+
+  for (mask =
+           (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
+       mask; mask = (mask & ~dual_one) >> 1) {
+    if (mask & dual_one) {
+      const loop_filter_thresh *lfis[2];
+      lfis[0] = lfthr + *lfl;
+      lfis[1] = lfthr + *(lfl + lfl_forward);
+      ss[1] = ss[0] + 8 * pitch;
+
+      if (mask_16x16 & dual_one) {
+        if ((mask_16x16 & dual_one) == dual_one) {
+          vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
+                                   lfis[0]->hev_thr);
         } else {
-          vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
-                              lfi1->lim, lfi1->hev_thr);
+          const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
+          vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
+                              lfi->lim, lfi->hev_thr);
         }
       }
 
-      if ((mask_8x8_0 | mask_8x8_1) & 1) {
-        if ((mask_8x8_0 & mask_8x8_1) & 1) {
-          vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
-                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
-                                  lfi1->hev_thr);
-        } else if (mask_8x8_0 & 1) {
-          vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
+      if (mask_8x8 & dual_one) {
+        if ((mask_8x8 & dual_one) == dual_one) {
+          vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
+                                  lfis[0]->hev_thr, lfis[1]->mblim,
+                                  lfis[1]->lim, lfis[1]->hev_thr);
         } else {
-          vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
-                             lfi1->hev_thr);
+          const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
+          vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim,
+                             lfi->hev_thr);
         }
       }
 
-      if ((mask_4x4_0 | mask_4x4_1) & 1) {
-        if ((mask_4x4_0 & mask_4x4_1) & 1) {
-          vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
-                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
-                                  lfi1->hev_thr);
-        } else if (mask_4x4_0 & 1) {
-          vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
+      if (mask_4x4 & dual_one) {
+        if ((mask_4x4 & dual_one) == dual_one) {
+          vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
+                                  lfis[0]->hev_thr, lfis[1]->mblim,
+                                  lfis[1]->lim, lfis[1]->hev_thr);
         } else {
-          vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
-                             lfi1->hev_thr);
+          const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
+          vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim,
+                             lfi->hev_thr);
         }
       }
 
-      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
-        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
-          vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
-                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
-                                  lfi1->hev_thr);
-        } else if (mask_4x4_int_0 & 1) {
-          vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
-                             lfi0->hev_thr);
+      if (mask_4x4_int & dual_one) {
+        if ((mask_4x4_int & dual_one) == dual_one) {
+          vpx_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,
+                                  lfis[0]->lim, lfis[0]->hev_thr,
+                                  lfis[1]->mblim, lfis[1]->lim,
+                                  lfis[1]->hev_thr);
         } else {
-          vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
-                             lfi1->hev_thr);
+          const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
+          vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim,
+                             lfi->lim, lfi->hev_thr);
         }
       }
     }
 
-    s += 8;
+    ss[0] += 8;
     lfl += 1;
-    mask_16x16_0 >>= 1;
-    mask_8x8_0 >>= 1;
-    mask_4x4_0 >>= 1;
-    mask_4x4_int_0 >>= 1;
-    mask_16x16_1 >>= 1;
-    mask_8x8_1 >>= 1;
-    mask_4x4_1 >>= 1;
-    mask_4x4_int_1 >>= 1;
+    mask_16x16 >>= 1;
+    mask_8x8 >>= 1;
+    mask_4x4 >>= 1;
+    mask_4x4_int >>= 1;
   }
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
 static void highbd_filter_selectively_vert_row2(int subsampling_factor,
                                                 uint16_t *s, int pitch,
-                                                unsigned int mask_16x16_l,
-                                                unsigned int mask_8x8_l,
-                                                unsigned int mask_4x4_l,
-                                                unsigned int mask_4x4_int_l,
-                                                const loop_filter_info_n *lfi_n,
+                                                unsigned int mask_16x16,
+                                                unsigned int mask_8x8,
+                                                unsigned int mask_4x4,
+                                                unsigned int mask_4x4_int,
+                                                const loop_filter_thresh *lfthr,
                                                 const uint8_t *lfl, int bd) {
-  const int mask_shift = subsampling_factor ? 4 : 8;
-  const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
+  const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
   const int lfl_forward = subsampling_factor ? 4 : 8;
-
-  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
-  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
-  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
-  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
-  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
-  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
-  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
-  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
+  const unsigned int dual_one = 1 | (1 << lfl_forward);
   unsigned int mask;
-
-  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
-       mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
-       mask; mask >>= 1) {
-    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
-    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
-
-    if (mask & 1) {
-      if ((mask_16x16_0 | mask_16x16_1) & 1) {
-        if ((mask_16x16_0 & mask_16x16_1) & 1) {
-          vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
-                                          lfi0->hev_thr, bd);
-        } else if (mask_16x16_0 & 1) {
-          vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
-                                     lfi0->hev_thr, bd);
+  uint16_t *ss[2];
+  ss[0] = s;
+
+  for (mask =
+           (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
+       mask; mask = (mask & ~dual_one) >> 1) {
+    if (mask & dual_one) {
+      const loop_filter_thresh *lfis[2];
+      lfis[0] = lfthr + *lfl;
+      lfis[1] = lfthr + *(lfl + lfl_forward);
+      ss[1] = ss[0] + 8 * pitch;
+
+      if (mask_16x16 & dual_one) {
+        if ((mask_16x16 & dual_one) == dual_one) {
+          vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim,
+                                          lfis[0]->lim, lfis[0]->hev_thr, bd);
         } else {
-          vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
-                                     lfi1->lim, lfi1->hev_thr, bd);
+          const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
+          vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
+                                     lfi->lim, lfi->hev_thr, bd);
         }
       }
 
-      if ((mask_8x8_0 | mask_8x8_1) & 1) {
-        if ((mask_8x8_0 & mask_8x8_1) & 1) {
-          vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
-                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
-                                         lfi1->hev_thr, bd);
-        } else if (mask_8x8_0 & 1) {
-          vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
-                                    lfi0->hev_thr, bd);
+      if (mask_8x8 & dual_one) {
+        if ((mask_8x8 & dual_one) == dual_one) {
+          vpx_highbd_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim,
+                                         lfis[0]->lim, lfis[0]->hev_thr,
+                                         lfis[1]->mblim, lfis[1]->lim,
+                                         lfis[1]->hev_thr, bd);
         } else {
-          vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
-                                    lfi1->lim, lfi1->hev_thr, bd);
+          const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
+          vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim,
+                                    lfi->lim, lfi->hev_thr, bd);
         }
       }
 
-      if ((mask_4x4_0 | mask_4x4_1) & 1) {
-        if ((mask_4x4_0 & mask_4x4_1) & 1) {
-          vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
-                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
-                                         lfi1->hev_thr, bd);
-        } else if (mask_4x4_0 & 1) {
-          vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
-                                    lfi0->hev_thr, bd);
+      if (mask_4x4 & dual_one) {
+        if ((mask_4x4 & dual_one) == dual_one) {
+          vpx_highbd_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim,
+                                         lfis[0]->lim, lfis[0]->hev_thr,
+                                         lfis[1]->mblim, lfis[1]->lim,
+                                         lfis[1]->hev_thr, bd);
         } else {
-          vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
-                                    lfi1->lim, lfi1->hev_thr, bd);
+          const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
+          vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim,
+                                    lfi->lim, lfi->hev_thr, bd);
         }
       }
 
-      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
-        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
-          vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
-                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
-                                         lfi1->hev_thr, bd);
-        } else if (mask_4x4_int_0 & 1) {
-          vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
-                                    lfi0->hev_thr, bd);
+      if (mask_4x4_int & dual_one) {
+        if ((mask_4x4_int & dual_one) == dual_one) {
+          vpx_highbd_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,
+                                         lfis[0]->lim, lfis[0]->hev_thr,
+                                         lfis[1]->mblim, lfis[1]->lim,
+                                         lfis[1]->hev_thr, bd);
         } else {
-          vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
-                                    lfi1->lim, lfi1->hev_thr, bd);
+          const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
+          vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch,
+                                    lfi->mblim, lfi->lim, lfi->hev_thr, bd);
         }
       }
     }
 
-    s += 8;
+    ss[0] += 8;
     lfl += 1;
-    mask_16x16_0 >>= 1;
-    mask_8x8_0 >>= 1;
-    mask_4x4_0 >>= 1;
-    mask_4x4_int_0 >>= 1;
-    mask_16x16_1 >>= 1;
-    mask_8x8_1 >>= 1;
-    mask_4x4_1 >>= 1;
-    mask_4x4_int_1 >>= 1;
+    mask_16x16 >>= 1;
+    mask_8x8 >>= 1;
+    mask_4x4 >>= 1;
+    mask_4x4_int >>= 1;
   }
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -497,17 +469,17 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
                                      unsigned int mask_8x8,
                                      unsigned int mask_4x4,
                                      unsigned int mask_4x4_int,
-                                     const loop_filter_info_n *lfi_n,
+                                     const loop_filter_thresh *lfthr,
                                      const uint8_t *lfl) {
   unsigned int mask;
   int count;
 
   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
        mask; mask >>= count) {
-    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
-
     count = 1;
     if (mask & 1) {
+      const loop_filter_thresh *lfi = lfthr + *lfl;
+
       if (mask_16x16 & 1) {
         if ((mask_16x16 & 3) == 3) {
           vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
@@ -520,7 +492,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
       } else if (mask_8x8 & 1) {
         if ((mask_8x8 & 3) == 3) {
           // Next block's thresholds.
-          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
 
           vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                     lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -549,7 +521,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
       } else if (mask_4x4 & 1) {
         if ((mask_4x4 & 3) == 3) {
           // Next block's thresholds.
-          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
 
           vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                     lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -574,7 +546,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
             vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                  lfi->hev_thr);
         }
-      } else if (mask_4x4_int & 1) {
+      } else {
         vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                              lfi->hev_thr);
       }
@@ -594,17 +566,17 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
                                             unsigned int mask_8x8,
                                             unsigned int mask_4x4,
                                             unsigned int mask_4x4_int,
-                                            const loop_filter_info_n *lfi_n,
+                                            const loop_filter_thresh *lfthr,
                                             const uint8_t *lfl, int bd) {
   unsigned int mask;
   int count;
 
   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
        mask; mask >>= count) {
-    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
-
     count = 1;
     if (mask & 1) {
+      const loop_filter_thresh *lfi = lfthr + *lfl;
+
       if (mask_16x16 & 1) {
         if ((mask_16x16 & 3) == 3) {
           vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
@@ -617,7 +589,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
       } else if (mask_8x8 & 1) {
         if ((mask_8x8 & 3) == 3) {
           // Next block's thresholds.
-          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
 
           vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                            lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -650,7 +622,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
       } else if (mask_4x4 & 1) {
         if ((mask_4x4 & 3) == 3) {
           // Next block's thresholds.
-          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
 
           vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                            lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -679,7 +651,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
                                         lfi->lim, lfi->hev_thr, bd);
           }
         }
-      } else if (mask_4x4_int & 1) {
+      } else {
         vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                     lfi->hev_thr, bd);
       }
@@ -1079,13 +1051,13 @@ static void filter_selectively_vert(uint8_t *s, int pitch,
                                     unsigned int mask_8x8,
                                     unsigned int mask_4x4,
                                     unsigned int mask_4x4_int,
-                                    const loop_filter_info_n *lfi_n,
+                                    const loop_filter_thresh *lfthr,
                                     const uint8_t *lfl) {
   unsigned int mask;
 
   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
        mask; mask >>= 1) {
-    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+    const loop_filter_thresh *lfi = lfthr + *lfl;
 
     if (mask & 1) {
       if (mask_16x16 & 1) {
@@ -1113,13 +1085,13 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch,
                                            unsigned int mask_8x8,
                                            unsigned int mask_4x4,
                                            unsigned int mask_4x4_int,
-                                           const loop_filter_info_n *lfi_n,
+                                           const loop_filter_thresh *lfthr,
                                            const uint8_t *lfl, int bd) {
   unsigned int mask;
 
   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
        mask; mask >>= 1) {
-    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+    const loop_filter_thresh *lfi = lfthr + *lfl;
 
     if (mask & 1) {
       if (mask_16x16 & 1) {
@@ -1250,23 +1222,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
                                      mask_8x8_c & border_mask,
                                      mask_4x4_c & border_mask,
                                      mask_4x4_int[r],
-                                     &cm->lf_info, &lfl[r << 3],
+                                     cm->lf_info.lfthr, &lfl[r << 3],
                                      (int)cm->bit_depth);
     } else {
+#endif  // CONFIG_VP9_HIGHBITDEPTH
       filter_selectively_vert(dst->buf, dst->stride,
                               mask_16x16_c & border_mask,
                               mask_8x8_c & border_mask,
                               mask_4x4_c & border_mask,
                               mask_4x4_int[r],
-                              &cm->lf_info, &lfl[r << 3]);
+                              cm->lf_info.lfthr, &lfl[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
     }
-#else
-    filter_selectively_vert(dst->buf, dst->stride,
-                            mask_16x16_c & border_mask,
-                            mask_8x8_c & border_mask,
-                            mask_4x4_c & border_mask,
-                            mask_4x4_int[r],
-                            &cm->lf_info, &lfl[r << 3]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
     dst->buf += 8 * dst->stride;
     mi_8x8 += row_step_stride;
@@ -1299,23 +1266,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
                                       mask_8x8_r,
                                       mask_4x4_r,
                                       mask_4x4_int_r,
-                                      &cm->lf_info, &lfl[r << 3],
+                                      cm->lf_info.lfthr, &lfl[r << 3],
                                       (int)cm->bit_depth);
     } else {
+#endif  // CONFIG_VP9_HIGHBITDEPTH
       filter_selectively_horiz(dst->buf, dst->stride,
                                mask_16x16_r,
                                mask_8x8_r,
                                mask_4x4_r,
                                mask_4x4_int_r,
-                               &cm->lf_info, &lfl[r << 3]);
+                               cm->lf_info.lfthr, &lfl[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
     }
-#else
-    filter_selectively_horiz(dst->buf, dst->stride,
-                             mask_16x16_r,
-                             mask_8x8_r,
-                             mask_4x4_r,
-                             mask_4x4_int_r,
-                             &cm->lf_info, &lfl[r << 3]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
     dst->buf += 8 * dst->stride;
   }
@@ -1337,27 +1299,20 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
 
   // Vertical pass: do 2 rows at one time
   for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
-    unsigned int mask_16x16_l = mask_16x16 & 0xffff;
-    unsigned int mask_8x8_l = mask_8x8 & 0xffff;
-    unsigned int mask_4x4_l = mask_4x4 & 0xffff;
-    unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff;
-
-// Disable filtering on the leftmost column.
+    // Disable filtering on the leftmost column.
 #if CONFIG_VP9_HIGHBITDEPTH
     if (cm->use_highbitdepth) {
       highbd_filter_selectively_vert_row2(
           plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
-          mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
+          mask_16x16, mask_8x8, mask_4x4, mask_4x4_int, cm->lf_info.lfthr,
           &lfm->lfl_y[r << 3], (int)cm->bit_depth);
     } else {
+#endif  // CONFIG_VP9_HIGHBITDEPTH
       filter_selectively_vert_row2(
-          plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
-          mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
+          plane->subsampling_x, dst->buf, dst->stride, mask_16x16, mask_8x8,
+          mask_4x4, mask_4x4_int, cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
     }
-#else
-    filter_selectively_vert_row2(
-        plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
-        mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
     dst->buf += 16 * dst->stride;
     mask_16x16 >>= 16;
@@ -1390,19 +1345,18 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
 
 #if CONFIG_VP9_HIGHBITDEPTH
     if (cm->use_highbitdepth) {
-      highbd_filter_selectively_horiz(
-          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
-          mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3],
-          (int)cm->bit_depth);
+      highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
+                                      dst->stride, mask_16x16_r, mask_8x8_r,
+                                      mask_4x4_r, mask_4x4_int & 0xff,
+                                      cm->lf_info.lfthr, &lfm->lfl_y[r << 3],
+                                      (int)cm->bit_depth);
     } else {
+#endif  // CONFIG_VP9_HIGHBITDEPTH
       filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
-                               mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
-                               &lfm->lfl_y[r << 3]);
+                               mask_4x4_r, mask_4x4_int & 0xff,
+                               cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
     }
-#else
-    filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
-                             mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
-                             &lfm->lfl_y[r << 3]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
     dst->buf += 8 * dst->stride;
@@ -1436,38 +1390,29 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
       lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
     }
 
-    {
-      unsigned int mask_16x16_l = mask_16x16 & 0xff;
-      unsigned int mask_8x8_l = mask_8x8 & 0xff;
-      unsigned int mask_4x4_l = mask_4x4 & 0xff;
-      unsigned int mask_4x4_int_l = mask_4x4_int & 0xff;
-
-// Disable filtering on the leftmost column.
+    // Disable filtering on the leftmost column.
 #if CONFIG_VP9_HIGHBITDEPTH
-      if (cm->use_highbitdepth) {
-        highbd_filter_selectively_vert_row2(
-            plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
-            mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
-            &lfl_uv[r << 1], (int)cm->bit_depth);
-      } else {
-        filter_selectively_vert_row2(
-            plane->subsampling_x, dst->buf, dst->stride,
-            mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
-            &lfl_uv[r << 1]);
-      }
-#else
-      filter_selectively_vert_row2(
-          plane->subsampling_x, dst->buf, dst->stride,
-          mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
-          &lfl_uv[r << 1]);
+    if (cm->use_highbitdepth) {
+      highbd_filter_selectively_vert_row2(plane->subsampling_x,
+                                          CONVERT_TO_SHORTPTR(dst->buf),
+                                          dst->stride, mask_16x16, mask_8x8,
+                                          mask_4x4, mask_4x4_int,
+                                          cm->lf_info.lfthr, &lfl_uv[r << 1],
+                                          (int)cm->bit_depth);
+    } else {
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-
-      dst->buf += 16 * dst->stride;
-      mask_16x16 >>= 8;
-      mask_8x8 >>= 8;
-      mask_4x4 >>= 8;
-      mask_4x4_int >>= 8;
+      filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride,
+                                   mask_16x16, mask_8x8, mask_4x4, mask_4x4_int,
+                                   cm->lf_info.lfthr, &lfl_uv[r << 1]);
+#if CONFIG_VP9_HIGHBITDEPTH
     }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+    dst->buf += 16 * dst->stride;
+    mask_16x16 >>= 8;
+    mask_8x8 >>= 8;
+    mask_4x4 >>= 8;
+    mask_4x4_int >>= 8;
   }
 
   // Horizontal pass
@@ -1499,17 +1444,16 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
     if (cm->use_highbitdepth) {
       highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
                                       dst->stride, mask_16x16_r, mask_8x8_r,
-                                      mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
-                                      &lfl_uv[r << 1], (int)cm->bit_depth);
+                                      mask_4x4_r, mask_4x4_int_r,
+                                      cm->lf_info.lfthr, &lfl_uv[r << 1],
+                                      (int)cm->bit_depth);
     } else {
+#endif  // CONFIG_VP9_HIGHBITDEPTH
       filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
-                               mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
+                               mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr,
                                &lfl_uv[r << 1]);
+#if CONFIG_VP9_HIGHBITDEPTH
     }
-#else
-    filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
-                             mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
-                             &lfl_uv[r << 1]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
     dst->buf += 8 * dst->stride;