percpu: use metadata blocks to update the chunk contig hint
author: Dennis Zhou (Facebook) <dennisszhou@gmail.com>
Mon, 24 Jul 2017 23:02:18 +0000 (19:02 -0400)
committer: Tejun Heo <tj@kernel.org>
Wed, 26 Jul 2017 21:41:06 +0000 (17:41 -0400)
The largest free region will either be a block level contig hint or an
aggregate over the left_free and right_free areas of blocks. This is a
much smaller set of free areas to check than a full traversal of the
chunk would require.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
mm/percpu.c

index 57b3168..0f05647 100644 (file)
@@ -306,6 +306,67 @@ static unsigned long pcpu_block_off_to_off(int index, int off)
 }
 
 /**
+ * pcpu_next_md_free_region - finds the next hint free area
+ * @chunk: chunk of interest
+ * @bit_off: chunk offset
+ * @bits: size of free area
+ *
+ * Helper function for pcpu_for_each_md_free_region.  It checks
+ * block->contig_hint and performs aggregation across blocks to find the
+ * next hint.  It modifies bit_off and bits in-place to be consumed in the
+ * loop.
+ */
+static void pcpu_next_md_free_region(struct pcpu_chunk *chunk, int *bit_off,
+                                    int *bits)
+{
+       int i = pcpu_off_to_block_index(*bit_off);
+       int block_off = pcpu_off_to_block_off(*bit_off);
+       struct pcpu_block_md *block;
+
+       *bits = 0;
+       for (block = chunk->md_blocks + i; i < pcpu_chunk_nr_blocks(chunk);
+            block++, i++) {
+               /* handles contig area across blocks */
+               if (*bits) {
+                       *bits += block->left_free;
+                       if (block->left_free == PCPU_BITMAP_BLOCK_BITS)
+                               continue;
+                       return;
+               }
+
+               /*
+                * This checks three things.  First is there a contig_hint to
+                * check.  Second, have we checked this hint before by
+                * comparing the block_off.  Third, is this the same as the
+                * right contig hint.  In the last case, it spills over into
+                * the next block and should be handled by the contig area
+                * across blocks code.
+                */
+               *bits = block->contig_hint;
+               if (*bits && block->contig_hint_start >= block_off &&
+                   *bits + block->contig_hint_start < PCPU_BITMAP_BLOCK_BITS) {
+                       *bit_off = pcpu_block_off_to_off(i,
+                                       block->contig_hint_start);
+                       return;
+               }
+
+               *bits = block->right_free;
+               *bit_off = (i + 1) * PCPU_BITMAP_BLOCK_BITS - block->right_free;
+       }
+}
+
+/*
+ * Metadata free area iterator.  Each pass calls pcpu_next_md_free_region()
+ * to get the next free area's offset in @bit_off and size in bits in @bits,
+ * then steps @bit_off by @bits + 1 until it reaches pcpu_chunk_map_bits().
+ * @bit_off and @bits must be int lvalues; each is evaluated more than once.
+ */
+#define pcpu_for_each_md_free_region(chunk, bit_off, bits)             \
+       for (pcpu_next_md_free_region((chunk), &(bit_off), &(bits));    \
+            (bit_off) < pcpu_chunk_map_bits((chunk));                  \
+            (bit_off) += (bits) + 1,                                   \
+            pcpu_next_md_free_region((chunk), &(bit_off), &(bits)))
+
+/**
  * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
  *
@@ -425,29 +486,28 @@ static void pcpu_chunk_update(struct pcpu_chunk *chunk, int bit_off, int bits)
  * pcpu_chunk_refresh_hint - updates metadata about a chunk
  * @chunk: chunk of interest
  *
- * Iterates over the chunk to find the largest free area.
+ * Iterates over the metadata blocks to find the largest contig area.
+ * It also counts the populated pages and uses the delta to update the
+ * global count.
  *
  * Updates:
  *      chunk->contig_bits
  *      chunk->contig_bits_start
- *      nr_empty_pop_pages
+ *      nr_empty_pop_pages (chunk and global)
  */
 static void pcpu_chunk_refresh_hint(struct pcpu_chunk *chunk)
 {
-       int bits, nr_empty_pop_pages;
-       int rs, re; /* region start, region end */
+       int bit_off, bits, nr_empty_pop_pages;
 
        /* clear metadata */
        chunk->contig_bits = 0;
 
+       bit_off = chunk->first_bit;
        bits = nr_empty_pop_pages = 0;
-       pcpu_for_each_unpop_region(chunk->alloc_map, rs, re, chunk->first_bit,
-                                  pcpu_chunk_map_bits(chunk)) {
-               bits = re - rs;
-
-               pcpu_chunk_update(chunk, rs, bits);
+       pcpu_for_each_md_free_region(chunk, bit_off, bits) {
+               pcpu_chunk_update(chunk, bit_off, bits);
 
-               nr_empty_pop_pages += pcpu_cnt_pop_pages(chunk, rs, bits);
+               nr_empty_pop_pages += pcpu_cnt_pop_pages(chunk, bit_off, bits);
        }
 
        /*