1 // SPDX-License-Identifier: GPL-2.0+
3 * Maple Tree implementation
4 * Copyright (c) 2018-2022 Oracle Corporation
5 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
6 * Matthew Wilcox <willy@infradead.org>
10 * DOC: Interesting implementation details of the Maple Tree
12 * Each node type has a number of slots for entries and a number of slots for
13 * pivots. In the case of dense nodes, the pivots are implied by the position
14 * and are simply the slot index + the minimum of the node.
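 *
 * For example (illustrative numbers, not taken from the code): a dense node
 * with a minimum of 100 stores the entry for index 103 in slot 3, since
 * 100 + 3 = 103, and no pivot needs to be stored for it.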
16 * In regular B-Tree terms, pivots are called keys. The term pivot is used to
17 * indicate that the tree is specifying ranges. Pivots may appear in the
18 * subtree with an entry attached to the value, whereas keys are unique to a
19 * specific position of a B-tree. Pivot values are inclusive of the slot with
20 * the same index.
23 * The following illustrates the layout of a range64 node's slots and pivots.
26 *  Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
27 *           ┬   ┬   ┬   ┬     ┬    ┬    ┬    ┬    ┬
28 *           │   │   │   │     │    │    │    │    └─ Implied maximum
29 *           │   │   │   │     │    │    │    └─ Pivot 14
30 *           │   │   │   │     │    │    └─ Pivot 13
31 *           │   │   │   │     │    └─ Pivot 12
32 *           │   │   │   │     └─ Pivot 11
33 *           │   │   │   └─ Pivot 2
34 *           │   │   └─ Pivot 1
35 *           │   └─ Pivot 0
36 *           └─  Implied minimum
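 *
 * A worked sketch with assumed contents: if the node minimum is 0 and
 * pivots 0-2 hold 10, 20 and 35, then slot 0 covers indices 0-10, slot 1
 * covers 11-20 and slot 2 covers 21-35; the last occupied slot reaches up
 * to the implied maximum inherited from the parent.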
39 * Internal (non-leaf) nodes contain pointers to other nodes.
40 * Leaf nodes contain entries.
42 * The location of interest is often referred to as an offset. All offsets have
43 * a slot, but the last offset has an implied pivot from the node above (or
44 * ULONG_MAX for the root node).
46 * Ranges complicate certain write activities. When modifying any of
47 * the B-tree variants, it is known that one entry will either be added or
48 * deleted. When modifying the Maple Tree, one store operation may overwrite
49 * the entire data set, or one half of the tree, or the middle half of the tree.
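 *
 * For instance (hypothetical ranges): with entries spanning 0-100, 101-200
 * and 201-300, a single mtree_store_range() of 50-250 truncates the first
 * range, replaces the second entirely, and truncates the third, all in one
 * write.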
54 #include <linux/maple_tree.h>
55 #include <linux/xarray.h>
56 #include <linux/types.h>
57 #include <linux/export.h>
58 #include <linux/slab.h>
59 #include <linux/limits.h>
60 #include <asm/barrier.h>
62 #define CREATE_TRACE_POINTS
63 #include <trace/events/maple_tree.h>
65 #define MA_ROOT_PARENT 1
69 * * MA_STATE_BULK - Bulk insert mode
70 * * MA_STATE_REBALANCE - Indicate a rebalance during bulk insert
71 * * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation
73 #define MA_STATE_BULK 1
74 #define MA_STATE_REBALANCE 2
75 #define MA_STATE_PREALLOC 4
77 #define ma_parent_ptr(x) ((struct maple_pnode *)(x))
78 #define ma_mnode_ptr(x) ((struct maple_node *)(x))
79 #define ma_enode_ptr(x) ((struct maple_enode *)(x))
80 static struct kmem_cache *maple_node_cache;
82 #ifdef CONFIG_DEBUG_MAPLE_TREE
83 static const unsigned long mt_max[] = {
84 [maple_dense] = MAPLE_NODE_SLOTS,
85 [maple_leaf_64] = ULONG_MAX,
86 [maple_range_64] = ULONG_MAX,
87 [maple_arange_64] = ULONG_MAX,
89 #define mt_node_max(x) mt_max[mte_node_type(x)]
92 static const unsigned char mt_slots[] = {
93 [maple_dense] = MAPLE_NODE_SLOTS,
94 [maple_leaf_64] = MAPLE_RANGE64_SLOTS,
95 [maple_range_64] = MAPLE_RANGE64_SLOTS,
96 [maple_arange_64] = MAPLE_ARANGE64_SLOTS,
98 #define mt_slot_count(x) mt_slots[mte_node_type(x)]
100 static const unsigned char mt_pivots[] = {
102 [maple_leaf_64] = MAPLE_RANGE64_SLOTS - 1,
103 [maple_range_64] = MAPLE_RANGE64_SLOTS - 1,
104 [maple_arange_64] = MAPLE_ARANGE64_SLOTS - 1,
106 #define mt_pivot_count(x) mt_pivots[mte_node_type(x)]
108 static const unsigned char mt_min_slots[] = {
109 [maple_dense] = MAPLE_NODE_SLOTS / 2,
110 [maple_leaf_64] = (MAPLE_RANGE64_SLOTS / 2) - 2,
111 [maple_range_64] = (MAPLE_RANGE64_SLOTS / 2) - 2,
112 [maple_arange_64] = (MAPLE_ARANGE64_SLOTS / 2) - 1,
114 #define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]
116 #define MAPLE_BIG_NODE_SLOTS (MAPLE_RANGE64_SLOTS * 2 + 2)
117 #define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1)
119 struct maple_big_node {
120 struct maple_pnode *parent;
121 unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1];
123 struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS];
125 unsigned long padding[MAPLE_BIG_NODE_GAPS];
126 unsigned long gap[MAPLE_BIG_NODE_GAPS];
130 enum maple_type type;
134 * The maple_subtree_state is used to build a tree to replace a segment of an
135 * existing tree in a more atomic way. Any walkers of the older tree will hit a
136 * dead node and restart on updates.
138 struct maple_subtree_state {
139 struct ma_state *orig_l; /* Original left side of subtree */
140 struct ma_state *orig_r; /* Original right side of subtree */
141 struct ma_state *l; /* New left side of subtree */
142 struct ma_state *m; /* New middle of subtree (rare) */
143 struct ma_state *r; /* New right side of subtree */
144 struct ma_topiary *free; /* nodes to be freed */
145 struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */
146 struct maple_big_node *bn;
150 static inline struct maple_node *mt_alloc_one(gfp_t gfp)
152 return kmem_cache_alloc(maple_node_cache, gfp);
155 static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
157 return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
160 static inline void mt_free_bulk(size_t size, void __rcu **nodes)
162 kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
165 static void mt_free_rcu(struct rcu_head *head)
167 struct maple_node *node = container_of(head, struct maple_node, rcu);
169 kmem_cache_free(maple_node_cache, node);
173 * ma_free_rcu() - Use rcu callback to free a maple node
174 * @node: The node to free
176 * The maple tree uses the parent pointer to indicate this node is no longer in
177 * use and will be freed.
179 static void ma_free_rcu(struct maple_node *node)
181 node->parent = ma_parent_ptr(node);
182 call_rcu(&node->rcu, mt_free_rcu);
186 static void mas_set_height(struct ma_state *mas)
188 unsigned int new_flags = mas->tree->ma_flags;
190 new_flags &= ~MT_FLAGS_HEIGHT_MASK;
191 BUG_ON(mas->depth > MAPLE_HEIGHT_MAX);
192 new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET;
193 mas->tree->ma_flags = new_flags;
196 static unsigned int mas_mt_height(struct ma_state *mas)
198 return mt_height(mas->tree);
201 static inline enum maple_type mte_node_type(const struct maple_enode *entry)
203 return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
204 MAPLE_NODE_TYPE_MASK;
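
/*
 * Decoding sketch (assumed values): an encoded node built from a 256B
 * aligned pointer with maple_range_64 placed in bits 3-6 yields
 * maple_range_64 here; the shift discards the low metadata bits and the
 * mask keeps only the type.
 */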
207 static inline bool ma_is_dense(const enum maple_type type)
209 return type < maple_leaf_64;
212 static inline bool ma_is_leaf(const enum maple_type type)
214 return type < maple_range_64;
217 static inline bool mte_is_leaf(const struct maple_enode *entry)
219 return ma_is_leaf(mte_node_type(entry));
223 * We also reserve values with the bottom two bits set to '10' which are
224 * below 4096 (i.e. MAPLE_RESERVED_RANGE).
226 static inline bool mt_is_reserved(const void *entry)
228 return ((unsigned long)entry < MAPLE_RESERVED_RANGE) &&
229 xa_is_internal(entry);
232 static inline void mas_set_err(struct ma_state *mas, long err)
234 mas->node = MA_ERROR(err);
237 static inline bool mas_is_ptr(struct ma_state *mas)
239 return mas->node == MAS_ROOT;
242 static inline bool mas_is_start(struct ma_state *mas)
244 return mas->node == MAS_START;
247 bool mas_is_err(struct ma_state *mas)
249 return xa_is_err(mas->node);
252 static inline bool mas_searchable(struct ma_state *mas)
254 if (mas_is_none(mas))
263 static inline struct maple_node *mte_to_node(const struct maple_enode *entry)
265 return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK);
269 * mte_to_mat() - Convert a maple encoded node to a maple topiary node.
270 * @entry: The maple encoded node
272 * Return: a maple topiary pointer
274 static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry)
276 return (struct maple_topiary *)
277 ((unsigned long)entry & ~MAPLE_NODE_MASK);
281 * mas_mn() - Get the maple state node.
282 * @mas: The maple state
284 * Return: the maple node (not encoded - bare pointer).
286 static inline struct maple_node *mas_mn(const struct ma_state *mas)
288 return mte_to_node(mas->node);
292 * mte_set_node_dead() - Set a maple encoded node as dead.
293 * @mn: The maple encoded node.
295 static inline void mte_set_node_dead(struct maple_enode *mn)
297 mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn));
298 smp_wmb(); /* Needed for RCU */
301 /* Bit 1 indicates the root is a node */
302 #define MAPLE_ROOT_NODE 0x02
303 /* maple_type stored bit 3-6 */
304 #define MAPLE_ENODE_TYPE_SHIFT 0x03
305 /* Bit 2 means a NULL somewhere below */
306 #define MAPLE_ENODE_NULL 0x04
308 static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
309 enum maple_type type)
311 return (void *)((unsigned long)node |
312 (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
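
/*
 * A round-trip sketch, assuming a 256B aligned node from the slab cache:
 *
 *	struct maple_enode *enode = mt_mk_node(node, maple_leaf_64);
 *
 * Both mte_node_type(enode) == maple_leaf_64 and mte_to_node(enode) == node
 * then hold, since the metadata lives entirely in the low bits that the
 * alignment guarantees to be zero.
 */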
315 static inline void *mte_mk_root(const struct maple_enode *node)
317 return (void *)((unsigned long)node | MAPLE_ROOT_NODE);
320 static inline void *mte_safe_root(const struct maple_enode *node)
322 return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
325 static inline void mte_set_full(const struct maple_enode *node)
327 node = (void *)((unsigned long)node & ~MAPLE_ENODE_NULL);
330 static inline void mte_clear_full(const struct maple_enode *node)
332 node = (void *)((unsigned long)node | MAPLE_ENODE_NULL);
335 static inline bool ma_is_root(struct maple_node *node)
337 return ((unsigned long)node->parent & MA_ROOT_PARENT);
340 static inline bool mte_is_root(const struct maple_enode *node)
342 return ma_is_root(mte_to_node(node));
345 static inline bool mas_is_root_limits(const struct ma_state *mas)
347 return !mas->min && mas->max == ULONG_MAX;
350 static inline bool mt_is_alloc(struct maple_tree *mt)
352 return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE);
357 * Excluding root, the parent pointer is 256B aligned like all other tree nodes.
358 * When storing 32 or 64 bit values, the offset can fit into 5 bits. The 16
359 * bit values need an extra bit to store the offset. This extra bit comes from
360 * a reuse of the last bit in the node type. This is possible by using bit 1 to
361 * indicate if bit 2 is part of the type or the slot.
365 *  0b?00 = 16 bit nodes
366 *  0b010 = 32 bit nodes
367 *  0b110 = 64 bit nodes
369 * Slot size and alignment
371 * 0b?00 : 16 bit values, type in 0-1, slot in 2-7
372 * 0b010 : 32 bit values, type in 0-2, slot in 3-7
373 * 0b110 : 64 bit values, type in 0-2, slot in 3-7
376 #define MAPLE_PARENT_ROOT 0x01
378 #define MAPLE_PARENT_SLOT_SHIFT 0x03
379 #define MAPLE_PARENT_SLOT_MASK 0xF8
381 #define MAPLE_PARENT_16B_SLOT_SHIFT 0x02
382 #define MAPLE_PARENT_16B_SLOT_MASK 0xFC
384 #define MAPLE_PARENT_RANGE64 0x06
385 #define MAPLE_PARENT_RANGE32 0x04
386 #define MAPLE_PARENT_NOT_RANGE16 0x02
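
/*
 * Worked encoding (assumed numbers): slot 2 in a 64 bit range parent is
 * stored as (2 << MAPLE_PARENT_SLOT_SHIFT) | MAPLE_PARENT_RANGE64, giving a
 * low byte of 0b00010110; masking with MAPLE_PARENT_SLOT_MASK and shifting
 * right by MAPLE_PARENT_SLOT_SHIFT recovers slot 2.
 */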
389 * mte_parent_shift() - Get the parent shift for the slot storage.
390 * @parent: The parent pointer cast as an unsigned long
391 * Return: The shift into that pointer to the start of the slot
393 static inline unsigned long mte_parent_shift(unsigned long parent)
395 /* Note bit 1 == 0 means 16B */
396 if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
397 return MAPLE_PARENT_SLOT_SHIFT;
399 return MAPLE_PARENT_16B_SLOT_SHIFT;
403 * mte_parent_slot_mask() - Get the slot mask for the parent.
404 * @parent: The parent pointer cast as an unsigned long.
405 * Return: The slot mask for that parent.
407 static inline unsigned long mte_parent_slot_mask(unsigned long parent)
409 /* Note bit 1 == 0 means 16B */
410 if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
411 return MAPLE_PARENT_SLOT_MASK;
413 return MAPLE_PARENT_16B_SLOT_MASK;
417 * mte_parent_enum() - Return the maple_type of the parent from the stored
418 * parent type.
419 * @p_enode: The maple parent pointer cast as an encoded node
420 * @mt: The maple tree
421 * Return: The node->parent maple_type
424 enum maple_type mte_parent_enum(struct maple_enode *p_enode,
425 struct maple_tree *mt)
427 unsigned long p_type;
429 p_type = (unsigned long)p_enode;
430 if (p_type & MAPLE_PARENT_ROOT)
431 return 0; /* Validated in the caller. */
433 p_type &= MAPLE_NODE_MASK;
434 p_type = p_type & ~(MAPLE_PARENT_ROOT | mte_parent_slot_mask(p_type));
437 case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
438 if (mt_is_alloc(mt))
439 return maple_arange_64;
440 return maple_range_64;
447 enum maple_type mas_parent_enum(struct ma_state *mas, struct maple_enode *enode)
449 return mte_parent_enum(ma_enode_ptr(mte_to_node(enode)->parent), mas->tree);
453 * mte_set_parent() - Set the parent node and encode the slot
454 * @enode: The encoded maple node.
455 * @parent: The encoded maple node that is the parent of @enode.
456 * @slot: The slot that @enode resides in @parent.
458 * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the
459 * parent type.
462 void mte_set_parent(struct maple_enode *enode, const struct maple_enode *parent,
465 unsigned long val = (unsigned long) parent;
468 enum maple_type p_type = mte_node_type(parent);
470 BUG_ON(p_type == maple_dense);
471 BUG_ON(p_type == maple_leaf_64);
475 case maple_arange_64:
476 shift = MAPLE_PARENT_SLOT_SHIFT;
477 type = MAPLE_PARENT_RANGE64;
486 val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */
487 val |= (slot << shift) | type;
488 mte_to_node(enode)->parent = ma_parent_ptr(val);
492 * mte_parent_slot() - get the parent slot of @enode.
493 * @enode: The encoded maple node.
495 * Return: The slot in the parent node where @enode resides.
497 static inline unsigned int mte_parent_slot(const struct maple_enode *enode)
499 unsigned long val = (unsigned long) mte_to_node(enode)->parent;
506 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost
507 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT
509 return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val);
513 * mte_parent() - Get the parent of @node.
514 * @node: The encoded maple node.
516 * Return: The parent maple node.
518 static inline struct maple_node *mte_parent(const struct maple_enode *enode)
520 return (void *)((unsigned long)
521 (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK);
525 * ma_dead_node() - check if the @node is dead.
526 * @node: The maple node
528 * Return: true if dead, false otherwise.
530 static inline bool ma_dead_node(const struct maple_node *node)
532 struct maple_node *parent;
534 /* Do not reorder reads from the node prior to the parent check */
536 parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK);
537 return (parent == node);
541 * mte_dead_node() - check if the @enode is dead.
542 * @enode: The encoded maple node
544 * Return: true if dead, false otherwise.
546 static inline bool mte_dead_node(const struct maple_enode *enode)
548 struct maple_node *parent, *node;
550 node = mte_to_node(enode);
551 /* Do not reorder reads from the node prior to the parent check */
553 parent = mte_parent(enode);
554 return (parent == node);
558 * mas_allocated() - Get the number of nodes allocated in a maple state.
559 * @mas: The maple state
561 * The ma_state alloc member is overloaded to hold a pointer to the first
562 * allocated node or to the number of requested nodes to allocate. If bit 0 is
563 * set, then the alloc contains the number of requested nodes. If there is an
564 * allocated node, then the total number of allocated nodes is in that node.
566 * Return: The total number of nodes allocated
568 static inline unsigned long mas_allocated(const struct ma_state *mas)
570 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1))
573 return mas->alloc->total;
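
/*
 * Encoding sketch (assumed request): asking for 3 nodes with nothing yet
 * allocated stores (3 << 1) | 1 = 0x7 in mas->alloc; bit 0 marks it as a
 * count rather than a pointer, so mas_allocated() reports 0 while
 * mas_alloc_req() returns 3.
 */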
577 * mas_set_alloc_req() - Set the requested number of allocations.
578 * @mas: the maple state
579 * @count: the number of allocations.
581 * The requested number of allocations is either in the first allocated node,
582 * located in @mas->alloc->request_count, or directly in @mas->alloc if there is
583 * no allocated node. Set the request either in the node or do the necessary
584 * encoding to store in @mas->alloc directly.
586 static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count)
588 if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) {
592 mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U);
596 mas->alloc->request_count = count;
600 * mas_alloc_req() - get the requested number of allocations.
601 * @mas: The maple state
603 * The alloc count is either stored directly in @mas, or in
604 * @mas->alloc->request_count if there is at least one node allocated. Decode
605 * the request count if it's stored directly in @mas->alloc.
607 * Return: The allocation request count.
609 static inline unsigned int mas_alloc_req(const struct ma_state *mas)
611 if ((unsigned long)mas->alloc & 0x1)
612 return (unsigned long)(mas->alloc) >> 1;
614 return mas->alloc->request_count;
619 * ma_pivots() - Get a pointer to the maple node pivots.
620 * @node: The maple node
621 * @type: The node type
623 * In the event of a dead node, this array may be %NULL
625 * Return: A pointer to the maple node pivots
627 static inline unsigned long *ma_pivots(struct maple_node *node,
628 enum maple_type type)
631 case maple_arange_64:
632 return node->ma64.pivot;
635 return node->mr64.pivot;
643 * ma_gaps() - Get a pointer to the maple node gaps.
644 * @node: The maple node
645 * @type: The node type
647 * Return: A pointer to the maple node gaps
649 static inline unsigned long *ma_gaps(struct maple_node *node,
650 enum maple_type type)
653 case maple_arange_64:
654 return node->ma64.gap;
664 * mte_pivot() - Get the pivot at @piv of the maple encoded node.
665 * @mn: The maple encoded node.
668 * Return: the pivot at @piv of @mn.
670 static inline unsigned long mte_pivot(const struct maple_enode *mn,
673 struct maple_node *node = mte_to_node(mn);
674 enum maple_type type = mte_node_type(mn);
676 if (piv >= mt_pivots[type]) {
681 case maple_arange_64:
682 return node->ma64.pivot[piv];
685 return node->mr64.pivot[piv];
693 * mas_safe_pivot() - get the pivot at @piv or mas->max.
694 * @mas: The maple state
695 * @pivots: The pointer to the maple node pivots
696 * @piv: The pivot to fetch
697 * @type: The maple node type
699 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max
700 * otherwise.
702 static inline unsigned long
703 mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots,
704 unsigned char piv, enum maple_type type)
706 if (piv >= mt_pivots[type])
713 * mas_safe_min() - Return the minimum for a given offset.
714 * @mas: The maple state
715 * @pivots: The pointer to the maple node pivots
716 * @offset: The offset into the pivot array
718 * Return: The minimum range value that is contained in @offset.
720 static inline unsigned long
721 mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset)
723 if (likely(offset))
724 return pivots[offset - 1] + 1;
726 return mas->min;
730 * mas_logical_pivot() - Get the logical pivot of a given offset.
731 * @mas: The maple state
732 * @pivots: The pointer to the maple node pivots
733 * @offset: The offset into the pivot array
734 * @type: The maple node type
736 * When there is no value at a pivot (beyond the end of the data), then the
737 * pivot is actually @mas->max.
739 * Return: the logical pivot of a given @offset.
741 static inline unsigned long
742 mas_logical_pivot(struct ma_state *mas, unsigned long *pivots,
743 unsigned char offset, enum maple_type type)
745 unsigned long lpiv = mas_safe_pivot(mas, pivots, offset, type);
757 * mte_set_pivot() - Set a pivot to a value in an encoded maple node.
758 * @mn: The encoded maple node
759 * @piv: The pivot offset
760 * @val: The value of the pivot
762 static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv,
765 struct maple_node *node = mte_to_node(mn);
766 enum maple_type type = mte_node_type(mn);
768 BUG_ON(piv >= mt_pivots[type]);
773 node->mr64.pivot[piv] = val;
775 case maple_arange_64:
776 node->ma64.pivot[piv] = val;
785 * ma_slots() - Get a pointer to the maple node slots.
786 * @mn: The maple node
787 * @mt: The maple node type
789 * Return: A pointer to the maple node slots
791 static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
795 case maple_arange_64:
796 return mn->ma64.slot;
799 return mn->mr64.slot;
805 static inline bool mt_locked(const struct maple_tree *mt)
807 return mt_external_lock(mt) ? mt_lock_is_held(mt) :
808 lockdep_is_held(&mt->ma_lock);
811 static inline void *mt_slot(const struct maple_tree *mt,
812 void __rcu **slots, unsigned char offset)
814 return rcu_dereference_check(slots[offset], mt_locked(mt));
817 static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots,
818 unsigned char offset)
820 return rcu_dereference_protected(slots[offset], mt_locked(mt));
823 * mas_slot_locked() - Get the slot value when holding the maple tree lock.
824 * @mas: The maple state
825 * @slots: The pointer to the slots
826 * @offset: The offset into the slots array to fetch
828 * Return: The entry stored in @slots at the @offset.
830 static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots,
831 unsigned char offset)
833 return mt_slot_locked(mas->tree, slots, offset);
837 * mas_slot() - Get the slot value when not holding the maple tree lock.
838 * @mas: The maple state
839 * @slots: The pointer to the slots
840 * @offset: The offset into the slots array to fetch
842 * Return: The entry stored in @slots at the @offset
844 static inline void *mas_slot(struct ma_state *mas, void __rcu **slots,
845 unsigned char offset)
847 return mt_slot(mas->tree, slots, offset);
851 * mas_root() - Get the maple tree root.
852 * @mas: The maple state.
854 * Return: The pointer to the root of the tree
856 static inline void *mas_root(struct ma_state *mas)
858 return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree));
861 static inline void *mt_root_locked(struct maple_tree *mt)
863 return rcu_dereference_protected(mt->ma_root, mt_locked(mt));
867 * mas_root_locked() - Get the maple tree root when holding the maple tree lock.
868 * @mas: The maple state.
870 * Return: The pointer to the root of the tree
872 static inline void *mas_root_locked(struct ma_state *mas)
874 return mt_root_locked(mas->tree);
877 static inline struct maple_metadata *ma_meta(struct maple_node *mn,
881 case maple_arange_64:
882 return &mn->ma64.meta;
884 return &mn->mr64.meta;
889 * ma_set_meta() - Set the metadata information of a node.
890 * @mn: The maple node
891 * @mt: The maple node type
892 * @offset: The offset of the highest sub-gap in this node.
893 * @end: The end of the data in this node.
895 static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
896 unsigned char offset, unsigned char end)
898 struct maple_metadata *meta = ma_meta(mn, mt);
905 * mt_clear_meta() - clear the metadata information of a node, if it exists
906 * @mt: The maple tree
907 * @mn: The maple node
908 * @type: The maple node type
912 static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn,
913 enum maple_type type)
915 struct maple_metadata *meta;
916 unsigned long *pivots;
922 pivots = mn->mr64.pivot;
923 if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) {
924 slots = mn->mr64.slot;
925 next = mt_slot_locked(mt, slots,
926 MAPLE_RANGE64_SLOTS - 1);
927 if (unlikely((mte_to_node(next) &&
928 mte_node_type(next))))
929 return; /* no metadata, could be node */
932 case maple_arange_64:
933 meta = ma_meta(mn, type);
944 * ma_meta_end() - Get the data end of a node from the metadata
945 * @mn: The maple node
946 * @mt: The maple node type
948 static inline unsigned char ma_meta_end(struct maple_node *mn,
951 struct maple_metadata *meta = ma_meta(mn, mt);
957 * ma_meta_gap() - Get the largest gap location of a node from the metadata
958 * @mn: The maple node
959 * @mt: The maple node type
961 static inline unsigned char ma_meta_gap(struct maple_node *mn,
964 BUG_ON(mt != maple_arange_64);
966 return mn->ma64.meta.gap;
970 * ma_set_meta_gap() - Set the largest gap location in a node's metadata
971 * @mn: The maple node
972 * @mt: The maple node type
973 * @offset: The location of the largest gap.
975 static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt,
976 unsigned char offset)
979 struct maple_metadata *meta = ma_meta(mn, mt);
985 * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes.
986 * @mat: The ma_topiary, a linked list of dead nodes.
987 * @dead_enode: The node to be marked as dead and added to the tail of the list
989 * Add the @dead_enode to the linked list in @mat.
991 static inline void mat_add(struct ma_topiary *mat,
992 struct maple_enode *dead_enode)
994 mte_set_node_dead(dead_enode);
995 mte_to_mat(dead_enode)->next = NULL;
997 mat->tail = mat->head = dead_enode;
1001 mte_to_mat(mat->tail)->next = dead_enode;
1002 mat->tail = dead_enode;
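
/*
 * Usage sketch (illustrative only): dead nodes are queued on a topiary list
 * and reclaimed in one pass once the replacement is visible:
 *
 *	MA_TOPIARY(mat, mas->tree);
 *	mat_add(&mat, old_enode);
 *	mas_mat_free(mas, &mat);
 */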
1005 static void mte_destroy_walk(struct maple_enode *, struct maple_tree *);
1006 static inline void mas_free(struct ma_state *mas, struct maple_enode *used);
1009 * mas_mat_free() - Free all nodes in a dead list.
1010 * @mas: The maple state
1011 * @mat: The ma_topiary linked list of dead nodes to free.
1013 * Free walk a dead list.
1015 static void mas_mat_free(struct ma_state *mas, struct ma_topiary *mat)
1017 struct maple_enode *next;
1020 next = mte_to_mat(mat->head)->next;
1021 mas_free(mas, mat->head);
1027 * mas_mat_destroy() - Free all nodes and subtrees in a dead list.
1028 * @mas: The maple state
1029 * @mat: The ma_topiary linked list of dead nodes to free.
1031 * Destroy walk a dead list.
1033 static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat)
1035 struct maple_enode *next;
1038 next = mte_to_mat(mat->head)->next;
1039 mte_destroy_walk(mat->head, mat->mtree);
1044 * mas_descend() - Descend into the slot stored in the ma_state.
1045 * @mas: The maple state.
1047 * Note: Not RCU safe, only use in write side or debug code.
1049 static inline void mas_descend(struct ma_state *mas)
1051 enum maple_type type;
1052 unsigned long *pivots;
1053 struct maple_node *node;
1057 type = mte_node_type(mas->node);
1058 pivots = ma_pivots(node, type);
1059 slots = ma_slots(node, type);
1061 if (mas->offset)
1062 mas->min = pivots[mas->offset - 1] + 1;
1063 mas->max = mas_safe_pivot(mas, pivots, mas->offset, type);
1064 mas->node = mas_slot(mas, slots, mas->offset);
1068 * mte_set_gap() - Set a maple node gap.
1069 * @mn: The encoded maple node
1070 * @gap: The offset of the gap to set
1071 * @val: The gap value
1073 static inline void mte_set_gap(const struct maple_enode *mn,
1074 unsigned char gap, unsigned long val)
1076 switch (mte_node_type(mn)) {
1079 case maple_arange_64:
1080 mte_to_node(mn)->ma64.gap[gap] = val;
1086 * mas_ascend() - Walk up a level of the tree.
1087 * @mas: The maple state
1089 * Sets the @mas->max and @mas->min to the correct values when walking up. This
1090 * may cause several levels of walking up to find the correct min and max.
1091 * May find a dead node which will cause a premature return.
1092 * Return: 1 on dead node, 0 otherwise
1094 static int mas_ascend(struct ma_state *mas)
1096 struct maple_enode *p_enode; /* parent enode. */
1097 struct maple_enode *a_enode; /* ancestor enode. */
1098 struct maple_node *a_node; /* ancestor node. */
1099 struct maple_node *p_node; /* parent node. */
1100 unsigned char a_slot;
1101 enum maple_type a_type;
1102 unsigned long min, max;
1103 unsigned long *pivots;
1104 unsigned char offset;
1105 bool set_max = false, set_min = false;
1107 a_node = mas_mn(mas);
1108 if (ma_is_root(a_node)) {
1113 p_node = mte_parent(mas->node);
1114 if (unlikely(a_node == p_node))
1116 a_type = mas_parent_enum(mas, mas->node);
1117 offset = mte_parent_slot(mas->node);
1118 a_enode = mt_mk_node(p_node, a_type);
1120 /* Check to make sure all parent information is still accurate */
1121 if (p_node != mte_parent(mas->node))
1124 mas->node = a_enode;
1125 mas->offset = offset;
1127 if (mte_is_root(a_enode)) {
1128 mas->max = ULONG_MAX;
1137 a_type = mas_parent_enum(mas, p_enode);
1138 a_node = mte_parent(p_enode);
1139 a_slot = mte_parent_slot(p_enode);
1140 a_enode = mt_mk_node(a_node, a_type);
1141 pivots = ma_pivots(a_node, a_type);
1143 if (unlikely(ma_dead_node(a_node)))
1146 if (!set_min && a_slot) {
1148 min = pivots[a_slot - 1] + 1;
1151 if (!set_max && a_slot < mt_pivots[a_type]) {
1153 max = pivots[a_slot];
1156 if (unlikely(ma_dead_node(a_node)))
1159 if (unlikely(ma_is_root(a_node)))
1162 } while (!set_min || !set_max);
1170 * mas_pop_node() - Get a previously allocated maple node from the maple state.
1171 * @mas: The maple state
1173 * Return: A pointer to a maple node.
1175 static inline struct maple_node *mas_pop_node(struct ma_state *mas)
1177 struct maple_alloc *ret, *node = mas->alloc;
1178 unsigned long total = mas_allocated(mas);
1179 unsigned int req = mas_alloc_req(mas);
1181 /* nothing or a request pending. */
1182 if (WARN_ON(!total))
1186 /* single allocation in this ma_state */
1192 if (node->node_count == 1) {
1193 /* Single allocation in this node. */
1194 mas->alloc = node->slot[0];
1195 mas->alloc->total = node->total - 1;
1200 ret = node->slot[--node->node_count];
1201 node->slot[node->node_count] = NULL;
1207 mas_set_alloc_req(mas, req);
1210 memset(ret, 0, sizeof(*ret));
1211 return (struct maple_node *)ret;
1215 * mas_push_node() - Push a node back on the maple state allocation.
1216 * @mas: The maple state
1217 * @used: The used maple node
1219 * Stores the maple node back into @mas->alloc for reuse. Updates allocated and
1220 * requested node count as necessary.
1222 static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
1224 struct maple_alloc *reuse = (struct maple_alloc *)used;
1225 struct maple_alloc *head = mas->alloc;
1226 unsigned long count;
1227 unsigned int requested = mas_alloc_req(mas);
1229 count = mas_allocated(mas);
1231 reuse->request_count = 0;
1232 reuse->node_count = 0;
1233 if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) {
1234 head->slot[head->node_count++] = reuse;
1240 if ((head) && !((unsigned long)head & 0x1)) {
1241 reuse->slot[0] = head;
1242 reuse->node_count = 1;
1243 reuse->total += head->total;
1249 mas_set_alloc_req(mas, requested - 1);
1253 * mas_alloc_nodes() - Allocate nodes into a maple state
1254 * @mas: The maple state
1255 * @gfp: The GFP Flags
1257 static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
1259 struct maple_alloc *node;
1260 unsigned long allocated = mas_allocated(mas);
1261 unsigned int requested = mas_alloc_req(mas);
1263 void **slots = NULL;
1264 unsigned int max_req = 0;
1269 mas_set_alloc_req(mas, 0);
1270 if (mas->mas_flags & MA_STATE_PREALLOC) {
1273 WARN_ON(!allocated);
1276 if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) {
1277 node = (struct maple_alloc *)mt_alloc_one(gfp);
1282 node->slot[0] = mas->alloc;
1283 node->node_count = 1;
1285 node->node_count = 0;
1289 node->total = ++allocated;
1294 node->request_count = 0;
1296 max_req = MAPLE_ALLOC_SLOTS;
1297 if (node->node_count) {
1298 unsigned int offset = node->node_count;
1300 slots = (void **)&node->slot[offset];
1303 slots = (void **)&node->slot;
1306 max_req = min(requested, max_req);
1307 count = mt_alloc_bulk(gfp, max_req, slots);
1311 node->node_count += count;
1313 node = node->slot[0];
1314 node->node_count = 0;
1315 node->request_count = 0;
1318 mas->alloc->total = allocated;
1322 /* Clean up potential freed allocations on bulk failure */
1323 memset(slots, 0, max_req * sizeof(unsigned long));
1325 mas_set_alloc_req(mas, requested);
1326 if (mas->alloc && !(((unsigned long)mas->alloc & 0x1)))
1327 mas->alloc->total = allocated;
1328 mas_set_err(mas, -ENOMEM);
1334 * mas_free() - Free an encoded maple node
1335 * @mas: The maple state
1336 * @used: The encoded maple node to free.
1338 * Uses rcu free if necessary, pushes @used back on the maple state allocations
1341 static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
1343 struct maple_node *tmp = mte_to_node(used);
1345 if (mt_in_rcu(mas->tree))
1348 mas_push_node(mas, tmp);
1352 * mas_node_count_gfp() - Check if enough nodes are allocated and request more
1353 * if there are not enough nodes.
1354 * @mas: The maple state
1355 * @count: The number of nodes needed
1356 * @gfp: the gfp flags
1358 static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp)
1360 unsigned long allocated = mas_allocated(mas);
1362 if (allocated < count) {
1363 mas_set_alloc_req(mas, count - allocated);
1364 mas_alloc_nodes(mas, gfp);
1369 * mas_node_count() - Check if enough nodes are allocated and request more if
1370 * there are not enough nodes.
1371 * @mas: The maple state
1372 * @count: The number of nodes needed
1374 * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags.
1376 static void mas_node_count(struct ma_state *mas, int count)
1378 return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN);
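
/*
 * Usage sketch (hypothetical count): writers reserve the worst-case number
 * of nodes up front and check for failure before touching the tree:
 *
 *	mas_node_count(mas, count);
 *	if (unlikely(mas_is_err(mas)))
 *		return 0;
 *
 * Callers typically recover by retrying through mas_nomem() with a more
 * permissive gfp mask.
 */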
1382 * mas_start() - Sets up maple state for operations.
1383 * @mas: The maple state.
1385 * If mas->node == MAS_START, then set the min, max and depth to
1386 * defaults.
1389 * - If mas->node is an error or not MAS_START, return NULL.
1390 * - If it's an empty tree: NULL & mas->node == MAS_NONE
1391 * - If it's a single entry: The entry & mas->node == MAS_ROOT
1392 * - If it's a tree: NULL & mas->node == safe root node.
1394 static inline struct maple_enode *mas_start(struct ma_state *mas)
1396 if (likely(mas_is_start(mas))) {
1397 struct maple_enode *root;
1400 mas->max = ULONG_MAX;
1404 root = mas_root(mas);
1405 /* Tree with nodes */
1406 if (likely(xa_is_node(root))) {
1408 mas->node = mte_safe_root(root);
1410 if (mte_dead_node(mas->node))
1417 if (unlikely(!root)) {
1418 mas->node = MAS_NONE;
1419 mas->offset = MAPLE_NODE_SLOTS;
1423 /* Single entry tree */
1424 mas->node = MAS_ROOT;
1425 mas->offset = MAPLE_NODE_SLOTS;
1427 /* Single entry tree. */
1438 * ma_data_end() - Find the end of the data in a node.
1439 * @node: The maple node
1440 * @type: The maple node type
1441 * @pivots: The array of pivots in the node
1442 * @max: The maximum value in the node
1444 * Uses metadata to find the end of the data when possible.
1445 * Return: The zero indexed last slot with data (may be null).
1447 static inline unsigned char ma_data_end(struct maple_node *node,
1448 enum maple_type type,
1449 unsigned long *pivots,
1452 unsigned char offset;
1457 if (type == maple_arange_64)
1458 return ma_meta_end(node, type);
1460 offset = mt_pivots[type] - 1;
1461 if (likely(!pivots[offset]))
1462 return ma_meta_end(node, type);
1464 if (likely(pivots[offset] == max))
1465 return offset;
1467 return mt_pivots[type];
1471 * mas_data_end() - Find the end of the data (slot).
1472 * @mas: the maple state
1474 * This method is optimized to check the metadata of a node if the node type
1475 * supports data end metadata.
1477 * Return: The zero indexed last slot with data (may be null).
1479 static inline unsigned char mas_data_end(struct ma_state *mas)
1481 enum maple_type type;
1482 struct maple_node *node;
1483 unsigned char offset;
1484 unsigned long *pivots;
1486 type = mte_node_type(mas->node);
1488 if (type == maple_arange_64)
1489 return ma_meta_end(node, type);
1491 pivots = ma_pivots(node, type);
1492 if (unlikely(ma_dead_node(node)))
1495 offset = mt_pivots[type] - 1;
1496 if (likely(!pivots[offset]))
1497 return ma_meta_end(node, type);
1499 if (likely(pivots[offset] == mas->max))
1500 return offset;
1502 return mt_pivots[type];
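
/*
 * Sketch with assumed contents: for maple_range_64, mt_pivots is 15. A zero
 * in pivots[14] sends the lookup to the metadata; pivots[14] == mas->max
 * means offset 14 is the end; any other value means the final slot (offset
 * 15), which has no pivot of its own, is also in use.
 */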
1506 * mas_leaf_max_gap() - Returns the largest gap in a leaf node
1507 * @mas: The maple state
1509 * Return: The maximum gap in the leaf.
1511 static unsigned long mas_leaf_max_gap(struct ma_state *mas)
1514 unsigned long pstart, gap, max_gap;
1515 struct maple_node *mn;
1516 unsigned long *pivots;
1519 unsigned char max_piv;
1521 mt = mte_node_type(mas->node);
1523 slots = ma_slots(mn, mt);
1525 if (unlikely(ma_is_dense(mt))) {
1527 for (i = 0; i < mt_slots[mt]; i++) {
1542 * Checking the first implied pivot optimizes the loop below, and slot 1 may
1543 * be skipped if there is a gap in slot 0.
1545 pivots = ma_pivots(mn, mt);
1546 if (likely(!slots[0])) {
1547 max_gap = pivots[0] - mas->min + 1;
1553 /* reduce max_piv as the special case is checked before the loop */
1554 max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1;
1556 * Check the end implied pivot, which can only be a gap on the right most
1557 * node.
1559 if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) {
1560 gap = ULONG_MAX - pivots[max_piv];
1565 for (; i <= max_piv; i++) {
1566 /* data == no gap. */
1567 if (likely(slots[i]))
1570 pstart = pivots[i - 1];
1571 gap = pivots[i] - pstart;
1575 /* There cannot be two gaps in a row. */
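/*
 * Worked example (assumed pivots): if pivots[i - 1] is 9, pivots[i] is 19
 * and slots[i] is NULL, the vacant range is indices 10-19, so the gap is
 * 19 - 9 = 10.
 */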
1582 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf)
1583 * @node: The maple node
1584 * @gaps: The pointer to the gaps
1585 * @mt: The maple node type
1586 * @off: Pointer to store the offset location of the gap.
1588 * Uses the data end from the metadata to scan backwards across set gaps.
1590 * Return: The maximum gap value
1592 static inline unsigned long
1593 ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt,
1596 unsigned char offset, i;
1597 unsigned long max_gap = 0;
1599 i = offset = ma_meta_end(node, mt);
1601 if (gaps[i] > max_gap) {
1612 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot.
1613 * @mas: The maple state.
1615 * If the metadata gap is set to MAPLE_ARANGE64_META_MAX, there is no gap.
1617 * Return: The gap value.
1619 static inline unsigned long mas_max_gap(struct ma_state *mas)
1621 unsigned long *gaps;
1622 unsigned char offset;
1624 struct maple_node *node;
1626 mt = mte_node_type(mas->node);
1628 return mas_leaf_max_gap(mas);
1631 offset = ma_meta_gap(node, mt);
1632 if (offset == MAPLE_ARANGE64_META_MAX)
1635 gaps = ma_gaps(node, mt);
1636 return gaps[offset];
1640 * mas_parent_gap() - Set the parent gap and any gaps above, as needed
1641 * @mas: The maple state
1642 * @offset: The gap offset in the parent to set
1643 * @new: The new gap value.
1645 * Set the parent gap then continue to set the gap upwards, using the metadata
1646 * of the parent to see if it is necessary to check the node above.
1648 static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset,
1651 unsigned long meta_gap = 0;
1652 struct maple_node *pnode;
1653 struct maple_enode *penode;
1654 unsigned long *pgaps;
1655 unsigned char meta_offset;
1656 enum maple_type pmt;
1658 pnode = mte_parent(mas->node);
1659 pmt = mas_parent_enum(mas, mas->node);
1660 penode = mt_mk_node(pnode, pmt);
1661 pgaps = ma_gaps(pnode, pmt);
1664 meta_offset = ma_meta_gap(pnode, pmt);
1665 if (meta_offset == MAPLE_ARANGE64_META_MAX)
1668 meta_gap = pgaps[meta_offset];
1670 pgaps[offset] = new;
1672 if (meta_gap == new)
1675 if (offset != meta_offset) {
1679 ma_set_meta_gap(pnode, pmt, offset);
1680 } else if (new < meta_gap) {
1682 new = ma_max_gap(pnode, pgaps, pmt, &meta_offset);
1683 ma_set_meta_gap(pnode, pmt, meta_offset);
1686 if (ma_is_root(pnode))
1689 /* Go to the parent node. */
1690 pnode = mte_parent(penode);
1691 pmt = mas_parent_enum(mas, penode);
1692 pgaps = ma_gaps(pnode, pmt);
1693 offset = mte_parent_slot(penode);
1694 penode = mt_mk_node(pnode, pmt);
1699 * mas_update_gap() - Update a nodes gaps and propagate up if necessary.
1700 * @mas: The maple state.
1702 static inline void mas_update_gap(struct ma_state *mas)
1704 unsigned char pslot;
1705 unsigned long p_gap;
1706 unsigned long max_gap;
1708 if (!mt_is_alloc(mas->tree))
1711 if (mte_is_root(mas->node))
1714 max_gap = mas_max_gap(mas);
1716 pslot = mte_parent_slot(mas->node);
1717 p_gap = ma_gaps(mte_parent(mas->node),
1718 mas_parent_enum(mas, mas->node))[pslot];
1720 if (p_gap != max_gap)
1721 mas_parent_gap(mas, pslot, max_gap);
1725 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to
1726 * @parent with the slot encoded.
1727 * @mas: The maple state (for the tree)
1728 * @parent: The maple encoded node containing the children.
1730 static inline void mas_adopt_children(struct ma_state *mas,
1731 struct maple_enode *parent)
1733 enum maple_type type = mte_node_type(parent);
1734 struct maple_node *node = mas_mn(mas);
1735 void __rcu **slots = ma_slots(node, type);
1736 unsigned long *pivots = ma_pivots(node, type);
1737 struct maple_enode *child;
1738 unsigned char offset;
1740 offset = ma_data_end(node, type, pivots, mas->max);
1742 child = mas_slot_locked(mas, slots, offset);
1743 mte_set_parent(child, parent, offset);
1748 * mas_replace() - Replace a maple node in the tree with mas->node. Uses the
1749 * parent encoding to locate the maple node in the tree.
1750 * @mas: The ma_state to use for operations.
1751 * @advanced: Boolean to adopt the child nodes and free the old node (false) or
1752 * leave the node (true) and handle the adoption and free elsewhere.
1754 static inline void mas_replace(struct ma_state *mas, bool advanced)
1755 __must_hold(mas->tree->lock)
1757 struct maple_node *mn = mas_mn(mas);
1758 struct maple_enode *old_enode;
1759 unsigned char offset = 0;
1760 void __rcu **slots = NULL;
1762 if (ma_is_root(mn)) {
1763 old_enode = mas_root_locked(mas);
1765 offset = mte_parent_slot(mas->node);
1766 slots = ma_slots(mte_parent(mas->node),
1767 mas_parent_enum(mas, mas->node));
1768 old_enode = mas_slot_locked(mas, slots, offset);
1771 if (!advanced && !mte_is_leaf(mas->node))
1772 mas_adopt_children(mas, mas->node);
1774 if (mte_is_root(mas->node)) {
1775 mn->parent = ma_parent_ptr(
1776 ((unsigned long)mas->tree | MA_ROOT_PARENT));
1777 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
1778 mas_set_height(mas);
1780 rcu_assign_pointer(slots[offset], mas->node);
1784 mas_free(mas, old_enode);
1788 * mas_new_child() - Find the new child of a node.
1789 * @mas: the maple state
1790 * @child: the maple state to store the child.
1792 static inline bool mas_new_child(struct ma_state *mas, struct ma_state *child)
1793 __must_hold(mas->tree->lock)
1796 unsigned char offset;
1798 unsigned long *pivots;
1799 struct maple_enode *entry;
1800 struct maple_node *node;
1803 mt = mte_node_type(mas->node);
1805 slots = ma_slots(node, mt);
1806 pivots = ma_pivots(node, mt);
1807 end = ma_data_end(node, mt, pivots, mas->max);
1808 for (offset = mas->offset; offset <= end; offset++) {
1809 entry = mas_slot_locked(mas, slots, offset);
1810 if (mte_parent(entry) == node) {
1812 mas->offset = offset + 1;
1813 child->offset = offset;
1823 * mab_shift_right() - Shift the data in mab right. Note, does not clean out the
1824 * old data or set b_node->b_end.
1825 * @b_node: the maple_big_node
1826 * @shift: the shift count
1828 static inline void mab_shift_right(struct maple_big_node *b_node,
1829 unsigned char shift)
1831 unsigned long size = b_node->b_end * sizeof(unsigned long);
1833 memmove(b_node->pivot + shift, b_node->pivot, size);
1834 memmove(b_node->slot + shift, b_node->slot, size);
1835 if (b_node->type == maple_arange_64)
1836 memmove(b_node->gap + shift, b_node->gap, size);
1840 * mab_middle_node() - Check if a middle node is needed (unlikely)
1841 * @b_node: the maple_big_node that contains the data.
1843 * @split: the potential split location
1844 * @slot_count: the size that can be stored in a single node being considered.
1846 * Return: true if a middle node is required.
1848 static inline bool mab_middle_node(struct maple_big_node *b_node, int split,
1849 unsigned char slot_count)
1851 unsigned char size = b_node->b_end;
1853 if (size >= 2 * slot_count)
1856 if (!b_node->slot[split] && (size >= 2 * slot_count - 1))
1863 * mab_no_null_split() - ensure the split doesn't fall on a NULL
1864 * @b_node: the maple_big_node with the data
1865 * @split: the suggested split location
1866 * @slot_count: the number of slots in the node being considered.
1868 * Return: the split location.
1870 static inline int mab_no_null_split(struct maple_big_node *b_node,
1871 unsigned char split, unsigned char slot_count)
1873 if (!b_node->slot[split]) {
1875 * If the split is less than the max slot && the right side will
1876 * still be sufficient, then increment the split on NULL.
1878 if ((split < slot_count - 1) &&
1879 (b_node->b_end - split) > (mt_min_slots[b_node->type]))
1888 * mab_calc_split() - Calculate the split location and if there needs to be two
1889 * splits.
1890 * @bn: The maple_big_node with the data
1891 * @mid_split: The second split, if required. 0 otherwise.
1893 * Return: The first split location. The middle split is set in @mid_split.
1895 static inline int mab_calc_split(struct ma_state *mas,
1896 struct maple_big_node *bn, unsigned char *mid_split, unsigned long min)
1898 unsigned char b_end = bn->b_end;
1899 int split = b_end / 2; /* Assume equal split. */
1900 unsigned char slot_min, slot_count = mt_slots[bn->type];
1903 * To support gap tracking, all NULL entries are kept together and a node cannot
1904 * end on a NULL entry, with the exception of the left-most leaf. The
1905 * limitation means that the split of a node must be checked for this condition
1906 * and be able to put more data in one direction or the other.
1908 if (unlikely((mas->mas_flags & MA_STATE_BULK))) {
1910 split = b_end - mt_min_slots[bn->type];
1912 if (!ma_is_leaf(bn->type))
1915 mas->mas_flags |= MA_STATE_REBALANCE;
1916 if (!bn->slot[split])
1922 * Although extremely rare, it is possible to enter what is known as the 3-way
1923 * split scenario. The 3-way split comes about by means of a store of a range
1924 * that overwrites the end and beginning of two full nodes. The result is a set
1925 * of entries that cannot be stored in 2 nodes. Sometimes, these two nodes can
1926 * also be located in different parent nodes which are also full. This can
1927 * carry upwards all the way to the root in the worst case.
1929 if (unlikely(mab_middle_node(bn, split, slot_count))) {
1931 *mid_split = split * 2;
1933 slot_min = mt_min_slots[bn->type];
1937 * Avoid having a range less than the slot count unless it
1938 * causes one node to be deficient.
1939 * NOTE: mt_min_slots is 1 based, b_end and split are zero based.
1941 while (((bn->pivot[split] - min) < slot_count - 1) &&
1942 (split < slot_count - 1) && (b_end - split > slot_min))
1946 /* Avoid ending a node on a NULL entry */
1947 split = mab_no_null_split(bn, split, slot_count);
1951 *mid_split = mab_no_null_split(bn, *mid_split, slot_count);
1957 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node
1958 * and set @b_node->b_end to the next free slot.
1959 * @mas: The maple state
1960 * @mas_start: The starting slot to copy
1961 * @mas_end: The end slot to copy (inclusively)
1962 * @b_node: The maple_big_node to place the data
1963 * @mab_start: The starting location in maple_big_node to store the data.
1965 static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start,
1966 unsigned char mas_end, struct maple_big_node *b_node,
1967 unsigned char mab_start)
1970 struct maple_node *node;
1972 unsigned long *pivots, *gaps;
1973 int i = mas_start, j = mab_start;
1974 unsigned char piv_end;
1977 mt = mte_node_type(mas->node);
1978 pivots = ma_pivots(node, mt);
1980 b_node->pivot[j] = pivots[i++];
1981 if (unlikely(i > mas_end))
1986 piv_end = min(mas_end, mt_pivots[mt]);
1987 for (; i < piv_end; i++, j++) {
1988 b_node->pivot[j] = pivots[i];
1989 if (unlikely(!b_node->pivot[j]))
1992 if (unlikely(mas->max == b_node->pivot[j]))
1996 if (likely(i <= mas_end))
1997 b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt);
2000 b_node->b_end = ++j;
2002 slots = ma_slots(node, mt);
2003 memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j);
2004 if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) {
2005 gaps = ma_gaps(node, mt);
2006 memcpy(b_node->gap + mab_start, gaps + mas_start,
2007 sizeof(unsigned long) * j);
2012 * mas_leaf_set_meta() - Set the metadata of a leaf if possible.
2013 * @mas: The maple state
2014 * @node: The maple node
2015 * @pivots: pointer to the maple node pivots
2016 * @mt: The maple type
2017 * @end: The assumed end
2019 * Note, end may be incremented within this function but not modified at the
2020 * source. This is fine since the metadata is the last thing to be stored in a
2021 * node during a write.
2023 static inline void mas_leaf_set_meta(struct ma_state *mas,
2024 struct maple_node *node, unsigned long *pivots,
2025 enum maple_type mt, unsigned char end)
2027 /* There is no room for metadata already */
2028 if (mt_pivots[mt] <= end)
2031 if (pivots[end] && pivots[end] < mas->max)
2034 if (end < mt_slots[mt] - 1)
2035 ma_set_meta(node, mt, 0, end);
2039 * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node.
2040 * @b_node: the maple_big_node that has the data
2041 * @mab_start: the start location in @b_node.
2042 * @mab_end: The end location in @b_node (inclusively)
2043 * @mas: The maple state with the maple encoded node.
2045 static inline void mab_mas_cp(struct maple_big_node *b_node,
2046 unsigned char mab_start, unsigned char mab_end,
2047 struct ma_state *mas, bool new_max)
2050 enum maple_type mt = mte_node_type(mas->node);
2051 struct maple_node *node = mte_to_node(mas->node);
2052 void __rcu **slots = ma_slots(node, mt);
2053 unsigned long *pivots = ma_pivots(node, mt);
2054 unsigned long *gaps = NULL;
2057 if (mab_end - mab_start > mt_pivots[mt])
2060 if (!pivots[mt_pivots[mt] - 1])
2061 slots[mt_pivots[mt]] = NULL;
2065 pivots[j++] = b_node->pivot[i++];
2066 } while (i <= mab_end && likely(b_node->pivot[i]));
2068 memcpy(slots, b_node->slot + mab_start,
2069 sizeof(void *) * (i - mab_start));
2072 mas->max = b_node->pivot[i - 1];
2075 if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) {
2076 unsigned long max_gap = 0;
2077 unsigned char offset = 15;
2079 gaps = ma_gaps(node, mt);
2081 gaps[--j] = b_node->gap[--i];
2082 if (gaps[j] > max_gap) {
2088 ma_set_meta(node, mt, offset, end);
2090 mas_leaf_set_meta(mas, node, pivots, mt, end);
2095 * mas_descend_adopt() - Descend through a sub-tree and adopt children.
2096 * @mas: the maple state with the maple encoded node of the sub-tree.
2098 * Descend through a sub-tree and adopt children who do not have the correct
2099 * parents set. Follow the parents which have the correct parents as they are
2100 * the new entries which need to be followed to find other incorrectly set
2101 * parents.
2103 static inline void mas_descend_adopt(struct ma_state *mas)
2105 struct ma_state list[3], next[3];
2109 * At each level there may be up to 3 correct parent pointers which indicate
2110 * the new nodes which need to be walked to find any new nodes at a lower level.
2113 for (i = 0; i < 3; i++) {
2120 while (!mte_is_leaf(list[0].node)) {
2122 for (i = 0; i < 3; i++) {
2123 if (mas_is_none(&list[i]))
2126 if (i && list[i-1].node == list[i].node)
2129 while ((n < 3) && (mas_new_child(&list[i], &next[n])))
2132 mas_adopt_children(&list[i], list[i].node);
2136 next[n++].node = MAS_NONE;
2138 /* descend by setting the list to the children */
2139 for (i = 0; i < 3; i++)
2145 * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert.
2146 * @mas: The maple state
2147 * @end: The maple node end
2148 * @mt: The maple node type
2150 static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end,
2153 if (!(mas->mas_flags & MA_STATE_BULK))
2156 if (mte_is_root(mas->node))
2159 if (end > mt_min_slots[mt]) {
2160 mas->mas_flags &= ~MA_STATE_REBALANCE;
2166 * mas_store_b_node() - Store an @entry into the b_node while also copying the
2167 * data from a maple encoded node.
2168 * @wr_mas: the maple write state
2169 * @b_node: the maple_big_node to fill with data
2170 * @offset_end: the offset to end copying
2172 * The actual end of the data stored in @b_node is set in @b_node->b_end.
2174 static inline void mas_store_b_node(struct ma_wr_state *wr_mas,
2175 struct maple_big_node *b_node, unsigned char offset_end)
2178 unsigned char b_end;
2179 /* Possible underflow of piv will wrap back to 0 before use. */
2181 struct ma_state *mas = wr_mas->mas;
2183 b_node->type = wr_mas->type;
2187 /* Copy start data up to insert. */
2188 mas_mab_cp(mas, 0, slot - 1, b_node, 0);
2189 b_end = b_node->b_end;
2190 piv = b_node->pivot[b_end - 1];
2194 if (piv + 1 < mas->index) {
2195 /* Handle range starting after old range */
2196 b_node->slot[b_end] = wr_mas->content;
2197 if (!wr_mas->content)
2198 b_node->gap[b_end] = mas->index - 1 - piv;
2199 b_node->pivot[b_end++] = mas->index - 1;
2202 /* Store the new entry. */
2203 mas->offset = b_end;
2204 b_node->slot[b_end] = wr_mas->entry;
2205 b_node->pivot[b_end] = mas->last;
2208 if (mas->last >= mas->max)
2211 /* Handle new range ending before old range ends */
2212 piv = mas_logical_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type);
2213 if (piv > mas->last) {
2214 if (piv == ULONG_MAX)
2215 mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type);
2217 if (offset_end != slot)
2218 wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
2221 b_node->slot[++b_end] = wr_mas->content;
2222 if (!wr_mas->content)
2223 b_node->gap[b_end] = piv - mas->last + 1;
2224 b_node->pivot[b_end] = piv;
2227 slot = offset_end + 1;
2228 if (slot > wr_mas->node_end)
2231 /* Copy end data to the end of the node. */
2232 mas_mab_cp(mas, slot, wr_mas->node_end + 1, b_node, ++b_end);
2237 b_node->b_end = b_end;
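
/*
 * Worked sketch (assumed state): writing entry C over 12-17 into a leaf
 * holding A at 0-9 and B at 10-20 produces a big node with A at 0-9, B
 * trimmed to 10-11, C at 12-17 and B again at 18-20, with b_node->b_end
 * tracking the end of that data.
 */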
2241 * mas_prev_sibling() - Find the previous node with the same parent.
2242 * @mas: the maple state
2244 * Return: True if there is a previous sibling, false otherwise.
2246 static inline bool mas_prev_sibling(struct ma_state *mas)
2248 unsigned int p_slot = mte_parent_slot(mas->node);
2250 if (mte_is_root(mas->node))
2257 mas->offset = p_slot - 1;
2263 * mas_next_sibling() - Find the next node with the same parent.
2264 * @mas: the maple state
2266 * Return: true if there is a next sibling, false otherwise.
2268 static inline bool mas_next_sibling(struct ma_state *mas)
2270 MA_STATE(parent, mas->tree, mas->index, mas->last);
2272 if (mte_is_root(mas->node))
2276 mas_ascend(&parent);
2277 parent.offset = mte_parent_slot(mas->node) + 1;
2278 if (parent.offset > mas_data_end(&parent))
2287 * mte_node_or_none() - Return the encoded node or MAS_NONE.
2288 * @enode: The encoded maple node.
2290 * Shorthand to avoid setting %NULLs in the tree or maple_subtree_state.
2292 * Return: @enode or MAS_NONE
2294 static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode)
2299 return ma_enode_ptr(MAS_NONE);
2303 * mas_wr_node_walk() - Find the correct offset for the index in the @mas.
2304 * @wr_mas: The maple write state
2306 * Uses mas_slot_locked() and does not need to worry about dead nodes.
2308 static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas)
2310 struct ma_state *mas = wr_mas->mas;
2311 unsigned char count;
2312 unsigned char offset;
2313 unsigned long index, min, max;
2315 if (unlikely(ma_is_dense(wr_mas->type))) {
2316 wr_mas->r_max = wr_mas->r_min = mas->index;
2317 mas->offset = mas->index = mas->min;
2321 wr_mas->node = mas_mn(wr_mas->mas);
2322 wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type);
2323 count = wr_mas->node_end = ma_data_end(wr_mas->node, wr_mas->type,
2324 wr_mas->pivots, mas->max);
2325 offset = mas->offset;
2326 min = mas_safe_min(mas, wr_mas->pivots, offset);
2327 if (unlikely(offset == count))
2330 max = wr_mas->pivots[offset];
2332 if (unlikely(index <= max))
2335 if (unlikely(!max && offset))
2339 while (++offset < count) {
2340 max = wr_mas->pivots[offset];
2343 else if (unlikely(!max))
2352 wr_mas->r_max = max;
2353 wr_mas->r_min = min;
2354 wr_mas->offset_end = mas->offset = offset;
2358 * mas_topiary_range() - Add a range of slots to the topiary.
2359 * @mas: The maple state
2360 * @destroy: The topiary to add the slots (usually destroy)
2361 * @start: The starting slot inclusively
2362 * @end: The end slot inclusively
2364 static inline void mas_topiary_range(struct ma_state *mas,
2365 struct ma_topiary *destroy, unsigned char start, unsigned char end)
2368 unsigned char offset;
2370 MT_BUG_ON(mas->tree, mte_is_leaf(mas->node));
2371 slots = ma_slots(mas_mn(mas), mte_node_type(mas->node));
2372 for (offset = start; offset <= end; offset++) {
2373 struct maple_enode *enode = mas_slot_locked(mas, slots, offset);
2375 if (mte_dead_node(enode))
2378 mat_add(destroy, enode);
2383 * mast_topiary() - Add the portions of the tree to the removal list; either to
2384 * be freed or discarded (destroy walk).
2385 * @mast: The maple_subtree_state.
2387 static inline void mast_topiary(struct maple_subtree_state *mast)
2389 MA_WR_STATE(wr_mas, mast->orig_l, NULL);
2390 unsigned char r_start, r_end;
2391 unsigned char l_start, l_end;
2392 void __rcu **l_slots, **r_slots;
2394 wr_mas.type = mte_node_type(mast->orig_l->node);
2395 mast->orig_l->index = mast->orig_l->last;
2396 mas_wr_node_walk(&wr_mas);
2397 l_start = mast->orig_l->offset + 1;
2398 l_end = mas_data_end(mast->orig_l);
2400 r_end = mast->orig_r->offset;
2405 l_slots = ma_slots(mas_mn(mast->orig_l),
2406 mte_node_type(mast->orig_l->node));
2408 r_slots = ma_slots(mas_mn(mast->orig_r),
2409 mte_node_type(mast->orig_r->node));
2411 if ((l_start < l_end) &&
2412 mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_start))) {
2416 if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_end))) {
2421 if ((l_start > r_end) && (mast->orig_l->node == mast->orig_r->node))
2424 /* At the node where left and right sides meet, add the parts between */
2425 if (mast->orig_l->node == mast->orig_r->node) {
2426 return mas_topiary_range(mast->orig_l, mast->destroy,
2430 /* mast->orig_r is different and consumed. */
2431 if (mte_is_leaf(mast->orig_r->node))
2434 if (mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_end)))
2438 if (l_start <= l_end)
2439 mas_topiary_range(mast->orig_l, mast->destroy, l_start, l_end);
2441 if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_start)))
2444 if (r_start <= r_end)
2445 mas_topiary_range(mast->orig_r, mast->destroy, 0, r_end);
2449 * mast_rebalance_next() - Rebalance against the next node
2450 * @mast: The maple subtree state
2453 static inline void mast_rebalance_next(struct maple_subtree_state *mast)
2455 unsigned char b_end = mast->bn->b_end;
2457 mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node),
2459 mast->orig_r->last = mast->orig_r->max;
2463 * mast_rebalance_prev() - Rebalance against the previous node
2464 * @mast: The maple subtree state
2467 static inline void mast_rebalance_prev(struct maple_subtree_state *mast)
2469 unsigned char end = mas_data_end(mast->orig_l) + 1;
2470 unsigned char b_end = mast->bn->b_end;
2472 mab_shift_right(mast->bn, end);
2473 mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0);
2474 mast->l->min = mast->orig_l->min;
2475 mast->orig_l->index = mast->orig_l->min;
2476 mast->bn->b_end = end + b_end;
2477 mast->l->offset += end;
2481 * mast_spanning_rebalance() - Rebalance nodes with the nearest neighbour,
2482 * favouring the node to the right. Check the nodes to the right, then the left,
2483 * at each level upwards until the root is reached. Free and destroy as needed.
2484 * Data is copied into the @mast->bn.
2485 * @mast: The maple_subtree_state.
2488 bool mast_spanning_rebalance(struct maple_subtree_state *mast)
2490 struct ma_state r_tmp = *mast->orig_r;
2491 struct ma_state l_tmp = *mast->orig_l;
2492 struct maple_enode *ancestor = NULL;
2493 unsigned char start, end;
2494 unsigned char depth = 0;
2499 mas_ascend(mast->orig_r);
2500 mas_ascend(mast->orig_l);
2503 (mast->orig_r->node == mast->orig_l->node)) {
2504 ancestor = mast->orig_r->node;
2505 end = mast->orig_r->offset - 1;
2506 start = mast->orig_l->offset + 1;
2509 if (mast->orig_r->offset < mas_data_end(mast->orig_r)) {
2511 ancestor = mast->orig_r->node;
2515 mast->orig_r->offset++;
2517 mas_descend(mast->orig_r);
2518 mast->orig_r->offset = 0;
2522 mast_rebalance_next(mast);
2524 unsigned char l_off = 0;
2525 struct maple_enode *child = r_tmp.node;
2528 if (ancestor == r_tmp.node)
2534 if (l_off < r_tmp.offset)
2535 mas_topiary_range(&r_tmp, mast->destroy,
2536 l_off, r_tmp.offset);
2538 if (l_tmp.node != child)
2539 mat_add(mast->free, child);
2541 } while (r_tmp.node != ancestor);
2543 *mast->orig_l = l_tmp;
2546 } else if (mast->orig_l->offset != 0) {
2548 ancestor = mast->orig_l->node;
2549 end = mas_data_end(mast->orig_l);
2552 mast->orig_l->offset--;
2554 mas_descend(mast->orig_l);
2555 mast->orig_l->offset =
2556 mas_data_end(mast->orig_l);
2560 mast_rebalance_prev(mast);
2562 unsigned char r_off;
2563 struct maple_enode *child = l_tmp.node;
2566 if (ancestor == l_tmp.node)
2569 r_off = mas_data_end(&l_tmp);
2571 if (l_tmp.offset < r_off)
2574 if (l_tmp.offset < r_off)
2575 mas_topiary_range(&l_tmp, mast->destroy,
2576 l_tmp.offset, r_off);
2578 if (r_tmp.node != child)
2579 mat_add(mast->free, child);
2581 } while (l_tmp.node != ancestor);
2583 *mast->orig_r = r_tmp;
2586 } while (!mte_is_root(mast->orig_r->node));
2588 *mast->orig_r = r_tmp;
2589 *mast->orig_l = l_tmp;
2594 * mast_ascend_free() - Add current original maple state nodes to the free list
2596 * @mast: the maple subtree state.
2598 * Ascend the original left and right sides and add the previous nodes to the
2599 * free list. Set the slots to point to the correct location in the new nodes.
2602 mast_ascend_free(struct maple_subtree_state *mast)
2604 MA_WR_STATE(wr_mas, mast->orig_r, NULL);
2605 struct maple_enode *left = mast->orig_l->node;
2606 struct maple_enode *right = mast->orig_r->node;
2608 mas_ascend(mast->orig_l);
2609 mas_ascend(mast->orig_r);
2610 mat_add(mast->free, left);
2613 mat_add(mast->free, right);
2615 mast->orig_r->offset = 0;
2616 mast->orig_r->index = mast->r->max;
2617 /* last should be larger than or equal to index */
2618 if (mast->orig_r->last < mast->orig_r->index)
2619 mast->orig_r->last = mast->orig_r->index;
2621 * The node may not contain the value so set the slot to ensure all
2622 * of the node's contents are freed or destroyed.
2624 wr_mas.type = mte_node_type(mast->orig_r->node);
2625 mas_wr_node_walk(&wr_mas);
2626 /* Set up the left side of things */
2627 mast->orig_l->offset = 0;
2628 mast->orig_l->index = mast->l->min;
2629 wr_mas.mas = mast->orig_l;
2630 wr_mas.type = mte_node_type(mast->orig_l->node);
2631 mas_wr_node_walk(&wr_mas);
2633 mast->bn->type = wr_mas.type;
2637 * mas_new_ma_node() - Create and return a new maple node. Helper function.
2638 * @mas: the maple state with the allocations.
2639 * @b_node: the maple_big_node with the type encoding.
2641 * Use the node type from the maple_big_node to allocate a new node from the
2642 * ma_state. This function exists mainly for code readability.
2644 * Return: A new maple encoded node
2646 static inline struct maple_enode
2647 *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node)
2649 return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type);
2653 * mas_mab_to_node() - Set up right and middle nodes
2655 * @mas: the maple state that contains the allocations.
2656 * @b_node: the node which contains the data.
2657 * @left: The pointer which will have the left node
2658 * @right: The pointer which may have the right node
2659 * @middle: The pointer which may have the middle node (rare)
2660 * @mid_split: The split location for the middle node
* @min: The minimum of the range (passed through to mab_calc_split())
2662 * Return: the split of left.
2664 static inline unsigned char mas_mab_to_node(struct ma_state *mas,
2665 struct maple_big_node *b_node, struct maple_enode **left,
2666 struct maple_enode **right, struct maple_enode **middle,
2667 unsigned char *mid_split, unsigned long min)
2669 unsigned char split = 0;
2670 unsigned char slot_count = mt_slots[b_node->type];
2672 *left = mas_new_ma_node(mas, b_node);
2677 if (b_node->b_end < slot_count) {
2678 split = b_node->b_end;
2680 split = mab_calc_split(mas, b_node, mid_split, min);
2681 *right = mas_new_ma_node(mas, b_node);
2685 *middle = mas_new_ma_node(mas, b_node);
2692 * mab_set_b_end() - Add an entry to b_node at b_node->b_end and increment the end pointer.
2694 * @b_node: the big node to add the entry to
2695 * @mas: the maple state to get the pivot from (mas->max)
2696 * @entry: the entry to add; if NULL, nothing happens.
2698 static inline void mab_set_b_end(struct maple_big_node *b_node,
2699 struct ma_state *mas,
2705 b_node->slot[b_node->b_end] = entry;
2706 if (mt_is_alloc(mas->tree))
2707 b_node->gap[b_node->b_end] = mas_max_gap(mas);
2708 b_node->pivot[b_node->b_end++] = mas->max;
2712 * mas_set_split_parent() - combine_then_separate helper function. Sets the parent
2713 * of @mas->node to either @left or @right, depending on @slot and @split.
2715 * @mas: the maple state with the node that needs a parent
2716 * @left: possible parent 1
2717 * @right: possible parent 2
2718 * @slot: the slot @mas->node was placed in
2719 * @split: the split location between @left and @right
2721 static inline void mas_set_split_parent(struct ma_state *mas,
2722 struct maple_enode *left,
2723 struct maple_enode *right,
2724 unsigned char *slot, unsigned char split)
2726 if (mas_is_none(mas))
2729 if ((*slot) <= split)
2730 mte_set_parent(mas->node, left, *slot);
2732 mte_set_parent(mas->node, right, (*slot) - split - 1);
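/*
 * Worked example (illustrative, not in the original source): with
 * split == 7, a node placed in slot 5 gets @left as its parent at offset 5,
 * while a node placed in slot 9 gets @right at offset 9 - 7 - 1 == 1.
 */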
2738 * mte_mid_split_check() - Check if the next node passes the mid-split
2739 * @**l: Pointer to left encoded maple node.
2741 * @**r: Pointer to right encoded maple node.
2742 * @slot: The offset being checked.
2743 * @*split: The split location.
2744 * @mid_split: The middle split.
2746 static inline void mte_mid_split_check(struct maple_enode **l,
2747 struct maple_enode **r,
2748 struct maple_enode *right,
2749 unsigned char slot,
2750 unsigned char *split,
2751 unsigned char mid_split)
2756 if (slot < mid_split)
2765 * mast_set_split_parents() - Helper function to set three nodes' parents. Slot
2766 * is taken from @mast->l.
2767 * @mast: the maple subtree state
2768 * @left: the left node
* @middle: the middle node (may be NULL)
2769 * @right: the right node
2770 * @split: the split location.
* @mid_split: the split location of the middle node.
2772 static inline void mast_set_split_parents(struct maple_subtree_state *mast,
2773 struct maple_enode *left,
2774 struct maple_enode *middle,
2775 struct maple_enode *right,
2776 unsigned char split,
2777 unsigned char mid_split)
2780 struct maple_enode *l = left;
2781 struct maple_enode *r = right;
2783 if (mas_is_none(mast->l))
2789 slot = mast->l->offset;
2791 mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2792 mas_set_split_parent(mast->l, l, r, &slot, split);
2794 mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2795 mas_set_split_parent(mast->m, l, r, &slot, split);
2797 mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2798 mas_set_split_parent(mast->r, l, r, &slot, split);
2802 * mas_wmb_replace() - Write memory barrier and replace
2803 * @mas: The maple state
2804 * @free: the maple topiary list of nodes to free
2805 * @destroy: The maple topiary list of nodes to destroy (walk and free)
2807 * Updates gap as necessary.
2809 static inline void mas_wmb_replace(struct ma_state *mas,
2810 struct ma_topiary *free,
2811 struct ma_topiary *destroy)
2813 /* All nodes must see old data as dead prior to replacing that data */
2814 smp_wmb(); /* Needed for RCU */
2816 /* Insert the new data in the tree */
2817 mas_replace(mas, true);
2819 if (!mte_is_leaf(mas->node))
2820 mas_descend_adopt(mas);
2822 mas_mat_free(mas, free);
2825 mas_mat_destroy(mas, destroy);
2827 if (mte_is_leaf(mas->node))
2830 mas_update_gap(mas);
2834 * mast_new_root() - Set a new tree root during subtree creation
2835 * @mast: The maple subtree state
2836 * @mas: The maple state
2838 static inline void mast_new_root(struct maple_subtree_state *mast,
2839 struct ma_state *mas)
2841 mas_mn(mast->l)->parent =
2842 ma_parent_ptr(((unsigned long)mas->tree | MA_ROOT_PARENT));
2843 if (!mte_dead_node(mast->orig_l->node) &&
2844 !mte_is_root(mast->orig_l->node)) {
2846 mast_ascend_free(mast);
2848 } while (!mte_is_root(mast->orig_l->node));
2850 if ((mast->orig_l->node != mas->node) &&
2851 (mast->l->depth > mas_mt_height(mas))) {
2852 mat_add(mast->free, mas->node);
2857 * mast_cp_to_nodes() - Copy data out to nodes.
2858 * @mast: The maple subtree state
2859 * @left: The left encoded maple node
2860 * @middle: The middle encoded maple node
2861 * @right: The right encoded maple node
2862 * @split: The location to split between left and (middle ? middle : right)
2863 * @mid_split: The location to split between middle and right.
2865 static inline void mast_cp_to_nodes(struct maple_subtree_state *mast,
2866 struct maple_enode *left, struct maple_enode *middle,
2867 struct maple_enode *right, unsigned char split, unsigned char mid_split)
2869 bool new_lmax = true;
2871 mast->l->node = mte_node_or_none(left);
2872 mast->m->node = mte_node_or_none(middle);
2873 mast->r->node = mte_node_or_none(right);
2875 mast->l->min = mast->orig_l->min;
2876 if (split == mast->bn->b_end) {
2877 mast->l->max = mast->orig_r->max;
2881 mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax);
2884 mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true);
2885 mast->m->min = mast->bn->pivot[split] + 1;
2889 mast->r->max = mast->orig_r->max;
2891 mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false);
2892 mast->r->min = mast->bn->pivot[split] + 1;
2897 * mast_combine_cp_left() - Copy in the original left side of the tree into the
2898 * combined data set in the maple subtree state big node.
2899 * @mast: The maple subtree state
2901 static inline void mast_combine_cp_left(struct maple_subtree_state *mast)
2903 unsigned char l_slot = mast->orig_l->offset;
2908 mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0);
2912 * mast_combine_cp_right() - Copy in the original right side of the tree into the
2913 * combined data set in the maple subtree state big node.
2914 * @mast: The maple subtree state
2916 static inline void mast_combine_cp_right(struct maple_subtree_state *mast)
2918 if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max)
2921 mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1,
2922 mt_slot_count(mast->orig_r->node), mast->bn,
2924 mast->orig_r->last = mast->orig_r->max;
2928 * mast_sufficient() - Check if the maple subtree state has enough data in the big
2929 * node to create at least one sufficient node
2930 * @mast: the maple subtree state
2932 static inline bool mast_sufficient(struct maple_subtree_state *mast)
2934 if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node))
2941 * mast_overflow() - Check if there is too much data in the subtree state for a single node.
2943 * @mast: The maple subtree state
2945 static inline bool mast_overflow(struct maple_subtree_state *mast)
2947 if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node))
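/*
 * mtree_range_walk() - Walk the tree for mas->index (summary comment added
 * for clarity; the original lacks one). Descends from mas->node to a leaf,
 * tracking each slot's range, and leaves the maple state positioned on the
 * leaf with mas->min and mas->max set to the walked range.
 * @mas: The maple state
 *
 * Return: The entry at mas->index, or %NULL if a dead node was encountered.
 */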
2953 static inline void *mtree_range_walk(struct ma_state *mas)
2955 unsigned long *pivots;
2956 unsigned char offset;
2957 struct maple_node *node;
2958 struct maple_enode *next, *last;
2959 enum maple_type type;
2962 unsigned long max, min;
2963 unsigned long prev_max, prev_min;
2971 node = mte_to_node(next);
2972 type = mte_node_type(next);
2973 pivots = ma_pivots(node, type);
2974 end = ma_data_end(node, type, pivots, max);
2975 if (unlikely(ma_dead_node(node)))
2978 if (pivots[offset] >= mas->index) {
2981 max = pivots[offset];
2987 } while ((offset < end) && (pivots[offset] < mas->index));
2990 min = pivots[offset - 1] + 1;
2992 if (likely(offset < end && pivots[offset]))
2993 max = pivots[offset];
2996 slots = ma_slots(node, type);
2997 next = mt_slot(mas->tree, slots, offset);
2998 if (unlikely(ma_dead_node(node)))
3000 } while (!ma_is_leaf(type));
3002 mas->offset = offset;
3005 mas->min = prev_min;
3006 mas->max = prev_max;
3008 return (void *) next;
3016 * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers.
3017 * @mas: The starting maple state
3018 * @mast: The maple_subtree_state, keeps track of 4 maple states.
3019 * @count: The estimated count of iterations needed.
3021 * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root
3022 * is hit. First @b_node is split into two entries which are inserted into the
3023 * next iteration of the loop. @b_node is returned populated with the final
3024 * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the
3025 * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last
3026 * to account for what has been copied into the new sub-tree. The update of
3027 * orig_l_mas->last is used in mas_consume to find the slots that will need to
3028 * be either freed or destroyed. orig_l_mas->depth keeps track of the height of
3029 * the new sub-tree in case the sub-tree becomes the full tree.
3031 * Return: the number of elements in b_node during the last loop.
3033 static int mas_spanning_rebalance(struct ma_state *mas,
3034 struct maple_subtree_state *mast, unsigned char count)
3036 unsigned char split, mid_split;
3037 unsigned char slot = 0;
3038 struct maple_enode *left = NULL, *middle = NULL, *right = NULL;
3040 MA_STATE(l_mas, mas->tree, mas->index, mas->index);
3041 MA_STATE(r_mas, mas->tree, mas->index, mas->last);
3042 MA_STATE(m_mas, mas->tree, mas->index, mas->index);
3043 MA_TOPIARY(free, mas->tree);
3044 MA_TOPIARY(destroy, mas->tree);
3047 * The tree needs to be rebalanced and leaves need to be kept at the same level.
3048 * Rebalancing is done by use of the ``struct maple_topiary``.
3054 mast->destroy = &destroy;
3055 l_mas.node = r_mas.node = m_mas.node = MAS_NONE;
3057 /* Check if this is not root and has sufficient data. */
3058 if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) &&
3059 unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type]))
3060 mast_spanning_rebalance(mast);
3062 mast->orig_l->depth = 0;
3065 * Each level of the tree is examined and balanced; data is pushed to the left
3066 * or right, or rebalanced against the left or right nodes, to avoid rippling
3067 * up the tree and to limit the amount of churn. Once a new sub-section of
3068 * the tree is created, there may be a mix of new and old nodes. The old nodes
3069 * will have the incorrect parent pointers and currently be in two trees: the
3070 * original tree and the partially new tree. To remedy the parent pointers in
3071 * the old tree, the new data is swapped into the active tree and a walk down
3072 * the tree is performed and the parent pointers are updated.
3073 * See mas_descend_adopt() for more information.
3077 mast->bn->type = mte_node_type(mast->orig_l->node);
3078 split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle,
3079 &mid_split, mast->orig_l->min);
3080 mast_set_split_parents(mast, left, middle, right, split,
3082 mast_cp_to_nodes(mast, left, middle, right, split, mid_split);
3085 * Copy data from next level in the tree to mast->bn from next
3088 memset(mast->bn, 0, sizeof(struct maple_big_node));
3089 mast->bn->type = mte_node_type(left);
3090 mast->orig_l->depth++;
3092 /* Root already stored in l->node. */
3093 if (mas_is_root_limits(mast->l))
3096 mast_ascend_free(mast);
3097 mast_combine_cp_left(mast);
3098 l_mas.offset = mast->bn->b_end;
3099 mab_set_b_end(mast->bn, &l_mas, left);
3100 mab_set_b_end(mast->bn, &m_mas, middle);
3101 mab_set_b_end(mast->bn, &r_mas, right);
3103 /* Copy anything necessary out of the right node. */
3104 mast_combine_cp_right(mast);
3106 mast->orig_l->last = mast->orig_l->max;
3108 if (mast_sufficient(mast))
3111 if (mast_overflow(mast))
3114 /* May be a new root stored in mast->bn */
3115 if (mas_is_root_limits(mast->orig_l))
3118 mast_spanning_rebalance(mast);
3120 /* rebalancing from other nodes may require another loop. */
3125 l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)),
3126 mte_node_type(mast->orig_l->node));
3127 mast->orig_l->depth++;
3128 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true);
3129 mte_set_parent(left, l_mas.node, slot);
3131 mte_set_parent(middle, l_mas.node, ++slot);
3134 mte_set_parent(right, l_mas.node, ++slot);
3136 if (mas_is_root_limits(mast->l)) {
3138 mast_new_root(mast, mas);
3140 mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent;
3143 if (!mte_dead_node(mast->orig_l->node))
3144 mat_add(&free, mast->orig_l->node);
3146 mas->depth = mast->orig_l->depth;
3147 *mast->orig_l = l_mas;
3148 mte_set_node_dead(mas->node);
3150 /* Set up mas for insertion. */
3151 mast->orig_l->depth = mas->depth;
3152 mast->orig_l->alloc = mas->alloc;
3153 *mas = *mast->orig_l;
3154 mas_wmb_replace(mas, &free, &destroy);
3155 mtree_range_walk(mas);
3156 return mast->bn->b_end;
3160 * mas_rebalance() - Rebalance a given node.
3161 * @mas: The maple state
3162 * @b_node: The big maple node.
3164 * Rebalance two nodes into a single node or two new nodes that are sufficient.
3165 * Continue upwards until tree is sufficient.
3167 * Return: the number of elements in b_node during the last loop.
3169 static inline int mas_rebalance(struct ma_state *mas,
3170 struct maple_big_node *b_node)
3172 char empty_count = mas_mt_height(mas);
3173 struct maple_subtree_state mast;
3174 unsigned char shift, b_end = ++b_node->b_end;
3176 MA_STATE(l_mas, mas->tree, mas->index, mas->last);
3177 MA_STATE(r_mas, mas->tree, mas->index, mas->last);
3179 trace_ma_op(__func__, mas);
3182 * Rebalancing occurs if a node is insufficient. Data is rebalanced
3183 * against the node to the right if it exists, otherwise the node to the
3184 * left of this node is rebalanced against this node. If rebalancing
3185 * causes just one node to be produced instead of two, then the parent
3186 * is also examined and rebalanced if it is insufficient. Every level
3187 * tries to combine the data in the same way. If one node contains the
3188 * entire range of the tree, then that node is used as a new root node.
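* For example (illustrative): if an erase leaves a leaf with fewer than
* mt_min_slots[] entries, that leaf is combined with its right sibling; if
* the two fit in one node, the parent loses a child and is itself checked
* for sufficiency on the next level up.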
3190 mas_node_count(mas, 1 + empty_count * 3);
3191 if (mas_is_err(mas))
3194 mast.orig_l = &l_mas;
3195 mast.orig_r = &r_mas;
3197 mast.bn->type = mte_node_type(mas->node);
3199 l_mas = r_mas = *mas;
3201 if (mas_next_sibling(&r_mas)) {
3202 mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end);
3203 r_mas.last = r_mas.index = r_mas.max;
3205 mas_prev_sibling(&l_mas);
3206 shift = mas_data_end(&l_mas) + 1;
3207 mab_shift_right(b_node, shift);
3208 mas->offset += shift;
3209 mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0);
3210 b_node->b_end = shift + b_end;
3211 l_mas.index = l_mas.last = l_mas.min;
3214 return mas_spanning_rebalance(mas, &mast, empty_count);
3218 * mas_destroy_rebalance() - Rebalance the left-most node while destroying the maple tree.
3220 * @mas: The maple state
3221 * @end: The end of the left-most node.
3223 * During a mass-insert event (such as forking), it may be necessary to
3224 * rebalance the left-most node when it is not sufficient.
3226 static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end)
3228 enum maple_type mt = mte_node_type(mas->node);
3229 struct maple_node reuse, *newnode, *parent, *new_left, *left, *node;
3230 struct maple_enode *eparent;
3231 unsigned char offset, tmp, split = mt_slots[mt] / 2;
3232 void __rcu **l_slots, **slots;
3233 unsigned long *l_pivs, *pivs, gap;
3234 bool in_rcu = mt_in_rcu(mas->tree);
3236 MA_STATE(l_mas, mas->tree, mas->index, mas->last);
3239 mas_prev_sibling(&l_mas);
3243 /* Allocate for both left and right as well as parent. */
3244 mas_node_count(mas, 3);
3245 if (mas_is_err(mas))
3248 newnode = mas_pop_node(mas);
3254 newnode->parent = node->parent;
3255 slots = ma_slots(newnode, mt);
3256 pivs = ma_pivots(newnode, mt);
3257 left = mas_mn(&l_mas);
3258 l_slots = ma_slots(left, mt);
3259 l_pivs = ma_pivots(left, mt);
3260 if (!l_slots[split])
3262 tmp = mas_data_end(&l_mas) - split;
3264 memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp);
3265 memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp);
3266 pivs[tmp] = l_mas.max;
3267 memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end);
3268 memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end);
3270 l_mas.max = l_pivs[split];
3271 mas->min = l_mas.max + 1;
3272 eparent = mt_mk_node(mte_parent(l_mas.node),
3273 mas_parent_enum(&l_mas, l_mas.node));
3276 unsigned char max_p = mt_pivots[mt];
3277 unsigned char max_s = mt_slots[mt];
3280 memset(pivs + tmp, 0,
3281 sizeof(unsigned long) * (max_p - tmp));
3283 if (tmp < mt_slots[mt])
3284 memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp));
3286 memcpy(node, newnode, sizeof(struct maple_node));
3287 ma_set_meta(node, mt, 0, tmp - 1);
3288 mte_set_pivot(eparent, mte_parent_slot(l_mas.node),
3291 /* Remove data from l_pivs. */
3293 memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp));
3294 memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp));
3295 ma_set_meta(left, mt, 0, split);
3300 /* RCU requires replacing l_mas, mas, and the parent. */
3301 mas->node = mt_mk_node(newnode, mt);
3302 ma_set_meta(newnode, mt, 0, tmp);
3304 new_left = mas_pop_node(mas);
3305 new_left->parent = left->parent;
3306 mt = mte_node_type(l_mas.node);
3307 slots = ma_slots(new_left, mt);
3308 pivs = ma_pivots(new_left, mt);
3309 memcpy(slots, l_slots, sizeof(void *) * split);
3310 memcpy(pivs, l_pivs, sizeof(unsigned long) * split);
3311 ma_set_meta(new_left, mt, 0, split);
3312 l_mas.node = mt_mk_node(new_left, mt);
3314 /* Replace the parent. */
3315 offset = mte_parent_slot(mas->node);
3316 mt = mas_parent_enum(&l_mas, l_mas.node);
3317 parent = mas_pop_node(mas);
3318 slots = ma_slots(parent, mt);
3319 pivs = ma_pivots(parent, mt);
3320 memcpy(parent, mte_to_node(eparent), sizeof(struct maple_node));
3321 rcu_assign_pointer(slots[offset], mas->node);
3322 rcu_assign_pointer(slots[offset - 1], l_mas.node);
3323 pivs[offset - 1] = l_mas.max;
3324 eparent = mt_mk_node(parent, mt);
3326 gap = mas_leaf_max_gap(mas);
3327 mte_set_gap(eparent, mte_parent_slot(mas->node), gap);
3328 gap = mas_leaf_max_gap(&l_mas);
3329 mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap);
3333 mas_replace(mas, false);
3335 mas_update_gap(mas);
3339 * mas_split_final_node() - Split the final node in a subtree operation.
3340 * @mast: the maple subtree state
3341 * @mas: The maple state
3342 * @height: The height of the tree in case it's a new root.
3344 static inline bool mas_split_final_node(struct maple_subtree_state *mast,
3345 struct ma_state *mas, int height)
3347 struct maple_enode *ancestor;
3349 if (mte_is_root(mas->node)) {
3350 if (mt_is_alloc(mas->tree))
3351 mast->bn->type = maple_arange_64;
3353 mast->bn->type = maple_range_64;
3354 mas->depth = height;
3357 * Only a single node is used here; it could be the root.
3358 * The big node data should just fit in a single node.
3360 ancestor = mas_new_ma_node(mas, mast->bn);
3361 mte_set_parent(mast->l->node, ancestor, mast->l->offset);
3362 mte_set_parent(mast->r->node, ancestor, mast->r->offset);
3363 mte_to_node(ancestor)->parent = mas_mn(mas)->parent;
3365 mast->l->node = ancestor;
3366 mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true);
3367 mas->offset = mast->bn->b_end - 1;
3372 * mast_fill_bnode() - Copy data into the big node in the subtree state
3373 * @mast: The maple subtree state
3374 * @mas: the maple state
3375 * @skip: The number of entries to skip when inserting the new nodes.
3377 static inline void mast_fill_bnode(struct maple_subtree_state *mast,
3378 struct ma_state *mas,
3382 struct maple_enode *old = mas->node;
3383 unsigned char split;
3385 memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap));
3386 memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot));
3387 memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot));
3388 mast->bn->b_end = 0;
3390 if (mte_is_root(mas->node)) {
3394 mat_add(mast->free, old);
3395 mas->offset = mte_parent_slot(mas->node);
3398 if (cp && mast->l->offset)
3399 mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0);
3401 split = mast->bn->b_end;
3402 mab_set_b_end(mast->bn, mast->l, mast->l->node);
3403 mast->r->offset = mast->bn->b_end;
3404 mab_set_b_end(mast->bn, mast->r, mast->r->node);
3405 if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max)
3409 mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1,
3410 mast->bn, mast->bn->b_end);
3413 mast->bn->type = mte_node_type(mas->node);
3417 * mast_split_data() - Split the data in the subtree state big node into regular nodes.
3419 * @mast: The maple subtree state
3420 * @mas: The maple state
3421 * @split: The location to split the big node
3423 static inline void mast_split_data(struct maple_subtree_state *mast,
3424 struct ma_state *mas, unsigned char split)
3426 unsigned char p_slot;
3428 mab_mas_cp(mast->bn, 0, split, mast->l, true);
3429 mte_set_pivot(mast->r->node, 0, mast->r->max);
3430 mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false);
3431 mast->l->offset = mte_parent_slot(mas->node);
3432 mast->l->max = mast->bn->pivot[split];
3433 mast->r->min = mast->l->max + 1;
3434 if (mte_is_leaf(mas->node))
3437 p_slot = mast->orig_l->offset;
3438 mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node,
3440 mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node,
3445 * mas_push_data() - Instead of splitting a node, it is beneficial to push the
3446 * data to the right or left node if there is room.
3447 * @mas: The maple state
3448 * @height: The current height of the maple state
3449 * @mast: The maple subtree state
3450 * @left: Push left or not.
3452 * Keeping the height of the tree low means faster lookups.
3454 * Return: True if pushed, false otherwise.
3456 static inline bool mas_push_data(struct ma_state *mas, int height,
3457 struct maple_subtree_state *mast, bool left)
3459 unsigned char slot_total = mast->bn->b_end;
3460 unsigned char end, space, split;
3462 MA_STATE(tmp_mas, mas->tree, mas->index, mas->last);
3464 tmp_mas.depth = mast->l->depth;
3466 if (left && !mas_prev_sibling(&tmp_mas))
3468 else if (!left && !mas_next_sibling(&tmp_mas))
3471 end = mas_data_end(&tmp_mas);
3473 space = 2 * mt_slot_count(mas->node) - 2;
3474 /* -2 instead of -1 to ensure there isn't a triple split */
3475 if (ma_is_leaf(mast->bn->type))
3478 if (mas->max == ULONG_MAX)
3481 if (slot_total >= space)
3484 /* Get the data; Fill mast->bn */
3487 mab_shift_right(mast->bn, end + 1);
3488 mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0);
3489 mast->bn->b_end = slot_total + 1;
3491 mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end);
3494 /* Configure mast for splitting of mast->bn */
3495 split = mt_slots[mast->bn->type] - 2;
3497 /* Switch mas to prev node */
3498 mat_add(mast->free, mas->node);
3500 /* Start using mast->l for the left side. */
3501 tmp_mas.node = mast->l->node;
3504 mat_add(mast->free, tmp_mas.node);
3505 tmp_mas.node = mast->r->node;
3507 split = slot_total - split;
3509 split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]);
3510 /* Update parent slot for split calculation. */
3512 mast->orig_l->offset += end + 1;
3514 mast_split_data(mast, mas, split);
3515 mast_fill_bnode(mast, mas, 2);
3516 mas_split_final_node(mast, mas, height + 1);
3521 * mas_split() - Split data that is too big for one node into two.
3522 * @mas: The maple state
3523 * @b_node: The maple big node
3524 * Return: 1 on success, 0 on failure.
3526 static int mas_split(struct ma_state *mas, struct maple_big_node *b_node)
3529 struct maple_subtree_state mast;
3531 unsigned char mid_split, split = 0;
3534 * Splitting is handled differently from any other B-tree; the Maple
3535 * Tree splits upwards. Splitting up means that the split operation
3536 * occurs when the walk of the tree hits the leaves and not on the way
3537 * down. The reason for splitting up is that it is impossible to know
3538 * how much space will be needed until the leaf is (or leaves are)
3539 * reached. Since overwriting data is allowed and a range could
3540 * overwrite more than one range or result in changing one entry into 3
3541 * entries, it is impossible to know if a split is required until the data is examined.
3544 * Splitting is a balancing act between keeping allocations to a minimum
3545 * and avoiding a 'jitter' event where a tree is expanded to make room
3546 * for an entry followed by a contraction when the entry is removed. To
3547 * accomplish the balance, there are empty slots remaining in both left
3548 * and right nodes after a split.
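*
* Illustrative sketch (not part of the original source; assumes a tree mt
* and valid pointers ptr_a and ptr_b):
*
*	DEFINE_MTREE(mt);
*	mtree_store_range(&mt, 0, 100, ptr_a, GFP_KERNEL);
*	mtree_store_range(&mt, 40, 60, ptr_b, GFP_KERNEL);
*
* The second store overwrites the middle of [0, 100] and leaves three
* entries: [0, 39] => ptr_a, [40, 60] => ptr_b and [61, 100] => ptr_a,
* which is why the space needed cannot be known until the leaf is reached.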
3550 MA_STATE(l_mas, mas->tree, mas->index, mas->last);
3551 MA_STATE(r_mas, mas->tree, mas->index, mas->last);
3552 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last);
3553 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last);
3554 MA_TOPIARY(mat, mas->tree);
3556 trace_ma_op(__func__, mas);
3557 mas->depth = mas_mt_height(mas);
3558 /* Allocation failures will happen early. */
3559 mas_node_count(mas, 1 + mas->depth * 2);
3560 if (mas_is_err(mas))
3565 mast.orig_l = &prev_l_mas;
3566 mast.orig_r = &prev_r_mas;
3570 while (height++ <= mas->depth) {
3571 if (mt_slots[b_node->type] > b_node->b_end) {
3572 mas_split_final_node(&mast, mas, height);
3576 l_mas = r_mas = *mas;
3577 l_mas.node = mas_new_ma_node(mas, b_node);
3578 r_mas.node = mas_new_ma_node(mas, b_node);
3580 * Another way that 'jitter' is avoided is to terminate a split up early if the
3581 * left or right node has space to spare. This is referred to as "pushing left"
3582 * or "pushing right" and is similar to the B* tree, except the nodes left or
3583 * right can rarely be reused due to RCU, but the ripple upwards is halted which
3584 * is a significant savings.
3586 /* Try to push left. */
3587 if (mas_push_data(mas, height, &mast, true))
3590 /* Try to push right. */
3591 if (mas_push_data(mas, height, &mast, false))
3594 split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min);
3595 mast_split_data(&mast, mas, split);
3597 * Usually correct, mab_mas_cp in the above call overwrites r->max.
3600 mast.r->max = mas->max;
3601 mast_fill_bnode(&mast, mas, 1);
3602 prev_l_mas = *mast.l;
3603 prev_r_mas = *mast.r;
3606 /* Set the original node as dead */
3607 mat_add(mast.free, mas->node);
3608 mas->node = l_mas.node;
3609 mas_wmb_replace(mas, mast.free, NULL);
3610 mtree_range_walk(mas);
3615 * mas_reuse_node() - Reuse the node to store the data.
3616 * @wr_mas: The maple write state
3617 * @bn: The maple big node
3618 * @end: The end of the data.
3620 * Will always return false in RCU mode.
3622 * Return: True if node was reused, false otherwise.
3624 static inline bool mas_reuse_node(struct ma_wr_state *wr_mas,
3625 struct maple_big_node *bn, unsigned char end)
3627 /* Need to be rcu safe. */
3628 if (mt_in_rcu(wr_mas->mas->tree))
3631 if (end > bn->b_end) {
3632 int clear = mt_slots[wr_mas->type] - bn->b_end;
3634 memset(wr_mas->slots + bn->b_end, 0, sizeof(void *) * clear--);
3635 memset(wr_mas->pivots + bn->b_end, 0, sizeof(unsigned long) * clear);
3637 mab_mas_cp(bn, 0, bn->b_end, wr_mas->mas, false);
3642 * mas_commit_b_node() - Commit the big node into the tree.
3643 * @wr_mas: The maple write state
3644 * @b_node: The maple big node
3645 * @end: The end of the data.
3647 static inline int mas_commit_b_node(struct ma_wr_state *wr_mas,
3648 struct maple_big_node *b_node, unsigned char end)
3650 struct maple_node *node;
3651 unsigned char b_end = b_node->b_end;
3652 enum maple_type b_type = b_node->type;
3654 if ((b_end < mt_min_slots[b_type]) &&
3655 (!mte_is_root(wr_mas->mas->node)) &&
3656 (mas_mt_height(wr_mas->mas) > 1))
3657 return mas_rebalance(wr_mas->mas, b_node);
3659 if (b_end >= mt_slots[b_type])
3660 return mas_split(wr_mas->mas, b_node);
3662 if (mas_reuse_node(wr_mas, b_node, end))
3665 mas_node_count(wr_mas->mas, 1);
3666 if (mas_is_err(wr_mas->mas))
3669 node = mas_pop_node(wr_mas->mas);
3670 node->parent = mas_mn(wr_mas->mas)->parent;
3671 wr_mas->mas->node = mt_mk_node(node, b_type);
3672 mab_mas_cp(b_node, 0, b_end, wr_mas->mas, false);
3673 mas_replace(wr_mas->mas, false);
3675 mas_update_gap(wr_mas->mas);
3680 * mas_root_expand() - Expand a root to a node
3681 * @mas: The maple state
3682 * @entry: The entry to store into the tree
3684 static inline int mas_root_expand(struct ma_state *mas, void *entry)
3686 void *contents = mas_root_locked(mas);
3687 enum maple_type type = maple_leaf_64;
3688 struct maple_node *node;
3690 unsigned long *pivots;
3693 mas_node_count(mas, 1);
3694 if (unlikely(mas_is_err(mas)))
3697 node = mas_pop_node(mas);
3698 pivots = ma_pivots(node, type);
3699 slots = ma_slots(node, type);
3700 node->parent = ma_parent_ptr(
3701 ((unsigned long)mas->tree | MA_ROOT_PARENT));
3702 mas->node = mt_mk_node(node, type);
3706 rcu_assign_pointer(slots[slot], contents);
3707 if (likely(mas->index > 1))
3710 pivots[slot++] = mas->index - 1;
3713 rcu_assign_pointer(slots[slot], entry);
3715 pivots[slot] = mas->last;
3716 if (mas->last != ULONG_MAX)
3719 mas_set_height(mas);
3720 ma_set_meta(node, maple_leaf_64, 0, slot);
3721 /* swap the new root into the tree */
3722 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
3726 static inline void mas_store_root(struct ma_state *mas, void *entry)
3728 if (likely((mas->last != 0) || (mas->index != 0)))
3729 mas_root_expand(mas, entry);
3730 else if (((unsigned long) (entry) & 3) == 2)
3731 mas_root_expand(mas, entry);
3733 rcu_assign_pointer(mas->tree->ma_root, entry);
3734 mas->node = MAS_START;
3739 * mas_is_span_wr() - Check if the write needs to be treated as a write that
3740 * spans the node.
3741 * @wr_mas: The maple write state
3746 * Spanning writes are writes that start in one node and end in another, or
3747 * writes where storing a %NULL will cause the node to end with a %NULL.
3749 * Return: True if this is a spanning write, false otherwise.
3751 static bool mas_is_span_wr(struct ma_wr_state *wr_mas)
3754 unsigned long last = wr_mas->mas->last;
3755 unsigned long piv = wr_mas->r_max;
3756 enum maple_type type = wr_mas->type;
3757 void *entry = wr_mas->entry;
3759 /* Contained in this pivot */
3763 max = wr_mas->mas->max;
3764 if (unlikely(ma_is_leaf(type))) {
3765 /* Fits in the node, but may span slots. */
3769 /* Writes to the end of the node but not null. */
3770 if ((last == max) && entry)
3774 * Writing ULONG_MAX is not a spanning write regardless of the
3775 * value being written as long as the range fits in the node.
3777 if ((last == ULONG_MAX) && (last == max))
3779 } else if (piv == last) {
3783 /* Detect spanning store wr walk */
3784 if (last == ULONG_MAX)
3788 trace_ma_write(__func__, wr_mas->mas, piv, entry);
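/*
 * Illustrative example (not in the original source): assuming one leaf
 * covers [0, 50] and the next covers [51, 100], then
 *
 *	mtree_store_range(&mt, 40, 60, entry, GFP_KERNEL);
 *
 * starts in the first leaf and ends in the second, so the walk detects a
 * spanning write. Storing %NULL over [40, 50] also counts as spanning,
 * since the leaf would then end in %NULL and may need to be rebalanced.
 */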
3793 static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas)
3795 wr_mas->type = mte_node_type(wr_mas->mas->node);
3796 mas_wr_node_walk(wr_mas);
3797 wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type);
3800 static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas)
3802 wr_mas->mas->max = wr_mas->r_max;
3803 wr_mas->mas->min = wr_mas->r_min;
3804 wr_mas->mas->node = wr_mas->content;
3805 wr_mas->mas->offset = 0;
3806 wr_mas->mas->depth++;
3809 * mas_wr_walk() - Walk the tree for a write.
3810 * @wr_mas: The maple write state
3812 * Uses mas_slot_locked() and does not need to worry about dead nodes.
3814 * Return: True if it's contained in a node, false on spanning write.
3816 static bool mas_wr_walk(struct ma_wr_state *wr_mas)
3818 struct ma_state *mas = wr_mas->mas;
3821 mas_wr_walk_descend(wr_mas);
3822 if (unlikely(mas_is_span_wr(wr_mas)))
3825 wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
3827 if (ma_is_leaf(wr_mas->type))
3830 mas_wr_walk_traverse(wr_mas);
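/*
 * mas_wr_walk_index() - Walk the tree for a write, like mas_wr_walk() but
 * without the spanning-write check (summary comment added for clarity;
 * the original lacks one).
 * @wr_mas: The maple write state
 */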
3836 static bool mas_wr_walk_index(struct ma_wr_state *wr_mas)
3838 struct ma_state *mas = wr_mas->mas;
3841 mas_wr_walk_descend(wr_mas);
3842 wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
3844 if (ma_is_leaf(wr_mas->type))
3846 mas_wr_walk_traverse(wr_mas);
3852 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs.
3853 * @l_wr_mas: The left maple write state
3854 * @r_wr_mas: The right maple write state
3856 static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas,
3857 struct ma_wr_state *r_wr_mas)
3859 struct ma_state *r_mas = r_wr_mas->mas;
3860 struct ma_state *l_mas = l_wr_mas->mas;
3861 unsigned char l_slot;
3863 l_slot = l_mas->offset;
3864 if (!l_wr_mas->content)
3865 l_mas->index = l_wr_mas->r_min;
3867 if ((l_mas->index == l_wr_mas->r_min) &&
3869 !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) {
3871 l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1;
3873 l_mas->index = l_mas->min;
3875 l_mas->offset = l_slot - 1;
3878 if (!r_wr_mas->content) {
3879 if (r_mas->last < r_wr_mas->r_max)
3880 r_mas->last = r_wr_mas->r_max;
3882 } else if ((r_mas->last == r_wr_mas->r_max) &&
3883 (r_mas->last < r_mas->max) &&
3884 !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) {
3885 r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots,
3886 r_wr_mas->type, r_mas->offset + 1);
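/*
 * Illustrative example (not in the original source): if [30, 39] and
 * [61, 70] already contain %NULL, storing %NULL over [40, 60] is extended
 * on both sides so a single %NULL range [30, 70] remains, keeping adjacent
 * %NULLs coalesced.
 */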
3891 static inline void *mas_state_walk(struct ma_state *mas)
3895 entry = mas_start(mas);
3896 if (mas_is_none(mas))
3899 if (mas_is_ptr(mas))
3902 return mtree_range_walk(mas);
3906 * mtree_lookup_walk() - Internal quick lookup that does not keep the maple state up to date.
3909 * @mas: The maple state.
3911 * Note: Leaves mas in an undesirable state.
3912 * Return: The entry for @mas->index or %NULL on dead node.
3914 static inline void *mtree_lookup_walk(struct ma_state *mas)
3916 unsigned long *pivots;
3917 unsigned char offset;
3918 struct maple_node *node;
3919 struct maple_enode *next;
3920 enum maple_type type;
3929 node = mte_to_node(next);
3930 type = mte_node_type(next);
3931 pivots = ma_pivots(node, type);
3932 end = ma_data_end(node, type, pivots, max);
3933 if (unlikely(ma_dead_node(node)))
3936 if (pivots[offset] >= mas->index) {
3937 max = pivots[offset];
3940 } while (++offset < end);
3942 slots = ma_slots(node, type);
3943 next = mt_slot(mas->tree, slots, offset);
3944 if (unlikely(ma_dead_node(node)))
3946 } while (!ma_is_leaf(type));
3948 return (void *) next;
3956 * mas_new_root() - Create a new root node that only contains the entry passed in.
3958 * @mas: The maple state
3959 * @entry: The entry to store.
3961 * Only valid when mas->index == 0 and mas->last == ULONG_MAX.
3963 * Return: 0 on error, 1 on success.
3965 static inline int mas_new_root(struct ma_state *mas, void *entry)
3967 struct maple_enode *root = mas_root_locked(mas);
3968 enum maple_type type = maple_leaf_64;
3969 struct maple_node *node;
3971 unsigned long *pivots;
3973 if (!entry && !mas->index && mas->last == ULONG_MAX) {
3975 mas_set_height(mas);
3976 rcu_assign_pointer(mas->tree->ma_root, entry);
3977 mas->node = MAS_START;
3981 mas_node_count(mas, 1);
3982 if (mas_is_err(mas))
3985 node = mas_pop_node(mas);
3986 pivots = ma_pivots(node, type);
3987 slots = ma_slots(node, type);
3988 node->parent = ma_parent_ptr(
3989 ((unsigned long)mas->tree | MA_ROOT_PARENT));
3990 mas->node = mt_mk_node(node, type);
3991 rcu_assign_pointer(slots[0], entry);
3992 pivots[0] = mas->last;
3994 mas_set_height(mas);
3995 rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
3998 if (xa_is_node(root))
3999 mte_destroy_walk(root, mas->tree);
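/*
 * Usage note (illustrative, assuming a tree mt and a valid entry): a store
 * that covers the entire range, e.g.
 *
 *	mtree_store_range(&mt, 0, ULONG_MAX, entry, GFP_KERNEL);
 *
 * replaces whatever tree existed with a single-entry root node (or empties
 * the tree when the entry is %NULL), and any old root node is torn down
 * with mte_destroy_walk() above.
 */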
4004 * mas_wr_spanning_store() - Create a subtree with the store operation completed
4005 * and new nodes where necessary, then place the sub-tree in the actual tree.
4006 * Note that @mas is expected to point to the node which caused the store to span.
4008 * @wr_mas: The maple write state
4010 * Return: 0 on error, positive on success.
4012 static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas)
4014 struct maple_subtree_state mast;
4015 struct maple_big_node b_node;
4016 struct ma_state *mas;
4017 unsigned char height;
4019 /* Left and Right side of spanning store */
4020 MA_STATE(l_mas, NULL, 0, 0);
4021 MA_STATE(r_mas, NULL, 0, 0);
4023 MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry);
4024 MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry);
4027 * A store operation that spans multiple nodes is called a spanning
4028 * store and is handled early in the store call stack by the function
4029 * mas_is_span_wr(). When a spanning store is identified, the maple
4030 * state is duplicated. The first maple state walks the left tree path
4031 * to ``index``, the duplicate walks the right tree path to ``last``.
4032 * The data in the two nodes are combined into a single node, two nodes,
4033 * or possibly three nodes (see the 3-way split above). A ``NULL``
4034 * written to the last entry of a node is considered a spanning store as
4035 * a rebalance is required for the operation to complete and an overflow
4036 * of data may happen.
4039 trace_ma_op(__func__, mas);
4041 if (unlikely(!mas->index && mas->last == ULONG_MAX))
4042 return mas_new_root(mas, wr_mas->entry);
4044 * Node rebalancing may occur due to this store, so there may be three new
4045 * entries per level plus a new root.
4047 height = mas_mt_height(mas);
4048 mas_node_count(mas, 1 + height * 3);
4049 if (mas_is_err(mas))
4053 * Set up the right side. Need to get to the next offset after the spanning
4054 * store to ensure it's not NULL, and to combine the next node with the
4055 * node containing the start.
4058 /* Avoid overflow, walk to next slot in the tree. */
4062 r_mas.index = r_mas.last;
4063 mas_wr_walk_index(&r_wr_mas);
4064 r_mas.last = r_mas.index = mas->last;
4066 /* Set up left side. */
4068 mas_wr_walk_index(&l_wr_mas);
4070 if (!wr_mas->entry) {
4071 mas_extend_spanning_null(&l_wr_mas, &r_wr_mas);
4072 mas->offset = l_mas.offset;
4073 mas->index = l_mas.index;
4074 mas->last = l_mas.last = r_mas.last;
4077 /* expanding NULLs may make this cover the entire range */
4078 if (!l_mas.index && r_mas.last == ULONG_MAX) {
4079 mas_set_range(mas, 0, ULONG_MAX);
4080 return mas_new_root(mas, wr_mas->entry);
4083 memset(&b_node, 0, sizeof(struct maple_big_node));
4084 /* Copy l_mas and store the value in b_node. */
4085 mas_store_b_node(&l_wr_mas, &b_node, l_wr_mas.node_end);
4086 /* Copy r_mas into b_node. */
4087 if (r_mas.offset <= r_wr_mas.node_end)
4088 mas_mab_cp(&r_mas, r_mas.offset, r_wr_mas.node_end,
4089 &b_node, b_node.b_end + 1);
4093 /* Stop spanning searches by searching for just index. */
4094 l_mas.index = l_mas.last = mas->index;
4097 mast.orig_l = &l_mas;
4098 mast.orig_r = &r_mas;
4099 /* Combine l_mas and r_mas and split them up evenly again. */
4100 return mas_spanning_rebalance(mas, &mast, height + 1);
4104 * mas_wr_node_store() - Attempt to store the value in a node
4105 * @wr_mas: The maple write state
4107 * Attempts to reuse the node, but may allocate.
4109 * Return: True if stored, false otherwise
4111 static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas)
4113 struct ma_state *mas = wr_mas->mas;
4114 void __rcu **dst_slots;
4115 unsigned long *dst_pivots;
4116 unsigned char dst_offset;
4117 unsigned char new_end = wr_mas->node_end;
4118 unsigned char offset;
4119 unsigned char node_slots = mt_slots[wr_mas->type];
4120 struct maple_node reuse, *newnode;
4121 unsigned char copy_size, max_piv = mt_pivots[wr_mas->type];
4122 bool in_rcu = mt_in_rcu(mas->tree);
4124 offset = mas->offset;
4125 if (mas->last == wr_mas->r_max) {
4126 /* runs right to the end of the node */
4127 if (mas->last == mas->max)
4129 /* don't copy this offset */
4130 wr_mas->offset_end++;
4131 } else if (mas->last < wr_mas->r_max) {
4132 /* new range ends in this range */
4133 if (unlikely(wr_mas->r_max == ULONG_MAX))
4134 mas_bulk_rebalance(mas, wr_mas->node_end, wr_mas->type);
4138 if (wr_mas->end_piv == mas->last)
4139 wr_mas->offset_end++;
4141 new_end -= wr_mas->offset_end - offset - 1;
4144 /* new range starts within a range */
4145 if (wr_mas->r_min < mas->index)
4148 /* Not enough room */
4149 if (new_end >= node_slots)
4152 /* Not enough data. */
4153 if (!mte_is_root(mas->node) && (new_end <= mt_min_slots[wr_mas->type]) &&
4154 !(mas->mas_flags & MA_STATE_BULK))
4159 mas_node_count(mas, 1);
4160 if (mas_is_err(mas))
4163 newnode = mas_pop_node(mas);
4165 memset(&reuse, 0, sizeof(struct maple_node));
4169 newnode->parent = mas_mn(mas)->parent;
4170 dst_pivots = ma_pivots(newnode, wr_mas->type);
4171 dst_slots = ma_slots(newnode, wr_mas->type);
4172 /* Copy from start to insert point */
4173 memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * (offset + 1));
4174 memcpy(dst_slots, wr_mas->slots, sizeof(void *) * (offset + 1));
4175 dst_offset = offset;
4177 /* Handle insert of new range starting after old range */
4178 if (wr_mas->r_min < mas->index) {
4180 rcu_assign_pointer(dst_slots[dst_offset], wr_mas->content);
4181 dst_pivots[dst_offset++] = mas->index - 1;
4184 /* Store the new entry and range end. */
4185 if (dst_offset < max_piv)
4186 dst_pivots[dst_offset] = mas->last;
4187 mas->offset = dst_offset;
4188 rcu_assign_pointer(dst_slots[dst_offset], wr_mas->entry);
4191 * this range wrote to the end of the node or it overwrote the rest of the data.
4194 if (wr_mas->offset_end > wr_mas->node_end || mas->last >= mas->max) {
4195 new_end = dst_offset;
4200 /* Copy to the end of node if necessary. */
4201 copy_size = wr_mas->node_end - wr_mas->offset_end + 1;
4202 memcpy(dst_slots + dst_offset, wr_mas->slots + wr_mas->offset_end,
4203 sizeof(void *) * copy_size);
4204 if (dst_offset < max_piv) {
4205 if (copy_size > max_piv - dst_offset)
4206 copy_size = max_piv - dst_offset;
4208 memcpy(dst_pivots + dst_offset,
4209 wr_mas->pivots + wr_mas->offset_end,
4210 sizeof(unsigned long) * copy_size);
4213 if ((wr_mas->node_end == node_slots - 1) && (new_end < node_slots - 1))
4214 dst_pivots[new_end] = mas->max;
4217 mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end);
4219 mas->node = mt_mk_node(newnode, wr_mas->type);
4220 mas_replace(mas, false);
4222 memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
4224 trace_ma_write(__func__, mas, 0, wr_mas->entry);
4225 mas_update_gap(mas);
4230 * mas_wr_slot_store() - Attempt to store a value in a slot.
4231 * @wr_mas: the maple write state
4233 * Return: True if stored, false otherwise
4235 static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas)
4237 struct ma_state *mas = wr_mas->mas;
4238 unsigned long lmax; /* Logical max. */
4239 unsigned char offset = mas->offset;
4241 if ((wr_mas->r_max > mas->last) && ((wr_mas->r_min != mas->index) ||
4242 (offset != wr_mas->node_end)))
4245 if (offset == wr_mas->node_end - 1)
4248 lmax = wr_mas->pivots[offset + 1];
4250 /* going to overwrite too many slots. */
4251 if (lmax < mas->last)
4254 if (wr_mas->r_min == mas->index) {
4255 /* overwriting two or more ranges with one. */
4256 if (lmax == mas->last)
4259 /* Overwriting all of offset and a portion of offset + 1. */
4260 rcu_assign_pointer(wr_mas->slots[offset], wr_mas->entry);
4261 wr_mas->pivots[offset] = mas->last;
4265 /* Doesn't end on the next range end. */
4266 if (lmax != mas->last)
4269 /* Overwriting a portion of offset and all of offset + 1 */
4270 if ((offset + 1 < mt_pivots[wr_mas->type]) &&
4271 (wr_mas->entry || wr_mas->pivots[offset + 1]))
4272 wr_mas->pivots[offset + 1] = mas->last;
4274 rcu_assign_pointer(wr_mas->slots[offset + 1], wr_mas->entry);
4275 wr_mas->pivots[offset] = mas->index - 1;
4276 mas->offset++; /* Keep mas accurate. */
4279 trace_ma_write(__func__, mas, 0, wr_mas->entry);
4280 mas_update_gap(mas);
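/*
 * mas_wr_end_piv() - Find the pivot that ends the write (summary comment
 * added for clarity; the original lacks one). Advances wr_mas->offset_end
 * until the slot pivot reaches mas->last, capping end_piv at mas->max.
 * @wr_mas: The maple write state
 */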
4284 static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas)
4286 while ((wr_mas->mas->last > wr_mas->end_piv) &&
4287 (wr_mas->offset_end < wr_mas->node_end))
4288 wr_mas->end_piv = wr_mas->pivots[++wr_mas->offset_end];
4290 if (wr_mas->mas->last > wr_mas->end_piv)
4291 wr_mas->end_piv = wr_mas->mas->max;
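/*
 * mas_wr_extend_null() - Extend a write of %NULL over neighbouring %NULL
 * slots (summary comment added for clarity; the original lacks one). Grows
 * mas->index downwards and mas->last upwards while the adjacent slots
 * contain %NULL.
 * @wr_mas: The maple write state
 */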
4294 static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas)
4296 struct ma_state *mas = wr_mas->mas;
4298 if (mas->last < wr_mas->end_piv && !wr_mas->slots[wr_mas->offset_end])
4299 mas->last = wr_mas->end_piv;
4301 /* Check next slot(s) if we are overwriting the end */
4302 if ((mas->last == wr_mas->end_piv) &&
4303 (wr_mas->node_end != wr_mas->offset_end) &&
4304 !wr_mas->slots[wr_mas->offset_end + 1]) {
4305 wr_mas->offset_end++;
4306 if (wr_mas->offset_end == wr_mas->node_end)
4307 mas->last = mas->max;
4309 mas->last = wr_mas->pivots[wr_mas->offset_end];
4310 wr_mas->end_piv = mas->last;
4313 if (!wr_mas->content) {
4314 /* If this one is null, the next and prev are not */
4315 mas->index = wr_mas->r_min;
4317 /* Check prev slot if we are overwriting the start */
4318 if (mas->index == wr_mas->r_min && mas->offset &&
4319 !wr_mas->slots[mas->offset - 1]) {
4321 wr_mas->r_min = mas->index =
4322 mas_safe_min(mas, wr_mas->pivots, mas->offset);
4323 wr_mas->r_max = wr_mas->pivots[mas->offset];
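/*
 * mas_wr_append() - Attempt to store the new range by appending to the node
 * (summary comment added for clarity; the original lacks one). Only valid
 * when the write lands in the last slot and either its start or its end
 * matches the existing range, so a single new slot is enough.
 * @wr_mas: The maple write state
 *
 * Return: True if appended, false otherwise.
 */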
4328 static inline bool mas_wr_append(struct ma_wr_state *wr_mas)
4330 unsigned char end = wr_mas->node_end;
4331 unsigned char new_end = end + 1;
4332 struct ma_state *mas = wr_mas->mas;
4333 unsigned char node_pivots = mt_pivots[wr_mas->type];
4335 if ((mas->index != wr_mas->r_min) && (mas->last == wr_mas->r_max)) {
4336 if (new_end < node_pivots)
4337 wr_mas->pivots[new_end] = wr_mas->pivots[end];
4339 if (new_end < node_pivots)
4340 ma_set_meta(wr_mas->node, maple_leaf_64, 0, new_end);
4342 rcu_assign_pointer(wr_mas->slots[new_end], wr_mas->entry);
4343 mas->offset = new_end;
4344 wr_mas->pivots[end] = mas->index - 1;
4349 if ((mas->index == wr_mas->r_min) && (mas->last < wr_mas->r_max)) {
4350 if (new_end < node_pivots)
4351 wr_mas->pivots[new_end] = wr_mas->pivots[end];
4353 rcu_assign_pointer(wr_mas->slots[new_end], wr_mas->content);
4354 if (new_end < node_pivots)
4355 ma_set_meta(wr_mas->node, maple_leaf_64, 0, new_end);
4357 wr_mas->pivots[end] = mas->last;
4358 rcu_assign_pointer(wr_mas->slots[end], wr_mas->entry);
4366 * mas_wr_bnode() - Slow path for a modification.
4367 * @wr_mas: The write maple state
4369 * This is where split and rebalance end up.
4371 static void mas_wr_bnode(struct ma_wr_state *wr_mas)
4373 struct maple_big_node b_node;
4375 trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry);
4376 memset(&b_node, 0, sizeof(struct maple_big_node));
4377 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end);
4378 mas_commit_b_node(wr_mas, &b_node, wr_mas->node_end);
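/*
 * mas_wr_modify() - Attempt the fast paths for a write (summary comment
 * added for clarity; the original lacks one): direct slot replacement,
 * append, slot store, then node store, falling back to mas_wr_bnode()
 * when none of them fit.
 * @wr_mas: The maple write state
 */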
4381 static inline void mas_wr_modify(struct ma_wr_state *wr_mas)
4383 unsigned char node_slots;
4384 unsigned char node_size;
4385 struct ma_state *mas = wr_mas->mas;
4387 /* Direct replacement */
4388 if (wr_mas->r_min == mas->index && wr_mas->r_max == mas->last) {
4389 rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry);
4390 if (!!wr_mas->entry ^ !!wr_mas->content)
4391 mas_update_gap(mas);
4395 /* Attempt to append */
4396 node_slots = mt_slots[wr_mas->type];
4397 node_size = wr_mas->node_end - wr_mas->offset_end + mas->offset + 2;
4398 if (mas->max == ULONG_MAX)
4401 /* slot and node store will not fit, go to the slow path */
4402 if (unlikely(node_size >= node_slots))
4405 if (wr_mas->entry && (wr_mas->node_end < node_slots - 1) &&
4406 (mas->offset == wr_mas->node_end) && mas_wr_append(wr_mas)) {
4407 if (!wr_mas->content || !wr_mas->entry)
4408 mas_update_gap(mas);
4412 if ((wr_mas->offset_end - mas->offset <= 1) && mas_wr_slot_store(wr_mas))
4414 else if (mas_wr_node_store(wr_mas))
4417 if (mas_is_err(mas))
4421 mas_wr_bnode(wr_mas);
4425 * mas_wr_store_entry() - Internal call to store a value
4426 * @wr_mas: The maple write state
4429 * Return: The contents that were stored at the index.
4431 static inline void *mas_wr_store_entry(struct ma_wr_state *wr_mas)
4433 struct ma_state *mas = wr_mas->mas;
4435 wr_mas->content = mas_start(mas);
4436 if (mas_is_none(mas) || mas_is_ptr(mas)) {
4437 mas_store_root(mas, wr_mas->entry);
4438 return wr_mas->content;
4441 if (unlikely(!mas_wr_walk(wr_mas))) {
4442 mas_wr_spanning_store(wr_mas);
4443 return wr_mas->content;
4446 /* At this point, we are at the leaf node that needs to be altered. */
4447 wr_mas->end_piv = wr_mas->r_max;
4448 mas_wr_end_piv(wr_mas);
4451 mas_wr_extend_null(wr_mas);
4453 /* New root for a single pointer */
4454 if (unlikely(!mas->index && mas->last == ULONG_MAX)) {
4455 mas_new_root(mas, wr_mas->entry);
4456 return wr_mas->content;
4459 mas_wr_modify(wr_mas);
4460 return wr_mas->content;
4464 * mas_insert() - Internal call to insert a value
4465 * @mas: The maple state
4466 * @entry: The entry to store
4468 * Return: %NULL if the entry was inserted, otherwise the contents that already
4469 * exist at the requested index. The maple state needs to be checked for error conditions.
4471 static inline void *mas_insert(struct ma_state *mas, void *entry)
4473 MA_WR_STATE(wr_mas, mas, entry);
4476 * Inserting a new range inserts either 0, 1, or 2 pivots within the
4477 * tree. If the insert fits exactly into an existing gap with a value
4478 * of NULL, then the slot only needs to be written with the new value.
4479 * If the range being inserted is adjacent to another range, then only a
4480 * single pivot needs to be inserted (as well as writing the entry). If
4481 * the new range is within a gap but does not touch any other ranges,
4482 * then two pivots need to be inserted: the start - 1, and the end. As
4483 * usual, the entry must be written. Most operations require a new node
4484 * to be allocated and replace an existing node to ensure RCU safety,
4485 * when in RCU mode. The exception to requiring a newly allocated node
4486 * is when inserting at the end of a node (appending). When done
4487 * carefully, appending can reuse the node in place.
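*
* Illustrative example: with an existing entry at [0, 9] and %NULL above it,
* inserting [10, 19] is adjacent and needs one new pivot (19); inserting
* [15, 19] into the middle of the gap needs two pivots (14 and 19) as well
* as the entry itself.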
4489 wr_mas.content = mas_start(mas);
4493 if (mas_is_none(mas) || mas_is_ptr(mas)) {
4494 mas_store_root(mas, entry);
4498 /* spanning writes always overwrite something */
4499 if (!mas_wr_walk(&wr_mas))
4502 /* At this point, we are at the leaf node that needs to be altered. */
4503 wr_mas.offset_end = mas->offset;
4504 wr_mas.end_piv = wr_mas.r_max;
4506 if (wr_mas.content || (mas->last > wr_mas.r_max))
4512 mas_wr_modify(&wr_mas);
4513 return wr_mas.content;
4516 mas_set_err(mas, -EEXIST);
4517 return wr_mas.content;
 * mas_prev_node() - Find the previous non-null entry at the same level in
 * the tree.
 * @mas: The maple state
 * @min: The lower limit to search
 *
 * The prev node value will be mas->node[mas->offset] or MAS_NONE.
 * Return: 1 if the node is dead, 0 otherwise.
4530 static inline int mas_prev_node(struct ma_state *mas, unsigned long min)
4535 struct maple_node *node;
4536 struct maple_enode *enode;
4537 unsigned long *pivots;
4539 if (mas_is_none(mas))
4545 if (ma_is_root(node))
4549 if (unlikely(mas_ascend(mas)))
4551 offset = mas->offset;
4556 mt = mte_node_type(mas->node);
4558 slots = ma_slots(node, mt);
4559 pivots = ma_pivots(node, mt);
4560 if (unlikely(ma_dead_node(node)))
4563 mas->max = pivots[offset];
4565 mas->min = pivots[offset - 1] + 1;
4566 if (unlikely(ma_dead_node(node)))
4574 enode = mas_slot(mas, slots, offset);
4575 if (unlikely(ma_dead_node(node)))
4579 mt = mte_node_type(mas->node);
4581 slots = ma_slots(node, mt);
4582 pivots = ma_pivots(node, mt);
4583 offset = ma_data_end(node, mt, pivots, mas->max);
4584 if (unlikely(ma_dead_node(node)))
4588 mas->min = pivots[offset - 1] + 1;
4590 if (offset < mt_pivots[mt])
4591 mas->max = pivots[offset];
4597 mas->node = mas_slot(mas, slots, offset);
4598 if (unlikely(ma_dead_node(node)))
4601 mas->offset = mas_data_end(mas);
4602 if (unlikely(mte_dead_node(mas->node)))
4608 mas->offset = offset;
4610 mas->min = pivots[offset - 1] + 1;
4612 if (unlikely(ma_dead_node(node)))
4615 mas->node = MAS_NONE;
 * mas_next_node() - Get the next node at the same level in the tree.
 * @mas: The maple state
 * @node: The maple node backing @mas->node
 * @max: The maximum pivot value to check.
 *
 * The next value will be mas->node[mas->offset] or MAS_NONE.
 * Return: 1 on dead node, 0 otherwise.
4627 static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
4630 unsigned long min, pivot;
4631 unsigned long *pivots;
4632 struct maple_enode *enode;
4634 unsigned char offset;
4635 unsigned char node_end;
4639 if (mas->max >= max)
4644 if (ma_is_root(node))
4651 if (unlikely(mas_ascend(mas)))
4654 offset = mas->offset;
4657 mt = mte_node_type(mas->node);
4658 pivots = ma_pivots(node, mt);
4659 node_end = ma_data_end(node, mt, pivots, mas->max);
4660 if (unlikely(ma_dead_node(node)))
4663 } while (unlikely(offset == node_end));
4665 slots = ma_slots(node, mt);
4666 pivot = mas_safe_pivot(mas, pivots, ++offset, mt);
4667 while (unlikely(level > 1)) {
4668 /* Descend, if necessary */
4669 enode = mas_slot(mas, slots, offset);
4670 if (unlikely(ma_dead_node(node)))
4676 mt = mte_node_type(mas->node);
4677 slots = ma_slots(node, mt);
4678 pivots = ma_pivots(node, mt);
4679 if (unlikely(ma_dead_node(node)))
4686 enode = mas_slot(mas, slots, offset);
4687 if (unlikely(ma_dead_node(node)))
4696 if (unlikely(ma_dead_node(node)))
4699 mas->node = MAS_NONE;
 * mas_next_nentry() - Get the next node entry.
 * @mas: The maple state
 * @node: The maple node backing @mas->node
 * @max: The maximum value to check
 * @type: The maple node type
 *
 * Sets @mas->offset to the offset of the next node entry, @mas->last to the
 * pivot of the entry.
 *
 * Return: The next entry, %NULL otherwise
4714 static inline void *mas_next_nentry(struct ma_state *mas,
4715 struct maple_node *node, unsigned long max, enum maple_type type)
4717 unsigned char count;
4718 unsigned long pivot;
4719 unsigned long *pivots;
4723 if (mas->last == mas->max) {
4724 mas->index = mas->max;
4728 slots = ma_slots(node, type);
4729 pivots = ma_pivots(node, type);
4730 count = ma_data_end(node, type, pivots, mas->max);
4731 if (unlikely(ma_dead_node(node)))
4734 mas->index = mas_safe_min(mas, pivots, mas->offset);
4735 if (unlikely(ma_dead_node(node)))
4738 if (mas->index > max)
4741 if (mas->offset > count)
4744 while (mas->offset < count) {
4745 pivot = pivots[mas->offset];
4746 entry = mas_slot(mas, slots, mas->offset);
4747 if (ma_dead_node(node))
4756 mas->index = pivot + 1;
4760 if (mas->index > mas->max) {
4761 mas->index = mas->last;
4765 pivot = mas_safe_pivot(mas, pivots, mas->offset, type);
4766 entry = mas_slot(mas, slots, mas->offset);
4767 if (ma_dead_node(node))
4781 static inline void mas_rewalk(struct ma_state *mas, unsigned long index)
4785 mas_set(mas, index);
4786 mas_state_walk(mas);
4787 if (mas_is_start(mas))
4795 * mas_next_entry() - Internal function to get the next entry.
4796 * @mas: The maple state
4797 * @limit: The maximum range start.
 * Sets @mas->node to the next entry and @mas->index to the beginning
 * value of the entry's range. Does not check beyond @limit.
4801 * Sets @mas->index and @mas->last to the limit if it is hit.
4802 * Restarts on dead nodes.
4804 * Return: the next entry or %NULL.
4806 static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit)
4809 struct maple_enode *prev_node;
4810 struct maple_node *node;
4811 unsigned char offset;
4815 if (mas->index > limit) {
4816 mas->index = mas->last = limit;
4822 offset = mas->offset;
4823 prev_node = mas->node;
4825 mt = mte_node_type(mas->node);
4827 if (unlikely(mas->offset >= mt_slots[mt])) {
4828 mas->offset = mt_slots[mt] - 1;
4832 while (!mas_is_none(mas)) {
4833 entry = mas_next_nentry(mas, node, limit, mt);
4834 if (unlikely(ma_dead_node(node))) {
4835 mas_rewalk(mas, last);
4842 if (unlikely((mas->index > limit)))
4846 prev_node = mas->node;
4847 offset = mas->offset;
4848 if (unlikely(mas_next_node(mas, node, limit))) {
4849 mas_rewalk(mas, last);
4854 mt = mte_node_type(mas->node);
4857 mas->index = mas->last = limit;
4858 mas->offset = offset;
4859 mas->node = prev_node;
 * mas_prev_nentry() - Get the previous node entry.
 * @mas: The maple state.
 * @limit: The lower limit to check for a value.
 * @index: The index to restart the walk from if a dead node is encountered.
 *
 * Return: the entry, %NULL otherwise.
4870 static inline void *mas_prev_nentry(struct ma_state *mas, unsigned long limit,
4871 unsigned long index)
4873 unsigned long pivot, min;
4874 unsigned char offset;
4875 struct maple_node *mn;
4877 unsigned long *pivots;
4886 mt = mte_node_type(mas->node);
4887 offset = mas->offset - 1;
4888 if (offset >= mt_slots[mt])
4889 offset = mt_slots[mt] - 1;
4891 slots = ma_slots(mn, mt);
4892 pivots = ma_pivots(mn, mt);
4893 if (unlikely(ma_dead_node(mn))) {
4894 mas_rewalk(mas, index);
4898 if (offset == mt_pivots[mt])
4901 pivot = pivots[offset];
4903 if (unlikely(ma_dead_node(mn))) {
4904 mas_rewalk(mas, index);
4908 while (offset && ((!mas_slot(mas, slots, offset) && pivot >= limit) ||
4910 pivot = pivots[--offset];
4912 min = mas_safe_min(mas, pivots, offset);
4913 entry = mas_slot(mas, slots, offset);
4914 if (unlikely(ma_dead_node(mn))) {
4915 mas_rewalk(mas, index);
4919 if (likely(entry)) {
4920 mas->offset = offset;
4927 static inline void *mas_prev_entry(struct ma_state *mas, unsigned long min)
4931 if (mas->index < min) {
4932 mas->index = mas->last = min;
4933 mas->node = MAS_NONE;
4937 while (likely(!mas_is_none(mas))) {
4938 entry = mas_prev_nentry(mas, min, mas->index);
4939 if (unlikely(mas->last < min))
4945 if (unlikely(mas_prev_node(mas, min))) {
4946 mas_rewalk(mas, mas->index);
4955 mas->index = mas->last = min;
4960 * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the
4961 * highest gap address of a given size in a given node and descend.
4962 * @mas: The maple state
4963 * @size: The needed size.
4965 * Return: True if found in a leaf, false otherwise.
4968 static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
4970 enum maple_type type = mte_node_type(mas->node);
4971 struct maple_node *node = mas_mn(mas);
4972 unsigned long *pivots, *gaps;
4974 unsigned long gap = 0;
4975 unsigned long max, min;
4976 unsigned char offset;
4978 if (unlikely(mas_is_err(mas)))
4981 if (ma_is_dense(type)) {
4983 mas->offset = (unsigned char)(mas->index - mas->min);
4987 pivots = ma_pivots(node, type);
4988 slots = ma_slots(node, type);
4989 gaps = ma_gaps(node, type);
4990 offset = mas->offset;
4991 min = mas_safe_min(mas, pivots, offset);
4992 /* Skip out of bounds. */
4993 while (mas->last < min)
4994 min = mas_safe_min(mas, pivots, --offset);
4996 max = mas_safe_pivot(mas, pivots, offset, type);
4997 while (mas->index <= max) {
5001 else if (!mas_slot(mas, slots, offset))
5002 gap = max - min + 1;
5005 if ((size <= gap) && (size <= mas->last - min + 1))
5009 /* Skip the next slot, it cannot be a gap. */
5014 max = pivots[offset];
5015 min = mas_safe_min(mas, pivots, offset);
5025 min = mas_safe_min(mas, pivots, offset);
5028 if (unlikely((mas->index > max) || (size - 1 > max - mas->index)))
5031 if (unlikely(ma_is_leaf(type))) {
5032 mas->offset = offset;
5034 mas->max = min + gap - 1;
5038 /* descend, only happens under lock. */
5039 mas->node = mas_slot(mas, slots, offset);
5042 mas->offset = mas_data_end(mas);
5046 if (!mte_is_root(mas->node))
5050 mas_set_err(mas, -EBUSY);
5054 static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
5056 enum maple_type type = mte_node_type(mas->node);
5057 unsigned long pivot, min, gap = 0;
5058 unsigned char offset;
5059 unsigned long *gaps;
5060 unsigned long *pivots = ma_pivots(mas_mn(mas), type);
5061 void __rcu **slots = ma_slots(mas_mn(mas), type);
5064 if (ma_is_dense(type)) {
5065 mas->offset = (unsigned char)(mas->index - mas->min);
5069 gaps = ma_gaps(mte_to_node(mas->node), type);
5070 offset = mas->offset;
5071 min = mas_safe_min(mas, pivots, offset);
5072 for (; offset < mt_slots[type]; offset++) {
5073 pivot = mas_safe_pivot(mas, pivots, offset, type);
5074 if (offset && !pivot)
5077 /* Not within lower bounds */
5078 if (mas->index > pivot)
5083 else if (!mas_slot(mas, slots, offset))
5084 gap = min(pivot, mas->last) - max(mas->index, min) + 1;
5089 if (ma_is_leaf(type)) {
5093 if (mas->index <= pivot) {
5094 mas->node = mas_slot(mas, slots, offset);
5103 if (mas->last <= pivot) {
5104 mas_set_err(mas, -EBUSY);
5109 if (mte_is_root(mas->node))
5112 mas->offset = offset;
5117 * mas_walk() - Search for @mas->index in the tree.
5118 * @mas: The maple state.
 * @mas->index and @mas->last will be set to the range if there is a value. If
 * @mas->node is MAS_NONE, it is reset to MAS_START.
5123 * Return: the entry at the location or %NULL.
5125 void *mas_walk(struct ma_state *mas)
5130 entry = mas_state_walk(mas);
5131 if (mas_is_start(mas))
5134 if (mas_is_ptr(mas)) {
5139 mas->last = ULONG_MAX;
5144 if (mas_is_none(mas)) {
5146 mas->last = ULONG_MAX;
5151 EXPORT_SYMBOL_GPL(mas_walk);
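/*
 * Illustrative sketch only, not kernel code used anywhere: a typical
 * mas_walk() lookup under RCU. "my_tree" and the index are hypothetical.
 *
 *	MA_STATE(mas, &my_tree, 12, 12);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	entry = mas_walk(&mas);
 *	if (entry)
 *		pr_debug("index 12 lies in [%lu, %lu]\n", mas.index, mas.last);
 *	rcu_read_unlock();
 */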
5153 static inline bool mas_rewind_node(struct ma_state *mas)
5158 if (mte_is_root(mas->node)) {
5168 mas->offset = --slot;
5173 * mas_skip_node() - Internal function. Skip over a node.
5174 * @mas: The maple state.
5176 * Return: true if there is another node, false otherwise.
5178 static inline bool mas_skip_node(struct ma_state *mas)
5180 if (mas_is_err(mas))
5184 if (mte_is_root(mas->node)) {
5185 if (mas->offset >= mas_data_end(mas)) {
5186 mas_set_err(mas, -EBUSY);
5192 } while (mas->offset >= mas_data_end(mas));
 * mas_awalk() - Allocation walk. Search from low address to high for a gap of
 * the given size.
5201 * @mas: The maple state
5202 * @size: The size of the gap required
5204 * Search between @mas->index and @mas->last for a gap of @size.
5206 static inline void mas_awalk(struct ma_state *mas, unsigned long size)
5208 struct maple_enode *last = NULL;
5211 * There are 4 options:
5212 * go to child (descend)
5213 * go back to parent (ascend)
5214 * no gap found. (return, slot == MAPLE_NODE_SLOTS)
5215 * found the gap. (return, slot != MAPLE_NODE_SLOTS)
5217 while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) {
5218 if (last == mas->node)
5226 * mas_fill_gap() - Fill a located gap with @entry.
5227 * @mas: The maple state
5228 * @entry: The value to store
5229 * @slot: The offset into the node to store the @entry
5230 * @size: The size of the entry
 * @index: Pointer to store the start location
5233 static inline void mas_fill_gap(struct ma_state *mas, void *entry,
5234 unsigned char slot, unsigned long size, unsigned long *index)
5236 MA_WR_STATE(wr_mas, mas, entry);
5237 unsigned char pslot = mte_parent_slot(mas->node);
5238 struct maple_enode *mn = mas->node;
5239 unsigned long *pivots;
5240 enum maple_type ptype;
 * mas->index is the start address for the search, which may no longer be
 * needed. mas->last is the end address for the search.
5247 *index = mas->index;
5248 mas->last = mas->index + size - 1;
 * It is possible that using mas->max and mas->min to correctly calculate
 * the index and last will cause an issue in the gap calculation, so fix
 * the ma_state here.
5256 ptype = mte_node_type(mas->node);
5257 pivots = ma_pivots(mas_mn(mas), ptype);
5258 mas->max = mas_safe_pivot(mas, pivots, pslot, ptype);
5259 mas->min = mas_safe_min(mas, pivots, pslot);
5262 mas_wr_store_entry(&wr_mas);
5266 * mas_sparse_area() - Internal function. Return upper or lower limit when
5267 * searching for a gap in an empty tree.
5268 * @mas: The maple state
 * @min: The minimum range
5270 * @max: The maximum range
5271 * @size: The size of the gap
5272 * @fwd: Searching forward or back
5274 static inline void mas_sparse_area(struct ma_state *mas, unsigned long min,
5275 unsigned long max, unsigned long size, bool fwd)
5277 unsigned long start = 0;
5279 if (!unlikely(mas_is_none(mas)))
5288 mas->last = start + size - 1;
5296 * mas_empty_area() - Get the lowest address within the range that is
5297 * sufficient for the size requested.
5298 * @mas: The maple state
5299 * @min: The lowest value of the range
5300 * @max: The highest value of the range
 * @size: The size needed
 *
 * Return: 0 on success, otherwise an error code.
5303 int mas_empty_area(struct ma_state *mas, unsigned long min,
5304 unsigned long max, unsigned long size)
5306 unsigned char offset;
5307 unsigned long *pivots;
5310 if (mas_is_start(mas))
5312 else if (mas->offset >= 2)
5314 else if (!mas_skip_node(mas))
5318 if (mas_is_none(mas) || mas_is_ptr(mas)) {
5319 mas_sparse_area(mas, min, max, size, true);
5323 /* The start of the window can only be within these values */
5326 mas_awalk(mas, size);
5328 if (unlikely(mas_is_err(mas)))
5329 return xa_err(mas->node);
5331 offset = mas->offset;
5332 if (unlikely(offset == MAPLE_NODE_SLOTS))
5335 mt = mte_node_type(mas->node);
5336 pivots = ma_pivots(mas_mn(mas), mt);
5338 mas->min = pivots[offset - 1] + 1;
5340 if (offset < mt_pivots[mt])
5341 mas->max = pivots[offset];
5343 if (mas->index < mas->min)
5344 mas->index = mas->min;
5346 mas->last = mas->index + size - 1;
5349 EXPORT_SYMBOL_GPL(mas_empty_area);
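/*
 * Illustrative sketch (assumes a tree created with MT_FLAGS_ALLOC_RANGE;
 * "my_tree" and the sizes are hypothetical): find the lowest 0x1000-sized
 * gap within [0, 0xffff]. On success, mas.index and mas.last describe the
 * chosen window.
 *
 *	MA_STATE(mas, &my_tree, 0, 0);
 *
 *	mas_lock(&mas);
 *	if (!mas_empty_area(&mas, 0, 0xffff, 0x1000))
 *		pr_debug("gap found at [%lu, %lu]\n", mas.index, mas.last);
 *	mas_unlock(&mas);
 */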
5352 * mas_empty_area_rev() - Get the highest address within the range that is
5353 * sufficient for the size requested.
5354 * @mas: The maple state
5355 * @min: The lowest value of the range
5356 * @max: The highest value of the range
 * @size: The size needed
 *
 * Return: 0 on success, otherwise an error code.
5359 int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
5360 unsigned long max, unsigned long size)
5362 struct maple_enode *last = mas->node;
5364 if (mas_is_start(mas)) {
5366 mas->offset = mas_data_end(mas);
5367 } else if (mas->offset >= 2) {
5369 } else if (!mas_rewind_node(mas)) {
5374 if (mas_is_none(mas) || mas_is_ptr(mas)) {
5375 mas_sparse_area(mas, min, max, size, false);
5379 /* The start of the window can only be within these values. */
5383 while (!mas_rev_awalk(mas, size)) {
5384 if (last == mas->node) {
5385 if (!mas_rewind_node(mas))
5392 if (mas_is_err(mas))
5393 return xa_err(mas->node);
5395 if (unlikely(mas->offset == MAPLE_NODE_SLOTS))
5399 * mas_rev_awalk() has set mas->min and mas->max to the gap values. If
5400 * the maximum is outside the window we are searching, then use the last
5401 * location in the search.
 * mas->max and mas->min are the range of the gap.
5403 * mas->index and mas->last are currently set to the search range.
5406 /* Trim the upper limit to the max. */
5407 if (mas->max <= mas->last)
5408 mas->last = mas->max;
5410 mas->index = mas->last - size + 1;
5413 EXPORT_SYMBOL_GPL(mas_empty_area_rev);
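/*
 * Illustrative sketch (same hypothetical MT_FLAGS_ALLOC_RANGE "my_tree"
 * as above): the reverse search returns the highest suitable gap instead
 * of the lowest.
 *
 *	MA_STATE(mas, &my_tree, 0, 0);
 *
 *	mas_lock(&mas);
 *	if (!mas_empty_area_rev(&mas, 0, 0xffff, 0x1000))
 *		pr_debug("gap found at [%lu, %lu]\n", mas.index, mas.last);
 *	mas_unlock(&mas);
 */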
5415 static inline int mas_alloc(struct ma_state *mas, void *entry,
5416 unsigned long size, unsigned long *index)
5421 if (mas_is_none(mas) || mas_is_ptr(mas)) {
5422 mas_root_expand(mas, entry);
5423 if (mas_is_err(mas))
5424 return xa_err(mas->node);
5427 return mte_pivot(mas->node, 0);
5428 return mte_pivot(mas->node, 1);
5431 /* Must be walking a tree. */
5432 mas_awalk(mas, size);
5433 if (mas_is_err(mas))
5434 return xa_err(mas->node);
5436 if (mas->offset == MAPLE_NODE_SLOTS)
5440 * At this point, mas->node points to the right node and we have an
5441 * offset that has a sufficient gap.
5445 min = mte_pivot(mas->node, mas->offset - 1) + 1;
5447 if (mas->index < min)
5450 mas_fill_gap(mas, entry, mas->offset, size, index);
5457 static inline int mas_rev_alloc(struct ma_state *mas, unsigned long min,
5458 unsigned long max, void *entry,
5459 unsigned long size, unsigned long *index)
5463 ret = mas_empty_area_rev(mas, min, max, size);
5467 if (mas_is_err(mas))
5468 return xa_err(mas->node);
5470 if (mas->offset == MAPLE_NODE_SLOTS)
5473 mas_fill_gap(mas, entry, mas->offset, size, index);
 * mte_dead_leaves() - Mark all leaves of a node as dead.
 * @enode: The encoded maple node
 * @mt: The maple tree
 * @slots: Pointer to the slot array
5486 * Must hold the write lock.
5488 * Return: The number of leaves marked as dead.
5491 unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt,
5494 struct maple_node *node;
5495 enum maple_type type;
5499 for (offset = 0; offset < mt_slot_count(enode); offset++) {
5500 entry = mt_slot(mt, slots, offset);
5501 type = mte_node_type(entry);
5502 node = mte_to_node(entry);
5503 /* Use both node and type to catch LE & BE metadata */
5507 mte_set_node_dead(entry);
5509 rcu_assign_pointer(slots[offset], node);
5516 * mte_dead_walk() - Walk down a dead tree to just before the leaves
5517 * @enode: The maple encoded node
5518 * @offset: The starting offset
5520 * Note: This can only be used from the RCU callback context.
5522 static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset)
5524 struct maple_node *node, *next;
5525 void __rcu **slots = NULL;
5527 next = mte_to_node(*enode);
5529 *enode = ma_enode_ptr(next);
5530 node = mte_to_node(*enode);
5531 slots = ma_slots(node, node->type);
5532 next = rcu_dereference_protected(slots[offset],
5533 lock_is_held(&rcu_callback_map));
5535 } while (!ma_is_leaf(next->type));
5541 * mt_free_walk() - Walk & free a tree in the RCU callback context
5542 * @head: The RCU head that's within the node.
5544 * Note: This can only be used from the RCU callback context.
5546 static void mt_free_walk(struct rcu_head *head)
5549 struct maple_node *node, *start;
5550 struct maple_enode *enode;
5551 unsigned char offset;
5552 enum maple_type type;
5554 node = container_of(head, struct maple_node, rcu);
5556 if (ma_is_leaf(node->type))
5560 enode = mt_mk_node(node, node->type);
5561 slots = mte_dead_walk(&enode, 0);
5562 node = mte_to_node(enode);
5564 mt_free_bulk(node->slot_len, slots);
5565 offset = node->parent_slot + 1;
5566 enode = node->piv_parent;
5567 if (mte_to_node(enode) == node)
5570 type = mte_node_type(enode);
5571 slots = ma_slots(mte_to_node(enode), type);
5572 if ((offset < mt_slots[type]) &&
5573 rcu_dereference_protected(slots[offset],
5574 lock_is_held(&rcu_callback_map)))
5575 slots = mte_dead_walk(&enode, offset);
5576 node = mte_to_node(enode);
5577 } while ((node != start) || (node->slot_len < offset));
5579 slots = ma_slots(node, node->type);
5580 mt_free_bulk(node->slot_len, slots);
5583 mt_free_rcu(&node->rcu);
5586 static inline void __rcu **mte_destroy_descend(struct maple_enode **enode,
5587 struct maple_tree *mt, struct maple_enode *prev, unsigned char offset)
5589 struct maple_node *node;
5590 struct maple_enode *next = *enode;
5591 void __rcu **slots = NULL;
5592 enum maple_type type;
5593 unsigned char next_offset = 0;
5597 node = mte_to_node(*enode);
5598 type = mte_node_type(*enode);
5599 slots = ma_slots(node, type);
5600 next = mt_slot_locked(mt, slots, next_offset);
5601 if ((mte_dead_node(next)))
5602 next = mt_slot_locked(mt, slots, ++next_offset);
5604 mte_set_node_dead(*enode);
5606 node->piv_parent = prev;
5607 node->parent_slot = offset;
5608 offset = next_offset;
5611 } while (!mte_is_leaf(next));
5616 static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
5620 struct maple_node *node = mte_to_node(enode);
5621 struct maple_enode *start;
5623 if (mte_is_leaf(enode)) {
5624 node->type = mte_node_type(enode);
5629 slots = mte_destroy_descend(&enode, mt, start, 0);
	node = mte_to_node(enode); /* Updated in the above call. */
5632 enum maple_type type;
5633 unsigned char offset;
5634 struct maple_enode *parent, *tmp;
5636 node->slot_len = mte_dead_leaves(enode, mt, slots);
5638 mt_free_bulk(node->slot_len, slots);
5639 offset = node->parent_slot + 1;
5640 enode = node->piv_parent;
5641 if (mte_to_node(enode) == node)
5644 type = mte_node_type(enode);
5645 slots = ma_slots(mte_to_node(enode), type);
5646 if (offset >= mt_slots[type])
5649 tmp = mt_slot_locked(mt, slots, offset);
5650 if (mte_node_type(tmp) && mte_to_node(tmp)) {
5653 slots = mte_destroy_descend(&enode, mt, parent, offset);
5656 node = mte_to_node(enode);
5657 } while (start != enode);
5659 node = mte_to_node(enode);
5660 node->slot_len = mte_dead_leaves(enode, mt, slots);
5662 mt_free_bulk(node->slot_len, slots);
5666 mt_free_rcu(&node->rcu);
5668 mt_clear_meta(mt, node, node->type);
5672 * mte_destroy_walk() - Free a tree or sub-tree.
 * @enode: The encoded maple node (maple_enode) to start
 * @mt: The tree to free - needed for node types.
5676 * Must hold the write lock.
5678 static inline void mte_destroy_walk(struct maple_enode *enode,
5679 struct maple_tree *mt)
5681 struct maple_node *node = mte_to_node(enode);
5683 if (mt_in_rcu(mt)) {
5684 mt_destroy_walk(enode, mt, false);
5685 call_rcu(&node->rcu, mt_free_walk);
5687 mt_destroy_walk(enode, mt, true);
5691 static void mas_wr_store_setup(struct ma_wr_state *wr_mas)
5693 if (unlikely(mas_is_paused(wr_mas->mas)))
5694 mas_reset(wr_mas->mas);
5696 if (!mas_is_start(wr_mas->mas)) {
5697 if (mas_is_none(wr_mas->mas)) {
5698 mas_reset(wr_mas->mas);
5700 wr_mas->r_max = wr_mas->mas->max;
5701 wr_mas->type = mte_node_type(wr_mas->mas->node);
5702 if (mas_is_span_wr(wr_mas))
5703 mas_reset(wr_mas->mas);
5712 * mas_store() - Store an @entry.
5713 * @mas: The maple state.
5714 * @entry: The entry to store.
 * The @mas->index and @mas->last are used to set the range for the @entry.
5717 * Note: The @mas should have pre-allocated entries to ensure there is memory to
5718 * store the entry. Please see mas_expected_entries()/mas_destroy() for more details.
5720 * Return: the first entry between mas->index and mas->last or %NULL.
5722 void *mas_store(struct ma_state *mas, void *entry)
5724 MA_WR_STATE(wr_mas, mas, entry);
5726 trace_ma_write(__func__, mas, 0, entry);
5727 #ifdef CONFIG_DEBUG_MAPLE_TREE
5728 if (mas->index > mas->last)
5729 pr_err("Error %lu > %lu %p\n", mas->index, mas->last, entry);
5730 MT_BUG_ON(mas->tree, mas->index > mas->last);
5731 if (mas->index > mas->last) {
5732 mas_set_err(mas, -EINVAL);
5739 * Storing is the same operation as insert with the added caveat that it
5740 * can overwrite entries. Although this seems simple enough, one may
 * want to examine what happens if a single store operation were to
 * overwrite multiple entries within a self-balancing B-Tree.
5744 mas_wr_store_setup(&wr_mas);
5745 mas_wr_store_entry(&wr_mas);
5746 return wr_mas.content;
5748 EXPORT_SYMBOL_GPL(mas_store);
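/*
 * Illustrative sketch ("my_tree" and "ptr" are hypothetical): the usual
 * advanced-API pattern around mas_store() when nothing was preallocated,
 * mirroring what mas_store_gfp() does internally - retry the store with
 * mas_nomem() until the allocation succeeds or fails permanently.
 *
 *	MA_STATE(mas, &my_tree, 10, 20);
 *	int ret = 0;
 *
 *	mas_lock(&mas);
 *	do {
 *		mas_store(&mas, ptr);
 *	} while (mas_nomem(&mas, GFP_KERNEL));
 *	if (mas_is_err(&mas))
 *		ret = xa_err(mas.node);
 *	mas_unlock(&mas);
 */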
5751 * mas_store_gfp() - Store a value into the tree.
5752 * @mas: The maple state
5753 * @entry: The entry to store
5754 * @gfp: The GFP_FLAGS to use for allocations if necessary.
 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could
 * not be allocated.
5759 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
5761 MA_WR_STATE(wr_mas, mas, entry);
5763 mas_wr_store_setup(&wr_mas);
5764 trace_ma_write(__func__, mas, 0, entry);
5766 mas_wr_store_entry(&wr_mas);
5767 if (unlikely(mas_nomem(mas, gfp)))
5770 if (unlikely(mas_is_err(mas)))
5771 return xa_err(mas->node);
5775 EXPORT_SYMBOL_GPL(mas_store_gfp);
5778 * mas_store_prealloc() - Store a value into the tree using memory
5779 * preallocated in the maple state.
5780 * @mas: The maple state
5781 * @entry: The entry to store.
5783 void mas_store_prealloc(struct ma_state *mas, void *entry)
5785 MA_WR_STATE(wr_mas, mas, entry);
5787 mas_wr_store_setup(&wr_mas);
5788 trace_ma_write(__func__, mas, 0, entry);
5789 mas_wr_store_entry(&wr_mas);
5790 BUG_ON(mas_is_err(mas));
5793 EXPORT_SYMBOL_GPL(mas_store_prealloc);
5796 * mas_preallocate() - Preallocate enough nodes for a store operation
5797 * @mas: The maple state
5798 * @entry: The entry that will be stored
5799 * @gfp: The GFP_FLAGS to use for allocations.
5801 * Return: 0 on success, -ENOMEM if memory could not be allocated.
5803 int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
5807 mas_node_count_gfp(mas, 1 + mas_mt_height(mas) * 3, gfp);
5808 mas->mas_flags |= MA_STATE_PREALLOC;
5809 if (likely(!mas_is_err(mas)))
5812 mas_set_alloc_req(mas, 0);
5813 ret = xa_err(mas->node);
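/*
 * Illustrative sketch ("my_tree" and "ptr" are hypothetical): preallocate
 * nodes while blocking is still allowed, then perform the store with
 * mas_store_prealloc(), which consumes the preallocations and releases any
 * that were unused.
 *
 *	MA_STATE(mas, &my_tree, 10, 20);
 *
 *	if (mas_preallocate(&mas, ptr, GFP_KERNEL))
 *		return -ENOMEM;
 *	mas_lock(&mas);
 *	mas_store_prealloc(&mas, ptr);
 *	mas_unlock(&mas);
 */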
5821 * mas_destroy() - destroy a maple state.
5822 * @mas: The maple state
 * Upon completion, check the left-most node and rebalance against the node to
 * the right if necessary. Frees any allocated nodes associated with this maple
 * state.
5828 void mas_destroy(struct ma_state *mas)
5830 struct maple_alloc *node;
5831 unsigned long total;
5834 * When using mas_for_each() to insert an expected number of elements,
5835 * it is possible that the number inserted is less than the expected
5836 * number. To fix an invalid final node, a check is performed here to
5837 * rebalance the previous node with the final node.
5839 if (mas->mas_flags & MA_STATE_REBALANCE) {
5842 if (mas_is_start(mas))
5845 mtree_range_walk(mas);
5846 end = mas_data_end(mas) + 1;
5847 if (end < mt_min_slot_count(mas->node) - 1)
5848 mas_destroy_rebalance(mas, end);
5850 mas->mas_flags &= ~MA_STATE_REBALANCE;
5852 mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);
5854 total = mas_allocated(mas);
5857 mas->alloc = node->slot[0];
5858 if (node->node_count > 1) {
5859 size_t count = node->node_count - 1;
5861 mt_free_bulk(count, (void __rcu **)&node->slot[1]);
5864 kmem_cache_free(maple_node_cache, node);
5870 EXPORT_SYMBOL_GPL(mas_destroy);
5873 * mas_expected_entries() - Set the expected number of entries that will be inserted.
5874 * @mas: The maple state
5875 * @nr_entries: The number of expected entries.
5877 * This will attempt to pre-allocate enough nodes to store the expected number
5878 * of entries. The allocations will occur using the bulk allocator interface
5879 * for speed. Please call mas_destroy() on the @mas after inserting the entries
5880 * to ensure any unused nodes are freed.
5882 * Return: 0 on success, -ENOMEM if memory could not be allocated.
5884 int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries)
5886 int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2;
5887 struct maple_enode *enode = mas->node;
5892 * Sometimes it is necessary to duplicate a tree to a new tree, such as
5893 * forking a process and duplicating the VMAs from one tree to a new
5894 * tree. When such a situation arises, it is known that the new tree is
5895 * not going to be used until the entire tree is populated. For
5896 * performance reasons, it is best to use a bulk load with RCU disabled.
5897 * This allows for optimistic splitting that favours the left and reuse
5898 * of nodes during the operation.
5901 /* Optimize splitting for bulk insert in-order */
5902 mas->mas_flags |= MA_STATE_BULK;
5905 * Avoid overflow, assume a gap between each entry and a trailing null.
5906 * If this is wrong, it just means allocation can happen during
5907 * insertion of entries.
5909 nr_nodes = max(nr_entries, nr_entries * 2 + 1);
5910 if (!mt_is_alloc(mas->tree))
5911 nonleaf_cap = MAPLE_RANGE64_SLOTS - 2;
5913 /* Leaves; reduce slots to keep space for expansion */
5914 nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2);
5915 /* Internal nodes */
5916 nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap);
5917 /* Add working room for split (2 nodes) + new parents */
5918 mas_node_count(mas, nr_nodes + 3);
5920 /* Detect if allocations run out */
5921 mas->mas_flags |= MA_STATE_PREALLOC;
5923 if (!mas_is_err(mas))
5926 ret = xa_err(mas->node);
5932 EXPORT_SYMBOL_GPL(mas_expected_entries);
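/*
 * Illustrative sketch ("my_tree", "src" and "nr" are hypothetical): a bulk
 * load of nr pre-sorted ranges. mas_destroy() must run afterwards to
 * rebalance the final node and free unused preallocations.
 *
 *	MA_STATE(mas, &my_tree, 0, 0);
 *	unsigned long i;
 *
 *	if (mas_expected_entries(&mas, nr))
 *		return -ENOMEM;
 *	mas_lock(&mas);
 *	for (i = 0; i < nr; i++) {
 *		mas_set_range(&mas, src[i].start, src[i].end);
 *		mas_store(&mas, src[i].ptr);
 *	}
 *	mas_unlock(&mas);
 *	mas_destroy(&mas);
 */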
5935 * mas_next() - Get the next entry.
5936 * @mas: The maple state
5937 * @max: The maximum index to check.
5939 * Returns the next entry after @mas->index.
5940 * Must hold rcu_read_lock or the write lock.
5941 * Can return the zero entry.
5943 * Return: The next entry or %NULL
5945 void *mas_next(struct ma_state *mas, unsigned long max)
5947 if (mas_is_none(mas) || mas_is_paused(mas))
5948 mas->node = MAS_START;
5950 if (mas_is_start(mas))
5951 mas_walk(mas); /* Retries on dead nodes handled by mas_walk */
5953 if (mas_is_ptr(mas)) {
5956 mas->last = ULONG_MAX;
5961 if (mas->last == ULONG_MAX)
5964 /* Retries on dead nodes handled by mas_next_entry */
5965 return mas_next_entry(mas, max);
5967 EXPORT_SYMBOL_GPL(mas_next);
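/*
 * Illustrative sketch ("my_tree" is hypothetical): iterate forward over
 * every entry with a range start below 1000, under RCU.
 *
 *	MA_STATE(mas, &my_tree, 0, 0);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	entry = mas_find(&mas, 999);
 *	while (entry) {
 *		pr_debug("[%lu, %lu] -> %p\n", mas.index, mas.last, entry);
 *		entry = mas_next(&mas, 999);
 *	}
 *	rcu_read_unlock();
 */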
5970 * mt_next() - get the next value in the maple tree
5971 * @mt: The maple tree
5972 * @index: The start index
5973 * @max: The maximum index to check
5975 * Return: The entry at @index or higher, or %NULL if nothing is found.
5977 void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max)
5980 MA_STATE(mas, mt, index, index);
5983 entry = mas_next(&mas, max);
5987 EXPORT_SYMBOL_GPL(mt_next);
5990 * mas_prev() - Get the previous entry
5991 * @mas: The maple state
5992 * @min: The minimum value to check.
5994 * Must hold rcu_read_lock or the write lock.
 * Will reset mas to MAS_START if the node is MAS_NONE. Will stop on not
 * searchable nodes.
5998 * Return: the previous value or %NULL.
6000 void *mas_prev(struct ma_state *mas, unsigned long min)
6003 /* Nothing comes before 0 */
6005 mas->node = MAS_NONE;
6009 if (unlikely(mas_is_ptr(mas)))
6012 if (mas_is_none(mas) || mas_is_paused(mas))
6013 mas->node = MAS_START;
6015 if (mas_is_start(mas)) {
6021 if (mas_is_ptr(mas)) {
6027 mas->index = mas->last = 0;
6028 return mas_root_locked(mas);
6030 return mas_prev_entry(mas, min);
6032 EXPORT_SYMBOL_GPL(mas_prev);
6035 * mt_prev() - get the previous value in the maple tree
6036 * @mt: The maple tree
6037 * @index: The start index
6038 * @min: The minimum index to check
6040 * Return: The entry at @index or lower, or %NULL if nothing is found.
6042 void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min)
6045 MA_STATE(mas, mt, index, index);
6048 entry = mas_prev(&mas, min);
6052 EXPORT_SYMBOL_GPL(mt_prev);
6055 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock.
6056 * @mas: The maple state to pause
6058 * Some users need to pause a walk and drop the lock they're holding in
6059 * order to yield to a higher priority thread or carry out an operation
6060 * on an entry. Those users should call this function before they drop
6061 * the lock. It resets the @mas to be suitable for the next iteration
6062 * of the loop after the user has reacquired the lock. If most entries
6063 * found during a walk require you to call mas_pause(), the mt_for_each()
6064 * iterator may be more appropriate.
6067 void mas_pause(struct ma_state *mas)
6069 mas->node = MAS_PAUSE;
6071 EXPORT_SYMBOL_GPL(mas_pause);
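/*
 * Illustrative sketch ("my_tree" and needs_yield() are hypothetical):
 * periodically drop the lock during a long walk; mas_pause() keeps the
 * iteration position valid across the unlock.
 *
 *	MA_STATE(mas, &my_tree, 0, 0);
 *	void *entry;
 *
 *	mas_lock(&mas);
 *	mas_for_each(&mas, entry, ULONG_MAX) {
 *		if (needs_yield()) {
 *			mas_pause(&mas);
 *			mas_unlock(&mas);
 *			cond_resched();
 *			mas_lock(&mas);
 *		}
 *	}
 *	mas_unlock(&mas);
 */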
6074 * mas_find() - On the first call, find the entry at or after mas->index up to
6075 * %max. Otherwise, find the entry after mas->index.
6076 * @mas: The maple state
6077 * @max: The maximum value to check.
6079 * Must hold rcu_read_lock or the write lock.
6080 * If an entry exists, last and index are updated accordingly.
6081 * May set @mas->node to MAS_NONE.
6083 * Return: The entry or %NULL.
6085 void *mas_find(struct ma_state *mas, unsigned long max)
6087 if (unlikely(mas_is_paused(mas))) {
6088 if (unlikely(mas->last == ULONG_MAX)) {
6089 mas->node = MAS_NONE;
6092 mas->node = MAS_START;
6093 mas->index = ++mas->last;
6096 if (unlikely(mas_is_none(mas)))
6097 mas->node = MAS_START;
6099 if (unlikely(mas_is_start(mas))) {
6100 /* First run or continue */
6103 if (mas->index > max)
6106 entry = mas_walk(mas);
6111 if (unlikely(!mas_searchable(mas)))
6114 /* Retries on dead nodes handled by mas_next_entry */
6115 return mas_next_entry(mas, max);
6117 EXPORT_SYMBOL_GPL(mas_find);
 * mas_find_rev() - On the first call, find the first non-null entry at or
 * below mas->index down to %min. Otherwise find the first non-null entry
 * below mas->index down to %min.
6123 * @mas: The maple state
6124 * @min: The minimum value to check.
6126 * Must hold rcu_read_lock or the write lock.
6127 * If an entry exists, last and index are updated accordingly.
6128 * May set @mas->node to MAS_NONE.
6130 * Return: The entry or %NULL.
6132 void *mas_find_rev(struct ma_state *mas, unsigned long min)
6134 if (unlikely(mas_is_paused(mas))) {
6135 if (unlikely(mas->last == ULONG_MAX)) {
6136 mas->node = MAS_NONE;
6139 mas->node = MAS_START;
6140 mas->last = --mas->index;
6143 if (unlikely(mas_is_start(mas))) {
6144 /* First run or continue */
6147 if (mas->index < min)
6150 entry = mas_walk(mas);
6155 if (unlikely(!mas_searchable(mas)))
6158 if (mas->index < min)
	/* Retries on dead nodes handled by mas_prev_entry */
6162 return mas_prev_entry(mas, min);
6164 EXPORT_SYMBOL_GPL(mas_find_rev);
 * mas_erase() - Find the range in which index resides and erase the entire
 * range.
6169 * @mas: The maple state
6171 * Must hold the write lock.
6172 * Searches for @mas->index, sets @mas->index and @mas->last to the range and
6173 * erases that range.
6175 * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated.
6177 void *mas_erase(struct ma_state *mas)
6180 MA_WR_STATE(wr_mas, mas, NULL);
6182 if (mas_is_none(mas) || mas_is_paused(mas))
6183 mas->node = MAS_START;
6185 /* Retry unnecessary when holding the write lock. */
6186 entry = mas_state_walk(mas);
6191 /* Must reset to ensure spanning writes of last slot are detected */
6193 mas_wr_store_setup(&wr_mas);
6194 mas_wr_store_entry(&wr_mas);
6195 if (mas_nomem(mas, GFP_KERNEL))
6200 EXPORT_SYMBOL_GPL(mas_erase);
 * mas_nomem() - Check if there was an error allocating and do the allocation
 * if necessary. If there are allocations, then free them.
6205 * @mas: The maple state
6206 * @gfp: The GFP_FLAGS to use for allocations
6207 * Return: true on allocation, false otherwise.
6209 bool mas_nomem(struct ma_state *mas, gfp_t gfp)
6210 __must_hold(mas->tree->lock)
6212 if (likely(mas->node != MA_ERROR(-ENOMEM))) {
6217 if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) {
6218 mtree_unlock(mas->tree);
6219 mas_alloc_nodes(mas, gfp);
6220 mtree_lock(mas->tree);
6222 mas_alloc_nodes(mas, gfp);
6225 if (!mas_allocated(mas))
6228 mas->node = MAS_START;
6232 void __init maple_tree_init(void)
6234 maple_node_cache = kmem_cache_create("maple_node",
6235 sizeof(struct maple_node), sizeof(struct maple_node),
6240 * mtree_load() - Load a value stored in a maple tree
6241 * @mt: The maple tree
6242 * @index: The index to load
6244 * Return: the entry or %NULL
6246 void *mtree_load(struct maple_tree *mt, unsigned long index)
6248 MA_STATE(mas, mt, index, index);
6251 trace_ma_read(__func__, &mas);
6254 entry = mas_start(&mas);
6255 if (unlikely(mas_is_none(&mas)))
6258 if (unlikely(mas_is_ptr(&mas))) {
6265 entry = mtree_lookup_walk(&mas);
6266 if (!entry && unlikely(mas_is_start(&mas)))
6270 if (xa_is_zero(entry))
6275 EXPORT_SYMBOL(mtree_load);
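/*
 * Illustrative sketch ("my_tree" is hypothetical): the normal API handles
 * its own RCU locking, so a lookup is a single call.
 *
 *	void *entry = mtree_load(&my_tree, 42);
 */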
6278 * mtree_store_range() - Store an entry at a given range.
6279 * @mt: The maple tree
6280 * @index: The start of the range
6281 * @last: The end of the range
6282 * @entry: The entry to store
6283 * @gfp: The GFP_FLAGS to use for allocations
 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could
 * not be allocated.
6288 int mtree_store_range(struct maple_tree *mt, unsigned long index,
6289 unsigned long last, void *entry, gfp_t gfp)
6291 MA_STATE(mas, mt, index, last);
6292 MA_WR_STATE(wr_mas, &mas, entry);
6294 trace_ma_write(__func__, &mas, 0, entry);
6295 if (WARN_ON_ONCE(xa_is_advanced(entry)))
6303 mas_wr_store_entry(&wr_mas);
6304 if (mas_nomem(&mas, gfp))
6308 if (mas_is_err(&mas))
6309 return xa_err(mas.node);
6313 EXPORT_SYMBOL(mtree_store_range);
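/*
 * Illustrative sketch ("my_tree" and "ptr" are hypothetical): store one
 * entry over the whole range [0x100, 0x1ff]; a later mtree_load() of any
 * index in that range returns ptr.
 *
 *	int ret = mtree_store_range(&my_tree, 0x100, 0x1ff, ptr, GFP_KERNEL);
 *
 *	if (ret)
 *		return ret;
 */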
6316 * mtree_store() - Store an entry at a given index.
6317 * @mt: The maple tree
6318 * @index: The index to store the value
6319 * @entry: The entry to store
6320 * @gfp: The GFP_FLAGS to use for allocations
 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could
 * not be allocated.
6325 int mtree_store(struct maple_tree *mt, unsigned long index, void *entry,
6328 return mtree_store_range(mt, index, index, entry, gfp);
6330 EXPORT_SYMBOL(mtree_store);
 * mtree_insert_range() - Insert an entry at a given range if there is no value.
6334 * @mt: The maple tree
6335 * @first: The start of the range
6336 * @last: The end of the range
6337 * @entry: The entry to store
6338 * @gfp: The GFP_FLAGS to use for allocations.
 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid
6341 * request, -ENOMEM if memory could not be allocated.
6343 int mtree_insert_range(struct maple_tree *mt, unsigned long first,
6344 unsigned long last, void *entry, gfp_t gfp)
6346 MA_STATE(ms, mt, first, last);
6348 if (WARN_ON_ONCE(xa_is_advanced(entry)))
6356 mas_insert(&ms, entry);
6357 if (mas_nomem(&ms, gfp))
6361 if (mas_is_err(&ms))
6362 return xa_err(ms.node);
6366 EXPORT_SYMBOL(mtree_insert_range);
 * mtree_insert() - Insert an entry at a given index if there is no value.
 * @mt: The maple tree
 * @index: The index to store the value
 * @entry: The entry to store
 * @gfp: The GFP_FLAGS to use for allocations.
 *
 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid
6376 * request, -ENOMEM if memory could not be allocated.
6378 int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry,
6381 return mtree_insert_range(mt, index, index, entry, gfp);
6383 EXPORT_SYMBOL(mtree_insert);
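/*
 * Illustrative sketch ("my_tree" and "ptr" are hypothetical): unlike
 * mtree_store(), an insert refuses to overwrite.
 *
 *	int ret = mtree_insert(&my_tree, 7, ptr, GFP_KERNEL);
 *
 *	if (ret == -EEXIST)
 *		pr_debug("index 7 is already occupied\n");
 */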
6385 int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
6386 void *entry, unsigned long size, unsigned long min,
6387 unsigned long max, gfp_t gfp)
6391 MA_STATE(mas, mt, min, max - size);
6392 if (!mt_is_alloc(mt))
6395 if (WARN_ON_ONCE(mt_is_reserved(entry)))
6411 mas.last = max - size;
6412 ret = mas_alloc(&mas, entry, size, startp);
6413 if (mas_nomem(&mas, gfp))
6419 EXPORT_SYMBOL(mtree_alloc_range);
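/*
 * Illustrative sketch (requires a tree created with MT_FLAGS_ALLOC_RANGE;
 * "my_tree" and "ptr" are hypothetical): allocate the lowest free
 * 0x1000-sized range in [0, 0xffff] and store ptr there.
 *
 *	unsigned long start;
 *	int ret;
 *
 *	ret = mtree_alloc_range(&my_tree, &start, ptr, 0x1000, 0, 0xffff,
 *				GFP_KERNEL);
 *	if (!ret)
 *		pr_debug("allocated [%lx, %lx]\n", start, start + 0xfff);
 */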
6421 int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
6422 void *entry, unsigned long size, unsigned long min,
6423 unsigned long max, gfp_t gfp)
6427 MA_STATE(mas, mt, min, max - size);
6428 if (!mt_is_alloc(mt))
6431 if (WARN_ON_ONCE(mt_is_reserved(entry)))
6445 ret = mas_rev_alloc(&mas, min, max, entry, size, startp);
6446 if (mas_nomem(&mas, gfp))
6452 EXPORT_SYMBOL(mtree_alloc_rrange);
6455 * mtree_erase() - Find an index and erase the entire range.
6456 * @mt: The maple tree
6457 * @index: The index to erase
6459 * Erasing is the same as a walk to an entry then a store of a NULL to that
6460 * ENTIRE range. In fact, it is implemented as such using the advanced API.
6462 * Return: The entry stored at the @index or %NULL
6464 void *mtree_erase(struct maple_tree *mt, unsigned long index)
6468 MA_STATE(mas, mt, index, index);
6469 trace_ma_op(__func__, &mas);
6472 entry = mas_erase(&mas);
6477 EXPORT_SYMBOL(mtree_erase);
6480 * __mt_destroy() - Walk and free all nodes of a locked maple tree.
6481 * @mt: The maple tree
6483 * Note: Does not handle locking.
6485 void __mt_destroy(struct maple_tree *mt)
6487 void *root = mt_root_locked(mt);
6489 rcu_assign_pointer(mt->ma_root, NULL);
6490 if (xa_is_node(root))
6491 mte_destroy_walk(root, mt);
6495 EXPORT_SYMBOL_GPL(__mt_destroy);
6498 * mtree_destroy() - Destroy a maple tree
6499 * @mt: The maple tree
6501 * Frees all resources used by the tree. Handles locking.
6503 void mtree_destroy(struct maple_tree *mt)
6509 EXPORT_SYMBOL(mtree_destroy);
6512 * mt_find() - Search from the start up until an entry is found.
6513 * @mt: The maple tree
6514 * @index: Pointer which contains the start location of the search
6515 * @max: The maximum value to check
6517 * Handles locking. @index will be incremented to one beyond the range.
6519 * Return: The entry at or after the @index or %NULL
6521 void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max)
6523 MA_STATE(mas, mt, *index, *index);
6525 #ifdef CONFIG_DEBUG_MAPLE_TREE
6526 unsigned long copy = *index;
6529 trace_ma_read(__func__, &mas);
6536 entry = mas_state_walk(&mas);
6537 if (mas_is_start(&mas))
6540 if (unlikely(xa_is_zero(entry)))
6546 while (mas_searchable(&mas) && (mas.index < max)) {
6547 entry = mas_next_entry(&mas, max);
6548 if (likely(entry && !xa_is_zero(entry)))
6552 if (unlikely(xa_is_zero(entry)))
6556 if (likely(entry)) {
6557 *index = mas.last + 1;
6558 #ifdef CONFIG_DEBUG_MAPLE_TREE
6559 if ((*index) && (*index) <= copy)
6560 pr_err("index not increased! %lx <= %lx\n",
6562 MT_BUG_ON(mt, (*index) && ((*index) <= copy));
6568 EXPORT_SYMBOL(mt_find);
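/*
 * Illustrative sketch ("my_tree" is hypothetical): mt_find() advances
 * @index past each returned range, so it can drive a simple scan loop.
 *
 *	unsigned long index = 0;
 *	void *entry;
 *
 *	while ((entry = mt_find(&my_tree, &index, ULONG_MAX)) != NULL)
 *		pr_debug("entry %p, next search starts at %lu\n", entry, index);
 */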
6571 * mt_find_after() - Search from the start up until an entry is found.
6572 * @mt: The maple tree
6573 * @index: Pointer which contains the start location of the search
6574 * @max: The maximum value to check
6576 * Handles locking, detects wrapping on index == 0
6578 * Return: The entry at or after the @index or %NULL
6580 void *mt_find_after(struct maple_tree *mt, unsigned long *index,
6586 return mt_find(mt, index, max);
6588 EXPORT_SYMBOL(mt_find_after);
6590 #ifdef CONFIG_DEBUG_MAPLE_TREE
6591 atomic_t maple_tree_tests_run;
6592 EXPORT_SYMBOL_GPL(maple_tree_tests_run);
6593 atomic_t maple_tree_tests_passed;
6594 EXPORT_SYMBOL_GPL(maple_tree_tests_passed);
6597 extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int);
6598 void mt_set_non_kernel(unsigned int val)
6600 kmem_cache_set_non_kernel(maple_node_cache, val);
6603 extern unsigned long kmem_cache_get_alloc(struct kmem_cache *);
6604 unsigned long mt_get_alloc_size(void)
6606 return kmem_cache_get_alloc(maple_node_cache);
6609 extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *);
6610 void mt_zero_nr_tallocated(void)
6612 kmem_cache_zero_nr_tallocated(maple_node_cache);
6615 extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *);
6616 unsigned int mt_nr_tallocated(void)
6618 return kmem_cache_nr_tallocated(maple_node_cache);
6621 extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *);
6622 unsigned int mt_nr_allocated(void)
6624 return kmem_cache_nr_allocated(maple_node_cache);
6628 * mas_dead_node() - Check if the maple state is pointing to a dead node.
6629 * @mas: The maple state
6630 * @index: The index to restore in @mas.
6632 * Used in test code.
6633 * Return: 1 if @mas has been reset to MAS_START, 0 otherwise.
6635 static inline int mas_dead_node(struct ma_state *mas, unsigned long index)
6637 if (unlikely(!mas_searchable(mas) || mas_is_start(mas)))
6640 if (likely(!mte_dead_node(mas->node)))
6643 mas_rewalk(mas, index);
6647 void mt_cache_shrink(void)
6652 * mt_cache_shrink() - For testing, don't use this.
6654 * Certain testcases can trigger an OOM when combined with other memory
6655 * debugging configuration options. This function is used to reduce the
 * possibility of an out of memory event due to kmem_cache objects remaining
6657 * around for longer than usual.
6659 void mt_cache_shrink(void)
6661 kmem_cache_shrink(maple_node_cache);
6664 EXPORT_SYMBOL_GPL(mt_cache_shrink);
6666 #endif /* not defined __KERNEL__ */
6668 * mas_get_slot() - Get the entry in the maple state node stored at @offset.
6669 * @mas: The maple state
6670 * @offset: The offset into the slot array to fetch.
6672 * Return: The entry stored at @offset.
6674 static inline struct maple_enode *mas_get_slot(struct ma_state *mas,
6675 unsigned char offset)
6677 return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)),
 * mas_first_entry() - Go to the first leaf and find the first entry.
 * @mas: The maple state.
 * @mn: The maple node backing @mas->node.
 * @limit: The maximum index to check.
 * @mt: The maple node type.
 *
 * Sets mas->offset to the offset of the entry and mas->index to the range
 * minimum.
 *
 * Return: The first entry or MAS_NONE.
6692 static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn,
6693 unsigned long limit, enum maple_type mt)
6697 unsigned long *pivots;
6701 mas->index = mas->min;
6702 if (mas->index > limit)
6707 while (likely(!ma_is_leaf(mt))) {
6708 MT_BUG_ON(mas->tree, mte_dead_node(mas->node));
6709 slots = ma_slots(mn, mt);
6710 entry = mas_slot(mas, slots, 0);
6711 pivots = ma_pivots(mn, mt);
6712 if (unlikely(ma_dead_node(mn)))
6717 mt = mte_node_type(mas->node);
6719 MT_BUG_ON(mas->tree, mte_dead_node(mas->node));
6722 slots = ma_slots(mn, mt);
6723 entry = mas_slot(mas, slots, 0);
6724 if (unlikely(ma_dead_node(mn)))
6727 /* Slot 0 or 1 must be set */
6728 if (mas->index > limit)
6735 entry = mas_slot(mas, slots, 1);
6736 pivots = ma_pivots(mn, mt);
6737 if (unlikely(ma_dead_node(mn)))
6740 mas->index = pivots[0] + 1;
6741 if (mas->index > limit)
6748 if (likely(!ma_dead_node(mn)))
6749 mas->node = MAS_NONE;
6753 /* Depth first search, post-order */
6754 static void mas_dfs_postorder(struct ma_state *mas, unsigned long max)
6757 struct maple_enode *p = MAS_NONE, *mn = mas->node;
6758 unsigned long p_min, p_max;
6760 mas_next_node(mas, mas_mn(mas), max);
6761 if (!mas_is_none(mas))
6764 if (mte_is_root(mn))
6769 while (mas->node != MAS_NONE) {
6773 mas_prev_node(mas, 0);
6784 /* Tree validations */
6785 static void mt_dump_node(const struct maple_tree *mt, void *entry,
6786 unsigned long min, unsigned long max, unsigned int depth);
6787 static void mt_dump_range(unsigned long min, unsigned long max,
6790 static const char spaces[] = " ";
6793 pr_info("%.*s%lu: ", depth * 2, spaces, min);
6795 pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max);
6798 static void mt_dump_entry(void *entry, unsigned long min, unsigned long max,
6801 mt_dump_range(min, max, depth);
6803 if (xa_is_value(entry))
6804 pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry),
6805 xa_to_value(entry), entry);
6806 else if (xa_is_zero(entry))
6807 pr_cont("zero (%ld)\n", xa_to_internal(entry));
6808 else if (mt_is_reserved(entry))
6809 pr_cont("UNKNOWN ENTRY (%p)\n", entry);
6811 pr_cont("%p\n", entry);
6814 static void mt_dump_range64(const struct maple_tree *mt, void *entry,
6815 unsigned long min, unsigned long max, unsigned int depth)
6817 struct maple_range_64 *node = &mte_to_node(entry)->mr64;
6818 bool leaf = mte_is_leaf(entry);
6819 unsigned long first = min;
6822 pr_cont(" contents: ");
6823 for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++)
6824 pr_cont("%p %lu ", node->slot[i], node->pivot[i]);
6825 pr_cont("%p\n", node->slot[i]);
6826 for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) {
6827 unsigned long last = max;
6829 if (i < (MAPLE_RANGE64_SLOTS - 1))
6830 last = node->pivot[i];
6831 else if (!node->slot[i] && max != mt_max[mte_node_type(entry)])
6833 if (last == 0 && i > 0)
6836 mt_dump_entry(mt_slot(mt, node->slot, i),
6837 first, last, depth + 1);
6838 else if (node->slot[i])
6839 mt_dump_node(mt, mt_slot(mt, node->slot, i),
6840 first, last, depth + 1);
6845 pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n",
6846 node, last, max, i);
6853 static void mt_dump_arange64(const struct maple_tree *mt, void *entry,
6854 unsigned long min, unsigned long max, unsigned int depth)
6856 struct maple_arange_64 *node = &mte_to_node(entry)->ma64;
6857 bool leaf = mte_is_leaf(entry);
6858 unsigned long first = min;
6861 pr_cont(" contents: ");
6862 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++)
6863 pr_cont("%lu ", node->gap[i]);
6864 pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap);
6865 for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++)
6866 pr_cont("%p %lu ", node->slot[i], node->pivot[i]);
6867 pr_cont("%p\n", node->slot[i]);
6868 for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
6869 unsigned long last = max;
6871 if (i < (MAPLE_ARANGE64_SLOTS - 1))
6872 last = node->pivot[i];
6873 else if (!node->slot[i])
6875 if (last == 0 && i > 0)
6878 mt_dump_entry(mt_slot(mt, node->slot, i),
6879 first, last, depth + 1);
6880 else if (node->slot[i])
6881 mt_dump_node(mt, mt_slot(mt, node->slot, i),
6882 first, last, depth + 1);
6887 pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n",
6888 node, last, max, i);
6895 static void mt_dump_node(const struct maple_tree *mt, void *entry,
6896 unsigned long min, unsigned long max, unsigned int depth)
6898 struct maple_node *node = mte_to_node(entry);
6899 unsigned int type = mte_node_type(entry);
6902 mt_dump_range(min, max, depth);
6904 pr_cont("node %p depth %d type %d parent %p", node, depth, type,
6905 node ? node->parent : NULL);
6909 for (i = 0; i < MAPLE_NODE_SLOTS; i++) {
6911 pr_cont("OUT OF RANGE: ");
6912 mt_dump_entry(mt_slot(mt, node->slot, i),
6913 min + i, min + i, depth);
6917 case maple_range_64:
6918 mt_dump_range64(mt, entry, min, max, depth);
6920 case maple_arange_64:
6921 mt_dump_arange64(mt, entry, min, max, depth);
6925 pr_cont(" UNKNOWN TYPE\n");
6929 void mt_dump(const struct maple_tree *mt)
6931 void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt));
6933 pr_info("maple_tree(%p) flags %X, height %u root %p\n",
6934 mt, mt->ma_flags, mt_height(mt), entry);
6935 if (!xa_is_node(entry))
6936 mt_dump_entry(entry, 0, 0, 0);
6938 mt_dump_node(mt, entry, 0, mt_max[mte_node_type(entry)], 0);
6940 EXPORT_SYMBOL_GPL(mt_dump);
6943 * Calculate the maximum gap in a node and check if that's what is reported in
6944 * the parent (unless root).
6946 static void mas_validate_gaps(struct ma_state *mas)
6948 struct maple_enode *mte = mas->node;
6949 struct maple_node *p_mn;
6950 unsigned long gap = 0, max_gap = 0;
6951 unsigned long p_end, p_start = mas->min;
6952 unsigned char p_slot;
6953 unsigned long *gaps = NULL;
6954 unsigned long *pivots = ma_pivots(mte_to_node(mte), mte_node_type(mte));
6957 if (ma_is_dense(mte_node_type(mte))) {
6958 for (i = 0; i < mt_slot_count(mte); i++) {
6959 if (mas_get_slot(mas, i)) {
6970 gaps = ma_gaps(mte_to_node(mte), mte_node_type(mte));
6971 for (i = 0; i < mt_slot_count(mte); i++) {
6972 p_end = mas_logical_pivot(mas, pivots, i, mte_node_type(mte));
6975 if (mas_get_slot(mas, i)) {
6980 gap += p_end - p_start + 1;
6982 void *entry = mas_get_slot(mas, i);
6986 if (gap != p_end - p_start + 1) {
6987 pr_err("%p[%u] -> %p %lu != %lu - %lu + 1\n",
6989 mas_get_slot(mas, i), gap,
6993 MT_BUG_ON(mas->tree,
6994 gap != p_end - p_start + 1);
6997 if (gap > p_end - p_start + 1) {
6998 pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n",
6999 mas_mn(mas), i, gap, p_end, p_start,
7000 p_end - p_start + 1);
7001 MT_BUG_ON(mas->tree,
7002 gap > p_end - p_start + 1);
7010 p_start = p_end + 1;
7011 if (p_end >= mas->max)
7016 if (mte_is_root(mte))
7019 p_slot = mte_parent_slot(mas->node);
7020 p_mn = mte_parent(mte);
7021 MT_BUG_ON(mas->tree, max_gap > mas->max);
7022 if (ma_gaps(p_mn, mas_parent_enum(mas, mte))[p_slot] != max_gap) {
7023 pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap);
7027 MT_BUG_ON(mas->tree,
7028 ma_gaps(p_mn, mas_parent_enum(mas, mte))[p_slot] != max_gap);
7031 static void mas_validate_parent_slot(struct ma_state *mas)
7033 struct maple_node *parent;
7034 struct maple_enode *node;
7035 enum maple_type p_type = mas_parent_enum(mas, mas->node);
7036 unsigned char p_slot = mte_parent_slot(mas->node);
7040 if (mte_is_root(mas->node))
7043 parent = mte_parent(mas->node);
7044 slots = ma_slots(parent, p_type);
7045 MT_BUG_ON(mas->tree, mas_mn(mas) == parent);
7047 /* Check prev/next parent slot for duplicate node entry */
7049 for (i = 0; i < mt_slots[p_type]; i++) {
7050 node = mas_slot(mas, slots, i);
7052 if (node != mas->node)
7053 pr_err("parent %p[%u] does not have %p\n",
7054 parent, i, mas_mn(mas));
7055 MT_BUG_ON(mas->tree, node != mas->node);
7056 } else if (node == mas->node) {
7057 pr_err("Invalid child %p at parent %p[%u] p_slot %u\n",
7058 mas_mn(mas), parent, i, p_slot);
7059 MT_BUG_ON(mas->tree, node == mas->node);
7064 static void mas_validate_child_slot(struct ma_state *mas)
7066 enum maple_type type = mte_node_type(mas->node);
7067 void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
7068 unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type);
7069 struct maple_enode *child;
7072 if (mte_is_leaf(mas->node))
7075 for (i = 0; i < mt_slots[type]; i++) {
7076 child = mas_slot(mas, slots, i);
7077 if (!pivots[i] || pivots[i] == mas->max)
7083 if (mte_parent_slot(child) != i) {
7084 pr_err("Slot error at %p[%u]: child %p has pslot %u\n",
7085 mas_mn(mas), i, mte_to_node(child),
7086 mte_parent_slot(child));
7087 MT_BUG_ON(mas->tree, 1);
7090 if (mte_parent(child) != mte_to_node(mas->node)) {
7091 pr_err("child %p has parent %p not %p\n",
7092 mte_to_node(child), mte_parent(child),
7093 mte_to_node(mas->node));
7094 MT_BUG_ON(mas->tree, 1);
7100 * Validate all pivots are within mas->min and mas->max.
7102 static void mas_validate_limits(struct ma_state *mas)
7105 unsigned long prev_piv = 0;
7106 enum maple_type type = mte_node_type(mas->node);
7107 void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
7108 unsigned long *pivots = ma_pivots(mas_mn(mas), type);
7110 /* all limits are fine here. */
7111 if (mte_is_root(mas->node))
7114 for (i = 0; i < mt_slots[type]; i++) {
7117 piv = mas_safe_pivot(mas, pivots, i, type);
7119 if (!piv && (i != 0))
7122 if (!mte_is_leaf(mas->node)) {
7123 void *entry = mas_slot(mas, slots, i);
7126 pr_err("%p[%u] cannot be null\n",
7129 MT_BUG_ON(mas->tree, !entry);
7132 if (prev_piv > piv) {
7133 pr_err("%p[%u] piv %lu < prev_piv %lu\n",
7134 mas_mn(mas), i, piv, prev_piv);
7135 MT_BUG_ON(mas->tree, piv < prev_piv);
7138 if (piv < mas->min) {
7139 pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i,
7141 MT_BUG_ON(mas->tree, piv < mas->min);
7143 if (piv > mas->max) {
7144 pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i,
7146 MT_BUG_ON(mas->tree, piv > mas->max);
7149 if (piv == mas->max)
7152 for (i += 1; i < mt_slots[type]; i++) {
7153 void *entry = mas_slot(mas, slots, i);
7155 if (entry && (i != mt_slots[type] - 1)) {
7156 pr_err("%p[%u] should not have entry %p\n", mas_mn(mas),
7158 MT_BUG_ON(mas->tree, entry != NULL);
7161 if (i < mt_pivots[type]) {
7162 unsigned long piv = pivots[i];
7167 pr_err("%p[%u] should not have piv %lu\n",
7168 mas_mn(mas), i, piv);
7169 MT_BUG_ON(mas->tree, i < mt_pivots[type] - 1);
7174 static void mt_validate_nulls(struct maple_tree *mt)
7176 void *entry, *last = (void *)1;
7177 unsigned char offset = 0;
7179 MA_STATE(mas, mt, 0, 0);
7182 if (mas_is_none(&mas) || (mas.node == MAS_ROOT))
7185 while (!mte_is_leaf(mas.node))
7188 slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node));
7190 entry = mas_slot(&mas, slots, offset);
7191 if (!last && !entry) {
7192 pr_err("Sequential nulls end at %p[%u]\n",
7193 mas_mn(&mas), offset);
7195 MT_BUG_ON(mt, !last && !entry);
7197 if (offset == mas_data_end(&mas)) {
7198 mas_next_node(&mas, mas_mn(&mas), ULONG_MAX);
7199 if (mas_is_none(&mas))
7202 slots = ma_slots(mte_to_node(mas.node),
7203 mte_node_type(mas.node));
7208 } while (!mas_is_none(&mas));
7212 * validate a maple tree by checking:
7213 * 1. The limits (pivots are within mas->min to mas->max)
7214 * 2. The gap is correctly set in the parents
7216 void mt_validate(struct maple_tree *mt)
7220 MA_STATE(mas, mt, 0, 0);
7223 if (!mas_searchable(&mas))
7226 mas_first_entry(&mas, mas_mn(&mas), ULONG_MAX, mte_node_type(mas.node));
7227 while (!mas_is_none(&mas)) {
7228 MT_BUG_ON(mas.tree, mte_dead_node(mas.node));
7229 if (!mte_is_root(mas.node)) {
7230 end = mas_data_end(&mas);
7231 if ((end < mt_min_slot_count(mas.node)) &&
7232 (mas.max != ULONG_MAX)) {
7233 pr_err("Invalid size %u of %p\n", end,
7235 MT_BUG_ON(mas.tree, 1);
7239 mas_validate_parent_slot(&mas);
7240 mas_validate_child_slot(&mas);
7241 mas_validate_limits(&mas);
7242 if (mt_is_alloc(mt))
7243 mas_validate_gaps(&mas);
7244 mas_dfs_postorder(&mas, ULONG_MAX);
7246 mt_validate_nulls(mt);
7251 EXPORT_SYMBOL_GPL(mt_validate);
7253 #endif /* CONFIG_DEBUG_MAPLE_TREE */