The current memslots implementation only allows quick binary search by gfn;
quick lookup by hva is not possible, as the implementation has to do a linear
scan of the whole memslots array even though the operation being performed
might apply to just a single memslot.

This significantly hurts the performance of per-hva operations with higher
memslot counts.

Since hva ranges can overlap between memslots, an interval tree is needed
to track them.
[sean: handle interval tree updates in kvm_replace_memslot()]
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <d66b9974becaa9839be9c4e1a5de97b177b4ac20.1638817640.git.maciej.szmigiero@oracle.com>
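For illustration only, a minimal sketch (not part of this patch) of the kind
of O(log n) hva lookup the interval tree enables; kvm_hva_to_memslot() is a
hypothetical helper name, not an existing KVM function:

	#include <linux/interval_tree.h>
	#include <linux/kvm_host.h>

	/* Hypothetical: find a memslot whose hva range contains @hva, if any. */
	static struct kvm_memory_slot *kvm_hva_to_memslot(struct kvm_memslots *slots,
							  unsigned long hva)
	{
		/* Stabbing query on [hva, hva] instead of a linear scan. */
		struct interval_tree_node *node =
			interval_tree_iter_first(&slots->hva_tree, hva, hva);

		return node ? container_of(node, struct kvm_memory_slot,
					   hva_node) : NULL;
	}

Because hva ranges can overlap, this returns one arbitrary matching slot;
callers needing all overlapping slots iterate with interval_tree_iter_next().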
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_VCPU_RUN_PID_CHANGE
select SCHED_INFO
help
Support hosting virtualized guest machines.
select KVM_MMIO
select MMU_NOTIFIER
select SRCU
help
Support for hosting Guest kernels.
select KVM_VFIO
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
config KVM_BOOK3S_HANDLER
bool
select HAVE_KVM_NO_POLL
select SRCU
select KVM_VFIO
help
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_VFIO
select SRCU
select HAVE_KVM_PM_NOTIFIER if PM
help
Support hosting fully virtualized guest machines using hardware
#include <linux/nospec.h>
#include <linux/notifier.h>
#include <linux/hashtable.h>
+#include <linux/interval_tree.h>
#include <linux/xarray.h>
#include <asm/signal.h>
struct kvm_memory_slot {
struct hlist_node id_node;
+ struct interval_tree_node hva_node;
gfn_t base_gfn;
unsigned long npages;
unsigned long *dirty_bitmap;
*/
struct kvm_memslots {
u64 generation;
+ struct rb_root_cached hva_tree;
/*
* The mapping table from slot id to the index in memslots[].
*
}
#define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn)
+/* Iterate over each memslot intersecting [start, last] (inclusive) range */
+#define kvm_for_each_memslot_in_hva_range(node, slots, start, last) \
+ for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
+ node; \
+ node = interval_tree_iter_next(node, start, last)) \
+
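A hedged usage sketch of the new iterator (names are local to this example):
the interval tree's "last" endpoint is inclusive, while KVM hva ranges are
exclusive at the end, so callers pass end - 1, as the hunk below does:

	struct interval_tree_node *node;
	struct kvm_memory_slot *slot;

	/* Visits only the memslots overlapping [start, end - 1]. */
	kvm_for_each_memslot_in_hva_range(node, slots, start, end - 1) {
		slot = container_of(node, struct kvm_memory_slot, hva_node);
		/* ... operate on @slot ... */
	}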
static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
const struct kvm_hva_range *range)
{
struct kvm_memslots *slots;
int i, idx;
+ if (WARN_ON_ONCE(range->end <= range->start))
+ return 0;
+
/* A null handler is allowed if and only if on_lock() is provided. */
if (WARN_ON_ONCE(IS_KVM_NULL_FN(range->on_lock) &&
IS_KVM_NULL_FN(range->handler)))
idx = srcu_read_lock(&kvm->srcu);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ struct interval_tree_node *node;
+
slots = __kvm_memslots(kvm, i);
- kvm_for_each_memslot(slot, slots) {
+ kvm_for_each_memslot_in_hva_range(node, slots,
+ range->start, range->end - 1) {
unsigned long hva_start, hva_end;
+ slot = container_of(node, struct kvm_memory_slot, hva_node);
hva_start = max(range->start, slot->userspace_addr);
hva_end = min(range->end, slot->userspace_addr +
(slot->npages << PAGE_SHIFT));
- if (hva_start >= hva_end)
- continue;
/*
* To optimize for the likely case where the address
+ slots->hva_tree = RB_ROOT_CACHED;
hash_init(slots->id_hash);
return slots;
struct kvm_memory_slot *new)
{
/*
- * Remove the old memslot from the hash list, copying the node data
- * would corrupt the list.
+ * Remove the old memslot from the hash list and interval tree, as copying
+ * the node data would corrupt the structures.
*/
if (old) {
hash_del(&old->id_node);
+ interval_tree_remove(&old->hva_node, &slots->hva_tree);
if (!new)
return;
/* Copy the source *data*, not the pointer, to the destination. */
*new = *old;
+ } else {
+ /* If @old is NULL, initialize @new's hva range. */
+ new->hva_node.start = new->userspace_addr;
+ new->hva_node.last = new->userspace_addr +
+ (new->npages << PAGE_SHIFT) - 1;
}
/* (Re)Add the new memslot. */
hash_add(slots->id_hash, &new->id_node, new->id);
+ interval_tree_insert(&new->hva_node, &slots->hva_tree);
}
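As a worked example of the range initialization above (values illustrative):
with userspace_addr == 0x7f0000000000, npages == 4 and PAGE_SHIFT == 12, the
node spans hva_node.start == 0x7f0000000000 through hva_node.last ==
0x7f0000000000 + (4 << 12) - 1 == 0x7f0000003fff, i.e. the last byte of the
slot, matching the tree's inclusive "last" semantics.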
static void kvm_shift_memslot(struct kvm_memslots *slots, int dst, int src)
atomic_set(&slots->last_used_slot, 0);
/*
- * Remove the to-be-deleted memslot from the list _before_ shifting
+ * Remove the to-be-deleted memslot from the list/tree _before_ shifting
* the trailing memslots forward, its data will be overwritten.
* Defer the (somewhat pointless) copying of the memslot until after
* the last slot has been shifted to avoid overwriting said last slot.
* itself is not preserved in the array, i.e. not swapped at this time, only
* its new index into the array is tracked. Returns the changed memslot's
* current index into the memslots array.
- * The memslot at the returned index will not be in @slots->id_hash by then.
+ * The memslot at the returned index will not be in @slots->hva_tree or
+ * @slots->id_hash by then.
* @memslot is a detached struct with desired final data of the changed slot.
*/
static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
- * Delete the slot from the hash table before sorting the remaining
- * slots, the slot's data may be overwritten when copying slots as part
- * of the sorting proccess. update_memslots() will unconditionally
- * rewrite the entire slot and re-add it to the hash table.
+ * Delete the slot from the hash table and interval tree before sorting
+ * the remaining slots, as the slot's data may be overwritten when copying
+ * slots as part of the sorting process. update_memslots() will
+ * unconditionally rewrite and re-add the entire slot.
*/
kvm_replace_memslot(slots, oldslot, NULL);
* is not preserved in the array, i.e. not swapped at this time, only its new
* index into the array is tracked. Returns the changed memslot's final index
* into the memslots array.
- * The memslot at the returned index will not be in @slots->id_hash by then.
+ * The memslot at the returned index will not be in @slots->hva_tree or
+ * @slots->id_hash by then.
* @memslot is a detached struct with desired final data of the new or
* changed slot.
- * Assumes that the memslot at @start index is not in @slots->id_hash.
+ * Assumes that the memslot at @start index is not in @slots->hva_tree or
+ * @slots->id_hash.
*/
static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
struct kvm_memory_slot *memslot,
memcpy(slots, old, kvm_memslots_size(old->used_slots));
+ slots->hva_tree = RB_ROOT_CACHED;
hash_init(slots->id_hash);
- kvm_for_each_memslot(memslot, slots)
+ kvm_for_each_memslot(memslot, slots) {
+ interval_tree_insert(&memslot->hva_node, &slots->hva_tree);
hash_add(slots->id_hash, &memslot->id_node, memslot->id);
+ }