PTE's RPN field (ie, it needs to be shifted left by 12 to OR it
into the hash PTE second double word).
+
+4.75 KVM_PPC_ALLOCATE_HTAB
+
+Capability: KVM_CAP_PPC_ALLOC_HTAB
+Architectures: powerpc
+Type: vm ioctl
+Parameters: Pointer to u32 containing hash table order (in/out)
+Returns: 0 on success, -1 on error
+
+This requests the host kernel to allocate an MMU hash table for a
+guest using the PAPR paravirtualization interface. This only does
+anything if the kernel is configured to use the Book 3S HV style of
+virtualization. Otherwise the capability doesn't exist and the ioctl
+returns an ENOTTY error. The rest of this description assumes Book 3S
+HV.
+
+There must be no vcpus running when this ioctl is called; if there
+are, it will do nothing and return an EBUSY error.
+
+The parameter is a pointer to a 32-bit unsigned integer variable
+containing the order (log base 2) of the desired size of the hash
+table, which must be between 18 and 46. On successful return from the
+ioctl, it will have been updated with the order of the hash table that
+was allocated.
+
+If no hash table has been allocated when any vcpu is asked to run
+(with the KVM_RUN ioctl), the host kernel will allocate a
+default-sized hash table (16 MB).
+
+If this ioctl is called when a hash table has already been allocated,
+the kernel will clear out the existing hash table (zero all HPTEs) and
+return the hash table order in the parameter. (If the guest is using
+the virtualized real-mode area (VRMA) facility, the kernel will
+re-create the VMRA HPTEs on the next KVM_RUN of any vcpu.)
+
+
5. The kvm_run structure
------------------------
#define SPAPR_TCE_SHIFT 12
#ifdef CONFIG_KVM_BOOK3S_64_HV
-/* For now use fixed-size 16MB page table */
-#define HPT_ORDER 24
-#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
-#define HPT_NPTE (HPT_NPTEG << 3) /* 8 PTEs per PTEG */
-#define HPT_HASH_MASK (HPT_NPTEG - 1)
+#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
+extern int kvm_hpt_order; /* order of preallocated HPTs */
#endif
#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
unsigned long vrma_slb_v;
int rma_setup_done;
int using_mmu_notifiers;
+ u32 hpt_order;
+ atomic_t vcpus_running;
+ unsigned long hpt_npte;
+ unsigned long hpt_mask;
spinlock_t slot_phys_lock;
unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
int slot_npages[KVM_MEM_SLOTS_NUM];
extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu);
extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
-extern long kvmppc_alloc_hpt(struct kvm *kvm);
+extern long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp);
+extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp);
extern void kvmppc_free_hpt(struct kvm *kvm);
extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
#define MAX_LPID_970 63
-long kvmppc_alloc_hpt(struct kvm *kvm)
+/* Power architecture requires HPT is at least 256kB */
+#define PPC_MIN_HPT_ORDER 18
+
+long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
{
unsigned long hpt;
- long lpid;
struct revmap_entry *rev;
struct kvmppc_linear_info *li;
+ long order = kvm_hpt_order;
- /* Allocate guest's hashed page table */
- li = kvm_alloc_hpt();
- if (li) {
- /* using preallocated memory */
- hpt = (ulong)li->base_virt;
- kvm->arch.hpt_li = li;
- } else {
- /* using dynamic memory */
+ if (htab_orderp) {
+ order = *htab_orderp;
+ if (order < PPC_MIN_HPT_ORDER)
+ order = PPC_MIN_HPT_ORDER;
+ }
+
+ /*
+ * If the user wants a different size from default,
+ * try first to allocate it from the kernel page allocator.
+ */
+ hpt = 0;
+ if (order != kvm_hpt_order) {
hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
- __GFP_NOWARN, HPT_ORDER - PAGE_SHIFT);
+ __GFP_NOWARN, order - PAGE_SHIFT);
+ if (!hpt)
+ --order;
}
+ /* Next try to allocate from the preallocated pool */
if (!hpt) {
- pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n");
- return -ENOMEM;
+ li = kvm_alloc_hpt();
+ if (li) {
+ hpt = (ulong)li->base_virt;
+ kvm->arch.hpt_li = li;
+ order = kvm_hpt_order;
+ }
}
+
+ /* Lastly try successively smaller sizes from the page allocator */
+ while (!hpt && order > PPC_MIN_HPT_ORDER) {
+ hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
+ __GFP_NOWARN, order - PAGE_SHIFT);
+ if (!hpt)
+ --order;
+ }
+
+ if (!hpt)
+ return -ENOMEM;
+
kvm->arch.hpt_virt = hpt;
+ kvm->arch.hpt_order = order;
+ /* HPTEs are 2**4 bytes long */
+ kvm->arch.hpt_npte = 1ul << (order - 4);
+ /* 128 (2**7) bytes in each HPTEG */
+ kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
/* Allocate reverse map array */
- rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE);
+ rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
if (!rev) {
pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
goto out_freehpt;
}
kvm->arch.revmap = rev;
+ kvm->arch.sdr1 = __pa(hpt) | (order - 18);
- lpid = kvmppc_alloc_lpid();
- if (lpid < 0)
- goto out_freeboth;
+ pr_info("KVM guest htab at %lx (order %ld), LPID %x\n",
+ hpt, order, kvm->arch.lpid);
- kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18);
- kvm->arch.lpid = lpid;
-
- pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
+ if (htab_orderp)
+ *htab_orderp = order;
return 0;
- out_freeboth:
- vfree(rev);
out_freehpt:
- free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
+ if (kvm->arch.hpt_li)
+ kvm_release_hpt(kvm->arch.hpt_li);
+ else
+ free_pages(hpt, order - PAGE_SHIFT);
return -ENOMEM;
}
+long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
+{
+ long err = -EBUSY;
+ long order;
+
+ mutex_lock(&kvm->lock);
+ if (kvm->arch.rma_setup_done) {
+ kvm->arch.rma_setup_done = 0;
+ /* order rma_setup_done vs. vcpus_running */
+ smp_mb();
+ if (atomic_read(&kvm->arch.vcpus_running)) {
+ kvm->arch.rma_setup_done = 1;
+ goto out;
+ }
+ }
+ if (kvm->arch.hpt_virt) {
+ order = kvm->arch.hpt_order;
+ /* Set the entire HPT to 0, i.e. invalid HPTEs */
+ memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
+ /*
+ * Set the whole last_vcpu array to an invalid vcpu number.
+ * This ensures that each vcpu will flush its TLB on next entry.
+ */
+ memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu));
+ *htab_orderp = order;
+ err = 0;
+ } else {
+ err = kvmppc_alloc_hpt(kvm, htab_orderp);
+ order = *htab_orderp;
+ }
+ out:
+ mutex_unlock(&kvm->lock);
+ return err;
+}
+
void kvmppc_free_hpt(struct kvm *kvm)
{
kvmppc_free_lpid(kvm->arch.lpid);
if (kvm->arch.hpt_li)
kvm_release_hpt(kvm->arch.hpt_li);
else
- free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
+ free_pages(kvm->arch.hpt_virt,
+ kvm->arch.hpt_order - PAGE_SHIFT);
}
/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
unsigned long psize;
unsigned long hp0, hp1;
long ret;
+ struct kvm *kvm = vcpu->kvm;
psize = 1ul << porder;
npages = memslot->npages >> (porder - PAGE_SHIFT);
if (npages > 1ul << (40 - porder))
npages = 1ul << (40 - porder);
/* Can't use more than 1 HPTE per HPTEG */
- if (npages > HPT_NPTEG)
- npages = HPT_NPTEG;
+ if (npages > kvm->arch.hpt_mask + 1)
+ npages = kvm->arch.hpt_mask + 1;
hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
for (i = 0; i < npages; ++i) {
addr = i << porder;
/* can't use hpt_hash since va > 64 bits */
- hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
+ hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask;
/*
* We assume that the hash table is empty and no
* vcpus are using it at this stage. Since we create
/* #define EXIT_DEBUG_INT */
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
-static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu);
+static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
return -EINTR;
}
- /* On the first time here, set up VRMA or RMA */
+ atomic_inc(&vcpu->kvm->arch.vcpus_running);
+ /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
+ smp_mb();
+
+ /* On the first time here, set up HTAB and VRMA or RMA */
if (!vcpu->kvm->arch.rma_setup_done) {
- r = kvmppc_hv_setup_rma(vcpu);
+ r = kvmppc_hv_setup_htab_rma(vcpu);
if (r)
- return r;
+ goto out;
}
flush_fp_to_thread(current);
kvmppc_core_prepare_to_enter(vcpu);
}
} while (r == RESUME_GUEST);
+
+ out:
+ atomic_dec(&vcpu->kvm->arch.vcpus_running);
return r;
}
{
}
-static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
+static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
int err = 0;
struct kvm *kvm = vcpu->kvm;
if (kvm->arch.rma_setup_done)
goto out; /* another vcpu beat us to it */
+ /* Allocate hashed page table (if not done already) and reset it */
+ if (!kvm->arch.hpt_virt) {
+ err = kvmppc_alloc_hpt(kvm, NULL);
+ if (err) {
+ pr_err("KVM: Couldn't alloc HPT\n");
+ goto out;
+ }
+ }
+
/* Look up the memslot for guest physical address 0 */
memslot = gfn_to_memslot(kvm, 0);
int kvmppc_core_init_vm(struct kvm *kvm)
{
- long r;
- unsigned long lpcr;
+ unsigned long lpcr, lpid;
- /* Allocate hashed page table */
- r = kvmppc_alloc_hpt(kvm);
- if (r)
- return r;
+ /* Allocate the guest's logical partition ID */
+
+ lpid = kvmppc_alloc_lpid();
+ if (lpid < 0)
+ return -ENOMEM;
+ kvm->arch.lpid = lpid;
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
if (cpu_has_feature(CPU_FTR_ARCH_201)) {
/* PPC970; HID4 is effectively the LPCR */
- unsigned long lpid = kvm->arch.lpid;
kvm->arch.host_lpid = 0;
kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
static struct kvmppc_linear_info *kvm_alloc_linear(int type);
static void kvm_release_linear(struct kvmppc_linear_info *ri);
+int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER;
+EXPORT_SYMBOL_GPL(kvm_hpt_order);
+
/*************** RMA *************/
/*
void __init kvm_linear_init(void)
{
/* HPT */
- kvm_linear_init_one(1 << HPT_ORDER, kvm_hpt_count, KVM_LINEAR_HPT);
+ kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT);
/* RMA */
/* Only do this on PPC970 in HV mode */
/* Find and lock the HPTEG slot to use */
do_insert:
- if (pte_index >= HPT_NPTE)
+ if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
if (likely((flags & H_EXACT) == 0)) {
pte_index &= ~7UL;
unsigned long v, r, rb;
struct revmap_entry *rev;
- if (pte_index >= HPT_NPTE)
+ if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
i = 4;
break;
}
- if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) {
+ if (req != 1 || flags == 3 ||
+ pte_index >= kvm->arch.hpt_npte) {
/* parameter error */
args[j] = ((0xa0 | flags) << 56) + pte_index;
ret = H_PARAMETER;
struct revmap_entry *rev;
unsigned long v, r, rb, mask, bits;
- if (pte_index >= HPT_NPTE)
+ if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
int i, n = 1;
struct revmap_entry *rev = NULL;
- if (pte_index >= HPT_NPTE)
+ if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
if (flags & H_READ_4) {
pte_index &= ~3;
somask = (1UL << 28) - 1;
vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
}
- hash = (vsid ^ ((eaddr & somask) >> pshift)) & HPT_HASH_MASK;
+ hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
avpn = slb_v & ~(somask >> 16); /* also includes B */
avpn |= (eaddr & somask) >> 16;
if (val & HPTE_V_SECONDARY)
break;
val |= HPTE_V_SECONDARY;
- hash = hash ^ HPT_HASH_MASK;
+ hash = hash ^ kvm->arch.hpt_mask;
}
return -1;
}
#endif
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
+ case KVM_CAP_PPC_ALLOC_HTAB:
r = 1;
break;
#endif /* CONFIG_PPC_BOOK3S_64 */
r = -EFAULT;
break;
}
+
+ case KVM_PPC_ALLOCATE_HTAB: {
+ struct kvm *kvm = filp->private_data;
+ u32 htab_order;
+
+ r = -EFAULT;
+ if (get_user(htab_order, (u32 __user *)argp))
+ break;
+ r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
+ if (r)
+ break;
+ r = -EFAULT;
+ if (put_user(htab_order, (u32 __user *)argp))
+ break;
+ r = 0;
+ break;
+ }
#endif /* CONFIG_KVM_BOOK3S_64_HV */
#ifdef CONFIG_PPC_BOOK3S_64
#define KVM_CAP_SIGNAL_MSI 77
#define KVM_CAP_PPC_GET_SMMU_INFO 78
#define KVM_CAP_S390_COW 79
+#define KVM_CAP_PPC_ALLOC_HTAB 80
#ifdef KVM_CAP_IRQ_ROUTING
#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi)
/* Available with KVM_CAP_PPC_GET_SMMU_INFO */
#define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info)
+/* Available with KVM_CAP_PPC_ALLOC_HTAB */
+#define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32)
/*
* ioctls for vcpu fds