From 3412b6ae2924e068f9932f841bdea0f2d8424502 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Apr 2013 14:10:40 +0930 Subject: [PATCH] lguest: don't share Switcher PTE pages between guests. We currently use the whole top PGD entry for the switcher, so we simply share a fixed page of PTEs between all guests (actually, it's one per Host CPU, to ensure isolation between guests). Changes to a scheme where every guest has its own mappings. Signed-off-by: Rusty Russell --- drivers/lguest/core.c | 10 +- drivers/lguest/lg.h | 3 - drivers/lguest/page_tables.c | 260 ++++++++++++++++++------------------------- 3 files changed, 107 insertions(+), 166 deletions(-) diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index b6c71c3..7e1d7ee 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -333,15 +333,10 @@ static int __init init(void) if (err) goto out; - /* Now we set up the pagetable implementation for the Guests. */ - err = init_pagetables(lg_switcher_pages); - if (err) - goto unmap; - /* We might need to reserve an interrupt vector. */ err = init_interrupts(); if (err) - goto free_pgtables; + goto unmap; /* /dev/lguest needs to be registered. */ err = lguest_device_init(); @@ -356,8 +351,6 @@ static int __init init(void) free_interrupts: free_interrupts(); -free_pgtables: - free_pagetables(); unmap: unmap_switcher(); out: @@ -369,7 +362,6 @@ static void __exit fini(void) { lguest_device_remove(); free_interrupts(); - free_pagetables(); unmap_switcher(); lguest_arch_host_fini(); diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 9a345ef..faac9fc 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -14,9 +14,6 @@ #include -void free_pagetables(void); -int init_pagetables(struct page **switcher_pages); - struct pgdir { unsigned long gpgdir; pgd_t *pgdir; diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 009c717..1f48f271 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -62,20 +62,11 @@ * will need the last pmd entry of the last pmd page. */ #ifdef CONFIG_X86_PAE -#define SWITCHER_PMD_INDEX (PTRS_PER_PMD - 1) #define CHECK_GPGD_MASK _PAGE_PRESENT #else #define CHECK_GPGD_MASK _PAGE_TABLE #endif -/* - * We actually need a separate PTE page for each CPU. Remember that after the - * Switcher code itself comes two pages for each CPU, and we don't want this - * CPU's guest to see the pages of any other CPU. - */ -static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); -#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu) - /*H:320 * The page table code is curly enough to need helper functions to keep it * clear and clean. The kernel itself provides many of them; one advantage @@ -714,9 +705,6 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, int *blank_pgdir) { unsigned int next; -#ifdef CONFIG_X86_PAE - pmd_t *pmd_table; -#endif /* * We pick one entry at random to throw out. Choosing the Least @@ -731,29 +719,11 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, if (!cpu->lg->pgdirs[next].pgdir) next = cpu->cpu_pgd; else { -#ifdef CONFIG_X86_PAE /* - * In PAE mode, allocate a pmd page and populate the - * last pgd entry. + * This is a blank page, so there are no kernel + * mappings: caller must map the stack! */ - pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL); - if (!pmd_table) { - free_page((long)cpu->lg->pgdirs[next].pgdir); - set_pgd(cpu->lg->pgdirs[next].pgdir, __pgd(0)); - next = cpu->cpu_pgd; - } else { - set_pgd(cpu->lg->pgdirs[next].pgdir + - SWITCHER_PGD_INDEX, - __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - /* - * This is a blank page, so there are no kernel - * mappings: caller must map the stack! - */ - *blank_pgdir = 1; - } -#else *blank_pgdir = 1; -#endif } } /* Record which Guest toplevel this shadows. */ @@ -764,6 +734,23 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, return next; } +/*H:501 + * We do need the Switcher code mapped at all times, so we allocate that + * part of the Guest page table here, and populate it when we're about to run + * the guest. + */ +static bool allocate_switcher_mapping(struct lg_cpu *cpu) +{ + int i; + + for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { + if (!find_spte(cpu, switcher_addr + i * PAGE_SIZE, true, + CHECK_GPGD_MASK, _PAGE_TABLE)) + return false; + } + return true; +} + /*H:470 * Finally, a routine which throws away everything: all PGD entries in all * the shadow page tables, including the Guest's kernel mappings. This is used @@ -774,28 +761,14 @@ static void release_all_pagetables(struct lguest *lg) unsigned int i, j; /* Every shadow pagetable this Guest has */ - for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) - if (lg->pgdirs[i].pgdir) { -#ifdef CONFIG_X86_PAE - pgd_t *spgd; - pmd_t *pmdpage; - unsigned int k; + for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) { + if (!lg->pgdirs[i].pgdir) + continue; - /* Get the last pmd page. */ - spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX; - pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT); - - /* - * And release the pmd entries of that pmd page, - * except for the switcher pmd. - */ - for (k = 0; k < SWITCHER_PMD_INDEX; k++) - release_pmd(&pmdpage[k]); -#endif - /* Every PGD entry except the Switcher at the top */ - for (j = 0; j < SWITCHER_PGD_INDEX; j++) - release_pgd(lg->pgdirs[i].pgdir + j); - } + /* Every PGD entry. */ + for (j = 0; j < PTRS_PER_PGD; j++) + release_pgd(lg->pgdirs[i].pgdir + j); + } } /* @@ -809,6 +782,9 @@ void guest_pagetable_clear_all(struct lg_cpu *cpu) release_all_pagetables(cpu->lg); /* We need the Guest kernel stack mapped again. */ pin_stack_pages(cpu); + /* And we need Switcher allocated. */ + if (!allocate_switcher_mapping(cpu)) + kill_guest(cpu, "Cannot populate switcher mapping"); } /*H:430 @@ -844,9 +820,15 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) newpgdir = new_pgdir(cpu, pgtable, &repin); /* Change the current pgd index to the new one. */ cpu->cpu_pgd = newpgdir; - /* If it was completely blank, we map in the Guest kernel stack */ + /* + * If it was completely blank, we map in the Guest kernel stack and + * the Switcher. + */ if (repin) pin_stack_pages(cpu); + + if (!allocate_switcher_mapping(cpu)) + kill_guest(cpu, "Cannot populate switcher mapping"); } /*:*/ @@ -976,14 +958,23 @@ void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx) { int pgdir; - if (idx >= SWITCHER_PGD_INDEX) + if (idx > PTRS_PER_PGD) { + kill_guest(&lg->cpus[0], "Attempt to set pgd %u/%u", + idx, PTRS_PER_PGD); return; + } /* If they're talking about a page table we have a shadow for... */ pgdir = find_pgdir(lg, gpgdir); - if (pgdir < ARRAY_SIZE(lg->pgdirs)) + if (pgdir < ARRAY_SIZE(lg->pgdirs)) { /* ... throw it away. */ release_pgd(lg->pgdirs[pgdir].pgdir + idx); + /* That might have been the Switcher mapping, remap it. */ + if (!allocate_switcher_mapping(&lg->cpus[0])) { + kill_guest(&lg->cpus[0], + "Cannot populate switcher mapping"); + } + } } #ifdef CONFIG_X86_PAE @@ -1001,6 +992,9 @@ void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx) * we will populate on future faults. The Guest doesn't have any actual * pagetables yet, so we set linear_pages to tell demand_page() to fake it * for the moment. + * + * We do need the Switcher to be mapped at all times, so we allocate that + * part of the Guest page table here. */ int init_guest_pagetable(struct lguest *lg) { @@ -1014,6 +1008,13 @@ int init_guest_pagetable(struct lguest *lg) /* We start with a linear mapping until the initialize. */ cpu->linear_pages = true; + + /* Allocate the page tables for the Switcher. */ + if (!allocate_switcher_mapping(cpu)) { + release_all_pagetables(lg); + return -ENOMEM; + } + return 0; } @@ -1065,91 +1066,68 @@ void free_guest_pagetable(struct lguest *lg) * (vi) Mapping the Switcher when the Guest is about to run. * * The Switcher and the two pages for this CPU need to be visible in the - * Guest (and not the pages for other CPUs). We have the appropriate PTE pages - * for each CPU already set up, we just need to hook them in now we know which - * Guest is about to run on this CPU. + * Guest (and not the pages for other CPUs). + * + * The pages have all been allocate */ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) { - pte_t *switcher_pte_page = __this_cpu_read(switcher_pte_pages); - pte_t regs_pte; + unsigned long base, i; + struct page *percpu_switcher_page, *regs_page; + pte_t *pte; -#ifdef CONFIG_X86_PAE - pmd_t switcher_pmd; - pmd_t *pmd_table; - - switcher_pmd = pfn_pmd(__pa(switcher_pte_page) >> PAGE_SHIFT, - PAGE_KERNEL_EXEC); - - /* Figure out where the pmd page is, by reading the PGD, and converting - * it to a virtual address. */ - pmd_table = __va(pgd_pfn(cpu->lg-> - pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX]) - << PAGE_SHIFT); - /* Now write it into the shadow page table. */ - set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd); -#else - pgd_t switcher_pgd; + /* Code page should always be mapped, and executable. */ + pte = find_spte(cpu, switcher_addr, false, 0, 0); + get_page(lg_switcher_pages[0]); + set_pte(pte, mk_pte(lg_switcher_pages[0], PAGE_KERNEL_RX)); - /* - * Make the last PGD entry for this Guest point to the Switcher's PTE - * page for this CPU (with appropriate flags). - */ - switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC); + /* Clear all the Switcher mappings for any other CPUs. */ + /* FIXME: This is dumb: update only when Host CPU changes. */ + for_each_possible_cpu(i) { + /* Get location of lguest_pages (indexed by Host CPU) */ + base = switcher_addr + PAGE_SIZE + + i * sizeof(struct lguest_pages); - cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; + /* Get shadow PTE for first page (where we put guest regs). */ + pte = find_spte(cpu, base, false, 0, 0); + set_pte(pte, __pte(0)); + + /* This is where we put R/O state. */ + pte = find_spte(cpu, base + PAGE_SIZE, false, 0, 0); + set_pte(pte, __pte(0)); + } -#endif /* - * We also change the Switcher PTE page. When we're running the Guest, - * we want the Guest's "regs" page to appear where the first Switcher - * page for this CPU is. This is an optimization: when the Switcher - * saves the Guest registers, it saves them into the first page of this - * CPU's "struct lguest_pages": if we make sure the Guest's register - * page is already mapped there, we don't have to copy them out - * again. + * When we're running the Guest, we want the Guest's "regs" page to + * appear where the first Switcher page for this CPU is. This is an + * optimization: when the Switcher saves the Guest registers, it saves + * them into the first page of this CPU's "struct lguest_pages": if we + * make sure the Guest's register page is already mapped there, we + * don't have to copy them out again. */ - regs_pte = pfn_pte(__pa(cpu->regs_page) >> PAGE_SHIFT, PAGE_KERNEL); - set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], regs_pte); -} -/*:*/ - -static void free_switcher_pte_pages(void) -{ - unsigned int i; - - for_each_possible_cpu(i) - free_page((long)switcher_pte_page(i)); -} - -/*H:520 - * Setting up the Switcher PTE page for given CPU is fairly easy, given - * the CPU number and the "struct page"s for the Switcher and per-cpu pages. - */ -static __init void populate_switcher_pte_page(unsigned int cpu, - struct page *switcher_pages[]) -{ - pte_t *pte = switcher_pte_page(cpu); - int i; - - /* The first entries maps the Switcher code. */ - set_pte(&pte[0], mk_pte(switcher_pages[0], - __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED))); - - /* The only other thing we map is this CPU's pair of pages. */ - i = 1 + cpu*2; - - /* First page (Guest registers) is writable from the Guest */ - set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_pages[i]), - __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW))); + /* Find the shadow PTE for this regs page. */ + base = switcher_addr + PAGE_SIZE + + raw_smp_processor_id() * sizeof(struct lguest_pages); + pte = find_spte(cpu, base, false, 0, 0); + regs_page = pfn_to_page(__pa(cpu->regs_page) >> PAGE_SHIFT); + get_page(regs_page); + set_pte(pte, mk_pte(regs_page, __pgprot(__PAGE_KERNEL & ~_PAGE_GLOBAL))); /* - * The second page contains the "struct lguest_ro_state", and is - * read-only. + * We map the second page of the struct lguest_pages read-only in + * the Guest: the IDT, GDT and other things it's not supposed to + * change. */ - set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_pages[i+1]), - __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED))); + base += PAGE_SIZE; + pte = find_spte(cpu, base, false, 0, 0); + + percpu_switcher_page + = lg_switcher_pages[1 + raw_smp_processor_id()*2 + 1]; + get_page(percpu_switcher_page); + set_pte(pte, mk_pte(percpu_switcher_page, + __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL))); } +/*:*/ /* * We've made it through the page table code. Perhaps our tired brains are @@ -1163,29 +1141,3 @@ static __init void populate_switcher_pte_page(unsigned int cpu, * * There is just one file remaining in the Host. */ - -/*H:510 - * At boot or module load time, init_pagetables() allocates and populates - * the Switcher PTE page for each CPU. - */ -__init int init_pagetables(struct page **switcher_pages) -{ - unsigned int i; - - for_each_possible_cpu(i) { - switcher_pte_page(i) = (pte_t *)get_zeroed_page(GFP_KERNEL); - if (!switcher_pte_page(i)) { - free_switcher_pte_pages(); - return -ENOMEM; - } - populate_switcher_pte_page(i, switcher_pages); - } - return 0; -} -/*:*/ - -/* Cleaning up simply involves freeing the PTE page for each CPU. */ -void free_pagetables(void) -{ - free_switcher_pte_pages(); -} -- 2.7.4