arch/arm64/kvm/hyp/pgtable.c (platform/kernel/linux-rpi.git)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Stand-alone page-table allocator for hyp stage-1 and guest stage-2.
 * No bombay mix was harmed in the writing of this file.
 *
 * Copyright (C) 2020 Google LLC
 * Author: Will Deacon <will@kernel.org>
 */

#include <linux/bitfield.h>
#include <asm/kvm_pgtable.h>

#define KVM_PGTABLE_MAX_LEVELS          4U

#define KVM_PTE_VALID                   BIT(0)

#define KVM_PTE_TYPE                    BIT(1)
#define KVM_PTE_TYPE_BLOCK              0
#define KVM_PTE_TYPE_PAGE               1
#define KVM_PTE_TYPE_TABLE              1

#define KVM_PTE_ADDR_MASK               GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48              GENMASK(15, 12)

#define KVM_PTE_LEAF_ATTR_LO            GENMASK(11, 2)

#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP      GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO   3
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW   1
#define KVM_PTE_LEAF_ATTR_LO_S1_SH      GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS   3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF      BIT(10)

#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R  BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W  BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH      GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS   3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF      BIT(10)

#define KVM_PTE_LEAF_ATTR_HI            GENMASK(63, 51)

#define KVM_PTE_LEAF_ATTR_HI_S1_XN      BIT(54)

#define KVM_PTE_LEAF_ATTR_HI_S2_XN      BIT(54)

struct kvm_pgtable_walk_data {
        struct kvm_pgtable              *pgt;
        struct kvm_pgtable_walker       *walker;

        u64                             addr;
        u64                             end;
};

static u64 kvm_granule_shift(u32 level)
{
        /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
        return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
}

static u64 kvm_granule_size(u32 level)
{
        return BIT(kvm_granule_shift(level));
}

static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
{
        u64 granule = kvm_granule_size(level);

        /*
         * Reject invalid block mappings and don't bother with 4TB mappings for
         * 52-bit PAs.
         */
        if (level == 0 || (PAGE_SIZE != SZ_4K && level == 1))
                return false;

        if (granule > (end - addr))
                return false;

        return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule);
}

static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
{
        u64 shift = kvm_granule_shift(level);
        u64 mask = BIT(PAGE_SHIFT - 3) - 1;

        return (data->addr >> shift) & mask;
}

static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
{
        u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
        u64 mask = BIT(pgt->ia_bits) - 1;

        return (addr & mask) >> shift;
}

static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data)
{
        return __kvm_pgd_page_idx(data->pgt, data->addr);
}

static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
{
        struct kvm_pgtable pgt = {
                .ia_bits        = ia_bits,
                .start_level    = start_level,
        };

        return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
}
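
/*
 * Worked example (illustrative, assuming a 4K granule): with ia_bits = 40 and
 * start_level = 1, __kvm_pgd_page_idx() uses the granule shift of the level
 * above the start level (39), so the index of the last address,
 * (BIT(40) - 1) >> 39, is 1 and kvm_pgd_pages() returns 2 concatenated PGD
 * pages, each covering 512GB of the 1TB IPA space.
 */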

static bool kvm_pte_valid(kvm_pte_t pte)
{
        return pte & KVM_PTE_VALID;
}

static bool kvm_pte_table(kvm_pte_t pte, u32 level)
{
        if (level == KVM_PGTABLE_MAX_LEVELS - 1)
                return false;

        if (!kvm_pte_valid(pte))
                return false;

        return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
}

static u64 kvm_pte_to_phys(kvm_pte_t pte)
{
        u64 pa = pte & KVM_PTE_ADDR_MASK;

        if (PAGE_SHIFT == 16)
                pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;

        return pa;
}

static kvm_pte_t kvm_phys_to_pte(u64 pa)
{
        kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;

        if (PAGE_SHIFT == 16)
                pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);

        return pte;
}
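
/*
 * Illustrative note (assuming a 64K granule, i.e. PAGE_SHIFT == 16): PA bits
 * [51:48] are not contiguous with the rest of the output address in the
 * descriptor; they live in bits [15:12] (KVM_PTE_ADDR_51_48). For example, a
 * PA of 0x000f_0000_0000_0000 yields a descriptor with bits [15:12] = 0xf,
 * and kvm_pte_to_phys()/kvm_phys_to_pte() fold those bits back and forth.
 */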

static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte)
{
        return __va(kvm_pte_to_phys(pte));
}

static void kvm_set_invalid_pte(kvm_pte_t *ptep)
{
        kvm_pte_t pte = *ptep;
        WRITE_ONCE(*ptep, pte & ~KVM_PTE_VALID);
}

static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp)
{
        kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(__pa(childp));

        pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
        pte |= KVM_PTE_VALID;

        WARN_ON(kvm_pte_valid(old));
        smp_store_release(ptep, pte);
}

static bool kvm_set_valid_leaf_pte(kvm_pte_t *ptep, u64 pa, kvm_pte_t attr,
                                   u32 level)
{
        kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(pa);
        u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
                                                           KVM_PTE_TYPE_BLOCK;

        pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
        pte |= FIELD_PREP(KVM_PTE_TYPE, type);
        pte |= KVM_PTE_VALID;

        /* Tolerate KVM recreating the exact same mapping. */
        if (kvm_pte_valid(old))
                return old == pte;

        smp_store_release(ptep, pte);
        return true;
}

static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
                                  u32 level, kvm_pte_t *ptep,
                                  enum kvm_pgtable_walk_flags flag)
{
        struct kvm_pgtable_walker *walker = data->walker;
        return walker->cb(addr, data->end, level, ptep, flag, walker->arg);
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
                              kvm_pte_t *pgtable, u32 level);

static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
                                      kvm_pte_t *ptep, u32 level)
{
        int ret = 0;
        u64 addr = data->addr;
        kvm_pte_t *childp, pte = *ptep;
        bool table = kvm_pte_table(pte, level);
        enum kvm_pgtable_walk_flags flags = data->walker->flags;

        if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
                ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
                                             KVM_PGTABLE_WALK_TABLE_PRE);
        }

        if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) {
                ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
                                             KVM_PGTABLE_WALK_LEAF);
                pte = *ptep;
                table = kvm_pte_table(pte, level);
        }

        if (ret)
                goto out;

        if (!table) {
                data->addr += kvm_granule_size(level);
                goto out;
        }

        childp = kvm_pte_follow(pte);
        ret = __kvm_pgtable_walk(data, childp, level + 1);
        if (ret)
                goto out;

        if (flags & KVM_PGTABLE_WALK_TABLE_POST) {
                ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
                                             KVM_PGTABLE_WALK_TABLE_POST);
        }

out:
        return ret;
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
                              kvm_pte_t *pgtable, u32 level)
{
        u32 idx;
        int ret = 0;

        if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
                return -EINVAL;

        for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
                kvm_pte_t *ptep = &pgtable[idx];

                if (data->addr >= data->end)
                        break;

                ret = __kvm_pgtable_visit(data, ptep, level);
                if (ret)
                        break;
        }

        return ret;
}

static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
{
        u32 idx;
        int ret = 0;
        struct kvm_pgtable *pgt = data->pgt;
        u64 limit = BIT(pgt->ia_bits);

        if (data->addr > limit || data->end > limit)
                return -ERANGE;

        if (!pgt->pgd)
                return -EINVAL;

        for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) {
                kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE];

                ret = __kvm_pgtable_walk(data, ptep, pgt->start_level);
                if (ret)
                        break;
        }

        return ret;
}

int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
                     struct kvm_pgtable_walker *walker)
{
        struct kvm_pgtable_walk_data walk_data = {
                .pgt    = pgt,
                .addr   = ALIGN_DOWN(addr, PAGE_SIZE),
                .end    = PAGE_ALIGN(walk_data.addr + size),
                .walker = walker,
        };

        return _kvm_pgtable_walk(&walk_data);
}
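
/*
 * Illustrative sketch of how a caller can drive the walker above: the
 * example_* helpers below are hypothetical and simply count the valid leaf
 * entries in a range by registering a LEAF-only walker.
 */
static int example_count_leaf_walker(u64 addr, u64 end, u32 level,
                                     kvm_pte_t *ptep,
                                     enum kvm_pgtable_walk_flags flag,
                                     void * const arg)
{
        u64 *count = arg;

        /* LEAF visits also cover invalid entries, so filter those out. */
        if (kvm_pte_valid(*ptep))
                (*count)++;

        return 0;
}

static int __maybe_unused example_count_leaves(struct kvm_pgtable *pgt,
                                               u64 addr, u64 size, u64 *count)
{
        struct kvm_pgtable_walker walker = {
                .cb     = example_count_leaf_walker,
                .flags  = KVM_PGTABLE_WALK_LEAF,
                .arg    = count,
        };

        *count = 0;
        return kvm_pgtable_walk(pgt, addr, size, &walker);
}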

struct hyp_map_data {
        u64             phys;
        kvm_pte_t       attr;
};

static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot,
                                 struct hyp_map_data *data)
{
        bool device = prot & KVM_PGTABLE_PROT_DEVICE;
        u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
        kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
        u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
        u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
                                               KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;

        if (!(prot & KVM_PGTABLE_PROT_R))
                return -EINVAL;

        if (prot & KVM_PGTABLE_PROT_X) {
                if (prot & KVM_PGTABLE_PROT_W)
                        return -EINVAL;

                if (device)
                        return -EINVAL;
        } else {
                attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
        }

        attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
        attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
        attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
        data->attr = attr;
        return 0;
}

static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
                                    kvm_pte_t *ptep, struct hyp_map_data *data)
{
        u64 granule = kvm_granule_size(level), phys = data->phys;

        if (!kvm_block_mapping_supported(addr, end, phys, level))
                return false;

        WARN_ON(!kvm_set_valid_leaf_pte(ptep, phys, data->attr, level));
        data->phys += granule;
        return true;
}

static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
                          enum kvm_pgtable_walk_flags flag, void * const arg)
{
        kvm_pte_t *childp;

        if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
                return 0;

        if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
                return -EINVAL;

        childp = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
        if (!childp)
                return -ENOMEM;

        kvm_set_table_pte(ptep, childp);
        return 0;
}

int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
                        enum kvm_pgtable_prot prot)
{
        int ret;
        struct hyp_map_data map_data = {
                .phys   = ALIGN_DOWN(phys, PAGE_SIZE),
        };
        struct kvm_pgtable_walker walker = {
                .cb     = hyp_map_walker,
                .flags  = KVM_PGTABLE_WALK_LEAF,
                .arg    = &map_data,
        };

        ret = hyp_map_set_prot_attr(prot, &map_data);
        if (ret)
                return ret;

        ret = kvm_pgtable_walk(pgt, addr, size, &walker);
        dsb(ishst);
        isb();
        return ret;
}

int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits)
{
        u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);

        pgt->pgd = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
        if (!pgt->pgd)
                return -ENOMEM;

        pgt->ia_bits            = va_bits;
        pgt->start_level        = KVM_PGTABLE_MAX_LEVELS - levels;
        pgt->mmu                = NULL;
        return 0;
}

static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
                           enum kvm_pgtable_walk_flags flag, void * const arg)
{
        free_page((unsigned long)kvm_pte_follow(*ptep));
        return 0;
}

void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
        struct kvm_pgtable_walker walker = {
                .cb     = hyp_free_walker,
                .flags  = KVM_PGTABLE_WALK_TABLE_POST,
        };

        WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
        free_page((unsigned long)pgt->pgd);
        pgt->pgd = NULL;
}

struct stage2_map_data {
        u64                             phys;
        kvm_pte_t                       attr;

        kvm_pte_t                       *anchor;

        struct kvm_s2_mmu               *mmu;
        struct kvm_mmu_memory_cache     *memcache;
};

static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
                                    struct stage2_map_data *data)
{
        bool device = prot & KVM_PGTABLE_PROT_DEVICE;
        kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
                            PAGE_S2_MEMATTR(NORMAL);
        u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;

        if (!(prot & KVM_PGTABLE_PROT_X))
                attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
        else if (device)
                return -EINVAL;

        if (prot & KVM_PGTABLE_PROT_R)
                attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

        if (prot & KVM_PGTABLE_PROT_W)
                attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

        attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
        attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
        data->attr = attr;
        return 0;
}

static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
                                       kvm_pte_t *ptep,
                                       struct stage2_map_data *data)
{
        u64 granule = kvm_granule_size(level), phys = data->phys;

        if (!kvm_block_mapping_supported(addr, end, phys, level))
                return false;

        if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level))
                goto out;

        /* There's an existing valid leaf entry, so perform break-before-make */
        kvm_set_invalid_pte(ptep);
        kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
        kvm_set_valid_leaf_pte(ptep, phys, data->attr, level);
out:
        data->phys += granule;
        return true;
}

static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
                                     kvm_pte_t *ptep,
                                     struct stage2_map_data *data)
{
        if (data->anchor)
                return 0;

        if (!kvm_block_mapping_supported(addr, end, data->phys, level))
                return 0;

        kvm_set_invalid_pte(ptep);
        kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, 0);
        data->anchor = ptep;
        return 0;
}

static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
                                struct stage2_map_data *data)
{
        kvm_pte_t *childp, pte = *ptep;
        struct page *page = virt_to_page(ptep);

        if (data->anchor) {
                if (kvm_pte_valid(pte))
                        put_page(page);

                return 0;
        }

        if (stage2_map_walker_try_leaf(addr, end, level, ptep, data))
                goto out_get_page;

        if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
                return -EINVAL;

        if (!data->memcache)
                return -ENOMEM;

        childp = kvm_mmu_memory_cache_alloc(data->memcache);
        if (!childp)
                return -ENOMEM;

        /*
         * If we've run into an existing block mapping then replace it with
         * a table. Accesses beyond 'end' that fall within the new table
         * will be mapped lazily.
         */
        if (kvm_pte_valid(pte)) {
                kvm_set_invalid_pte(ptep);
                kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
                put_page(page);
        }

        kvm_set_table_pte(ptep, childp);

out_get_page:
        get_page(page);
        return 0;
}

static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
                                      kvm_pte_t *ptep,
                                      struct stage2_map_data *data)
{
        int ret = 0;

        if (!data->anchor)
                return 0;

        free_page((unsigned long)kvm_pte_follow(*ptep));
        put_page(virt_to_page(ptep));

        if (data->anchor == ptep) {
                data->anchor = NULL;
                ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
        }

        return ret;
}

/*
 * This is a little fiddly, as we use all three of the walk flags. The idea
 * is that the TABLE_PRE callback runs for table entries on the way down,
 * looking for table entries which we could conceivably replace with a
 * block entry for this mapping. If it finds one, then it sets the 'anchor'
 * field in 'struct stage2_map_data' to point at the table entry, before
 * clearing the entry to zero and descending into the now detached table.
 *
 * The behaviour of the LEAF callback then depends on whether or not the
 * anchor has been set. If not, then we're not using a block mapping higher
 * up the table and we perform the mapping at the existing leaves instead.
 * If, on the other hand, the anchor _is_ set, then we drop references to
 * all valid leaves so that the pages beneath the anchor can be freed.
 *
 * Finally, the TABLE_POST callback does nothing if the anchor has not
 * been set, but otherwise frees the page-table pages while walking back up
 * the page-table, installing the block entry when it revisits the anchor
 * pointer and clearing the anchor to NULL.
 */
static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
                             enum kvm_pgtable_walk_flags flag, void * const arg)
{
        struct stage2_map_data *data = arg;

        switch (flag) {
        case KVM_PGTABLE_WALK_TABLE_PRE:
                return stage2_map_walk_table_pre(addr, end, level, ptep, data);
        case KVM_PGTABLE_WALK_LEAF:
                return stage2_map_walk_leaf(addr, end, level, ptep, data);
        case KVM_PGTABLE_WALK_TABLE_POST:
                return stage2_map_walk_table_post(addr, end, level, ptep, data);
        }

        return -EINVAL;
}

int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
                           u64 phys, enum kvm_pgtable_prot prot,
                           struct kvm_mmu_memory_cache *mc)
{
        int ret;
        struct stage2_map_data map_data = {
                .phys           = ALIGN_DOWN(phys, PAGE_SIZE),
                .mmu            = pgt->mmu,
                .memcache       = mc,
        };
        struct kvm_pgtable_walker walker = {
                .cb             = stage2_map_walker,
                .flags          = KVM_PGTABLE_WALK_TABLE_PRE |
                                  KVM_PGTABLE_WALK_LEAF |
                                  KVM_PGTABLE_WALK_TABLE_POST,
                .arg            = &map_data,
        };

        ret = stage2_map_set_prot_attr(prot, &map_data);
        if (ret)
                return ret;

        ret = kvm_pgtable_walk(pgt, addr, size, &walker);
        dsb(ishst);
        return ret;
}
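
/*
 * Illustrative usage sketch (ipa, pfn and memcache are hypothetical
 * caller-side names): mapping one page of normal memory as read/write,
 * with any intermediate tables coming from a pre-topped-up memcache:
 *
 *      ret = kvm_pgtable_stage2_map(pgt, ipa, PAGE_SIZE, pfn << PAGE_SHIFT,
 *                                   KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W,
 *                                   memcache);
 */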

static void stage2_flush_dcache(void *addr, u64 size)
{
        if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
                return;

        __flush_dcache_area(addr, size);
}

static bool stage2_pte_cacheable(kvm_pte_t pte)
{
        u64 memattr = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR, pte);
        return memattr == PAGE_S2_MEMATTR(NORMAL);
}

static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
                               enum kvm_pgtable_walk_flags flag,
                               void * const arg)
{
        struct kvm_s2_mmu *mmu = arg;
        kvm_pte_t pte = *ptep, *childp = NULL;
        bool need_flush = false;

        if (!kvm_pte_valid(pte))
                return 0;

        if (kvm_pte_table(pte, level)) {
                childp = kvm_pte_follow(pte);

                if (page_count(virt_to_page(childp)) != 1)
                        return 0;
        } else if (stage2_pte_cacheable(pte)) {
                need_flush = true;
        }

        /*
         * This is similar to the map() path in that we unmap the entire
         * block entry and rely on the remaining portions being faulted
         * back lazily.
         */
        kvm_set_invalid_pte(ptep);
        kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
        put_page(virt_to_page(ptep));

        if (need_flush) {
                stage2_flush_dcache(kvm_pte_follow(pte),
                                    kvm_granule_size(level));
        }

        if (childp)
                free_page((unsigned long)childp);

        return 0;
}

int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
        struct kvm_pgtable_walker walker = {
                .cb     = stage2_unmap_walker,
                .arg    = pgt->mmu,
                .flags  = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
        };

        return kvm_pgtable_walk(pgt, addr, size, &walker);
}

struct stage2_attr_data {
        kvm_pte_t       attr_set;
        kvm_pte_t       attr_clr;
        kvm_pte_t       pte;
        u32             level;
};

static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
                              enum kvm_pgtable_walk_flags flag,
                              void * const arg)
{
        kvm_pte_t pte = *ptep;
        struct stage2_attr_data *data = arg;

        if (!kvm_pte_valid(pte))
                return 0;

        data->level = level;
        data->pte = pte;
        pte &= ~data->attr_clr;
        pte |= data->attr_set;

        /*
         * We may race with the CPU trying to set the access flag here,
         * but worst-case the access flag update gets lost and will be
         * set on the next access instead.
         */
        if (data->pte != pte)
                WRITE_ONCE(*ptep, pte);

        return 0;
}

static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
                                    u64 size, kvm_pte_t attr_set,
                                    kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
                                    u32 *level)
{
        int ret;
        kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
        struct stage2_attr_data data = {
                .attr_set       = attr_set & attr_mask,
                .attr_clr       = attr_clr & attr_mask,
        };
        struct kvm_pgtable_walker walker = {
                .cb             = stage2_attr_walker,
                .arg            = &data,
                .flags          = KVM_PGTABLE_WALK_LEAF,
        };

        ret = kvm_pgtable_walk(pgt, addr, size, &walker);
        if (ret)
                return ret;

        if (orig_pte)
                *orig_pte = data.pte;

        if (level)
                *level = data.level;
        return 0;
}

int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
        return stage2_update_leaf_attrs(pgt, addr, size, 0,
                                        KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
                                        NULL, NULL);
}

kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
{
        kvm_pte_t pte = 0;
        stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
                                 &pte, NULL);
        dsb(ishst);
        return pte;
}

kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
{
        kvm_pte_t pte = 0;
        stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
                                 &pte, NULL);
        /*
         * "But where's the TLBI?!", you scream.
         * "Over in the core code", I sigh.
         *
         * See the '->clear_flush_young()' callback on the KVM mmu notifier.
         */
        return pte;
}

bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
{
        kvm_pte_t pte = 0;
        stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL);
        return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
}

int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
                                   enum kvm_pgtable_prot prot)
{
        int ret;
        u32 level;
        kvm_pte_t set = 0, clr = 0;

        if (prot & KVM_PGTABLE_PROT_R)
                set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

        if (prot & KVM_PGTABLE_PROT_W)
                set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

        if (prot & KVM_PGTABLE_PROT_X)
                clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;

        ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
        if (!ret)
                kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
        return ret;
}

static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
                               enum kvm_pgtable_walk_flags flag,
                               void * const arg)
{
        kvm_pte_t pte = *ptep;

        if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte))
                return 0;

        stage2_flush_dcache(kvm_pte_follow(pte), kvm_granule_size(level));
        return 0;
}

int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
        struct kvm_pgtable_walker walker = {
                .cb     = stage2_flush_walker,
                .flags  = KVM_PGTABLE_WALK_LEAF,
        };

        if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
                return 0;

        return kvm_pgtable_walk(pgt, addr, size, &walker);
}

int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm)
{
        size_t pgd_sz;
        u64 vtcr = kvm->arch.vtcr;
        u32 ia_bits = VTCR_EL2_IPA(vtcr);
        u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
        u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;

        pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
        pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL | __GFP_ZERO);
        if (!pgt->pgd)
                return -ENOMEM;

        pgt->ia_bits            = ia_bits;
        pgt->start_level        = start_level;
        pgt->mmu                = &kvm->arch.mmu;

        /* Ensure zeroed PGD pages are visible to the hardware walker */
        dsb(ishst);
        return 0;
}

static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
                              enum kvm_pgtable_walk_flags flag,
                              void * const arg)
{
        kvm_pte_t pte = *ptep;

        if (!kvm_pte_valid(pte))
                return 0;

        put_page(virt_to_page(ptep));

        if (kvm_pte_table(pte, level))
                free_page((unsigned long)kvm_pte_follow(pte));

        return 0;
}

void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
        size_t pgd_sz;
        struct kvm_pgtable_walker walker = {
                .cb     = stage2_free_walker,
                .flags  = KVM_PGTABLE_WALK_LEAF |
                          KVM_PGTABLE_WALK_TABLE_POST,
        };

        WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
        pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
        free_pages_exact(pgt->pgd, pgd_sz);
        pgt->pgd = NULL;
}