lib/test_hmm.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * This is a module to test the HMM (Heterogeneous Memory Management)
4  * mirror and zone device private memory migration APIs of the kernel.
5  * Userspace programs can register with the driver to mirror their own address
6  * space and can use the device to read/write any valid virtual address.
7  */
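/*
 * Illustrative userspace usage (a minimal sketch, not part of this module;
 * struct hmm_dmirror_cmd and the HMM_DMIRROR_* ioctls come from
 * test_hmm_uapi.h, "buf" and "out" are placeholder user buffers, and the
 * "/dev/hmm_dmirror0" node name is an assumption based on how the character
 * devices are usually exposed for the selftests):
 *
 *	int fd = open("/dev/hmm_dmirror0", O_RDWR);
 *	struct hmm_dmirror_cmd cmd = {
 *		.addr = (__u64)(uintptr_t)buf,	// page-aligned range to mirror
 *		.ptr = (__u64)(uintptr_t)out,	// user buffer for the result
 *		.npages = 1,
 *	};
 *	ioctl(fd, HMM_DMIRROR_SNAPSHOT, &cmd);	// one permission byte per page
 */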
8 #include <linux/init.h>
9 #include <linux/fs.h>
10 #include <linux/mm.h>
11 #include <linux/module.h>
12 #include <linux/kernel.h>
13 #include <linux/cdev.h>
14 #include <linux/device.h>
15 #include <linux/memremap.h>
16 #include <linux/mutex.h>
17 #include <linux/rwsem.h>
18 #include <linux/sched.h>
19 #include <linux/slab.h>
20 #include <linux/highmem.h>
21 #include <linux/delay.h>
22 #include <linux/pagemap.h>
23 #include <linux/hmm.h>
24 #include <linux/vmalloc.h>
25 #include <linux/swap.h>
26 #include <linux/swapops.h>
27 #include <linux/sched/mm.h>
28 #include <linux/platform_device.h>
29 #include <linux/rmap.h>
30 #include <linux/mmu_notifier.h>
31 #include <linux/migrate.h>
32
33 #include "test_hmm_uapi.h"
34
35 #define DMIRROR_NDEVICES                4
36 #define DMIRROR_RANGE_FAULT_TIMEOUT     1000
37 #define DEVMEM_CHUNK_SIZE               (256 * 1024 * 1024U)
38 #define DEVMEM_CHUNKS_RESERVE           16
39
40 /*
41  * For device_private pages, dpage is just a dummy struct page
42  * representing a piece of device memory. dmirror_devmem_alloc_page
43  * allocates a real system memory page as backing storage to fake a
44  * real device. zone_device_data points to that backing page. But
45  * for device_coherent memory, the struct page represents real
46  * physical CPU-accessible memory that we can use directly.
47  */
48 #define BACKING_PAGE(page) (is_device_private_page((page)) ? \
49                            (page)->zone_device_data : (page))
50
51 static unsigned long spm_addr_dev0;
52 module_param(spm_addr_dev0, long, 0644);
53 MODULE_PARM_DESC(spm_addr_dev0,
54                 "Specify start address for SPM (special purpose memory) used for device 0. By setting this, the coherent device type will be used. Make sure spm_addr_dev1 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");
55
56 static unsigned long spm_addr_dev1;
57 module_param(spm_addr_dev1, long, 0644);
58 MODULE_PARM_DESC(spm_addr_dev1,
59                 "Specify start address for SPM (special purpose memory) used for device 1. By setting this, the coherent device type will be used. Make sure spm_addr_dev0 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");
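/*
 * Example invocation (illustrative only, assuming the module is built as
 * test_hmm.ko and the given physical addresses point at reserved special
 * purpose memory of at least DEVMEM_CHUNK_SIZE each):
 *
 *	modprobe test_hmm spm_addr_dev0=0x100000000 spm_addr_dev1=0x110000000
 */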
60
61 static const struct dev_pagemap_ops dmirror_devmem_ops;
62 static const struct mmu_interval_notifier_ops dmirror_min_ops;
63 static dev_t dmirror_dev;
64
65 struct dmirror_device;
66
67 struct dmirror_bounce {
68         void                    *ptr;
69         unsigned long           size;
70         unsigned long           addr;
71         unsigned long           cpages;
72 };
73
74 #define DPT_XA_TAG_ATOMIC 1UL
75 #define DPT_XA_TAG_WRITE 3UL
76
77 /*
78  * Data structure to track address ranges and register for mmu interval
79  * notifier updates.
80  */
81 struct dmirror_interval {
82         struct mmu_interval_notifier    notifier;
83         struct dmirror                  *dmirror;
84 };
85
86 /*
87  * Data attached to the open device file.
88  * Note that it might be shared after a fork().
89  */
90 struct dmirror {
91         struct dmirror_device           *mdevice;
92         struct xarray                   pt;
93         struct mmu_interval_notifier    notifier;
94         struct mutex                    mutex;
95 };
96
97 /*
98  * ZONE_DEVICE pages for migration and simulating device memory.
99  */
100 struct dmirror_chunk {
101         struct dev_pagemap      pagemap;
102         struct dmirror_device   *mdevice;
103 };
104
105 /*
106  * Per device data.
107  */
108 struct dmirror_device {
109         struct cdev             cdevice;
110         struct hmm_devmem       *devmem;
111         unsigned int            zone_device_type;
112
113         unsigned int            devmem_capacity;
114         unsigned int            devmem_count;
115         struct dmirror_chunk    **devmem_chunks;
116         struct mutex            devmem_lock;    /* protects the above */
117
118         unsigned long           calloc;
119         unsigned long           cfree;
120         struct page             *free_pages;
121         spinlock_t              lock;           /* protects the above */
122 };
123
124 static struct dmirror_device dmirror_devices[DMIRROR_NDEVICES];
125
126 static int dmirror_bounce_init(struct dmirror_bounce *bounce,
127                                unsigned long addr,
128                                unsigned long size)
129 {
130         bounce->addr = addr;
131         bounce->size = size;
132         bounce->cpages = 0;
133         bounce->ptr = vmalloc(size);
134         if (!bounce->ptr)
135                 return -ENOMEM;
136         return 0;
137 }
138
139 static bool dmirror_is_private_zone(struct dmirror_device *mdevice)
140 {
141         return mdevice->zone_device_type ==
142                 HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
143 }
144
145 static enum migrate_vma_direction
146 dmirror_select_device(struct dmirror *dmirror)
147 {
148         return (dmirror->mdevice->zone_device_type ==
149                 HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
150                 MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
151                 MIGRATE_VMA_SELECT_DEVICE_COHERENT;
152 }
153
154 static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
155 {
156         vfree(bounce->ptr);
157 }
158
159 static int dmirror_fops_open(struct inode *inode, struct file *filp)
160 {
161         struct cdev *cdev = inode->i_cdev;
162         struct dmirror *dmirror;
163         int ret;
164
165         /* Mirror this process address space */
166         dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL);
167         if (dmirror == NULL)
168                 return -ENOMEM;
169
170         dmirror->mdevice = container_of(cdev, struct dmirror_device, cdevice);
171         mutex_init(&dmirror->mutex);
172         xa_init(&dmirror->pt);
173
174         ret = mmu_interval_notifier_insert(&dmirror->notifier, current->mm,
175                                 0, ULONG_MAX & PAGE_MASK, &dmirror_min_ops);
176         if (ret) {
177                 kfree(dmirror);
178                 return ret;
179         }
180
181         filp->private_data = dmirror;
182         return 0;
183 }
184
185 static int dmirror_fops_release(struct inode *inode, struct file *filp)
186 {
187         struct dmirror *dmirror = filp->private_data;
188
189         mmu_interval_notifier_remove(&dmirror->notifier);
190         xa_destroy(&dmirror->pt);
191         kfree(dmirror);
192         return 0;
193 }
194
195 static struct dmirror_device *dmirror_page_to_device(struct page *page)
196
197 {
198         return container_of(page->pgmap, struct dmirror_chunk,
199                             pagemap)->mdevice;
200 }
201
202 static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
203 {
204         unsigned long *pfns = range->hmm_pfns;
205         unsigned long pfn;
206
207         for (pfn = (range->start >> PAGE_SHIFT);
208              pfn < (range->end >> PAGE_SHIFT);
209              pfn++, pfns++) {
210                 struct page *page;
211                 void *entry;
212
213                 /*
214                  * Since we asked hmm_range_fault() to populate pages,
215                  * it shouldn't return an error entry on success.
216                  */
217                 WARN_ON(*pfns & HMM_PFN_ERROR);
218                 WARN_ON(!(*pfns & HMM_PFN_VALID));
219
220                 page = hmm_pfn_to_page(*pfns);
221                 WARN_ON(!page);
222
223                 entry = page;
224                 if (*pfns & HMM_PFN_WRITE)
225                         entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
226                 else if (WARN_ON(range->default_flags & HMM_PFN_WRITE))
227                         return -EFAULT;
228                 entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
229                 if (xa_is_err(entry))
230                         return xa_err(entry);
231         }
232
233         return 0;
234 }
235
236 static void dmirror_do_update(struct dmirror *dmirror, unsigned long start,
237                               unsigned long end)
238 {
239         unsigned long pfn;
240         void *entry;
241
242         /*
243          * The XArray doesn't hold references to pages since it relies on
244          * the mmu notifier to clear page pointers when they become stale.
245          * Therefore, it is OK to just clear the entry.
246          */
247         xa_for_each_range(&dmirror->pt, pfn, entry, start >> PAGE_SHIFT,
248                           end >> PAGE_SHIFT)
249                 xa_erase(&dmirror->pt, pfn);
250 }
251
252 static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni,
253                                 const struct mmu_notifier_range *range,
254                                 unsigned long cur_seq)
255 {
256         struct dmirror *dmirror = container_of(mni, struct dmirror, notifier);
257
258         /*
259          * Ignore invalidation callbacks for device private pages since
260          * the invalidation is handled as part of the migration process.
261          */
262         if (range->event == MMU_NOTIFY_MIGRATE &&
263             range->owner == dmirror->mdevice)
264                 return true;
265
266         if (mmu_notifier_range_blockable(range))
267                 mutex_lock(&dmirror->mutex);
268         else if (!mutex_trylock(&dmirror->mutex))
269                 return false;
270
271         mmu_interval_set_seq(mni, cur_seq);
272         dmirror_do_update(dmirror, range->start, range->end);
273
274         mutex_unlock(&dmirror->mutex);
275         return true;
276 }
277
278 static const struct mmu_interval_notifier_ops dmirror_min_ops = {
279         .invalidate = dmirror_interval_invalidate,
280 };
281
282 static int dmirror_range_fault(struct dmirror *dmirror,
283                                 struct hmm_range *range)
284 {
285         struct mm_struct *mm = dmirror->notifier.mm;
286         unsigned long timeout =
287                 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
288         int ret;
289
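        /*
         * Standard mmu_interval_notifier retry protocol: sample the notifier
         * sequence, fault the range with hmm_range_fault() under the mmap
         * read lock, then take dmirror->mutex and check for a concurrent
         * invalidation with mmu_interval_read_retry(). Only if the sequence
         * is unchanged are the results committed to the page table below.
         */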
290         while (true) {
291                 if (time_after(jiffies, timeout)) {
292                         ret = -EBUSY;
293                         goto out;
294                 }
295
296                 range->notifier_seq = mmu_interval_read_begin(range->notifier);
297                 mmap_read_lock(mm);
298                 ret = hmm_range_fault(range);
299                 mmap_read_unlock(mm);
300                 if (ret) {
301                         if (ret == -EBUSY)
302                                 continue;
303                         goto out;
304                 }
305
306                 mutex_lock(&dmirror->mutex);
307                 if (mmu_interval_read_retry(range->notifier,
308                                             range->notifier_seq)) {
309                         mutex_unlock(&dmirror->mutex);
310                         continue;
311                 }
312                 break;
313         }
314
315         ret = dmirror_do_fault(dmirror, range);
316
317         mutex_unlock(&dmirror->mutex);
318 out:
319         return ret;
320 }
321
322 static int dmirror_fault(struct dmirror *dmirror, unsigned long start,
323                          unsigned long end, bool write)
324 {
325         struct mm_struct *mm = dmirror->notifier.mm;
326         unsigned long addr;
327         unsigned long pfns[64];
328         struct hmm_range range = {
329                 .notifier = &dmirror->notifier,
330                 .hmm_pfns = pfns,
331                 .pfn_flags_mask = 0,
332                 .default_flags =
333                         HMM_PFN_REQ_FAULT | (write ? HMM_PFN_REQ_WRITE : 0),
334                 .dev_private_owner = dmirror->mdevice,
335         };
336         int ret = 0;
337
338         /* Since the mm is for the mirrored process, get a reference first. */
339         if (!mmget_not_zero(mm))
340                 return 0;
341
342         for (addr = start; addr < end; addr = range.end) {
343                 range.start = addr;
344                 range.end = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
345
346                 ret = dmirror_range_fault(dmirror, &range);
347                 if (ret)
348                         break;
349         }
350
351         mmput(mm);
352         return ret;
353 }
354
355 static int dmirror_do_read(struct dmirror *dmirror, unsigned long start,
356                            unsigned long end, struct dmirror_bounce *bounce)
357 {
358         unsigned long pfn;
359         void *ptr;
360
361         ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);
362
363         for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
364                 void *entry;
365                 struct page *page;
366                 void *tmp;
367
368                 entry = xa_load(&dmirror->pt, pfn);
369                 page = xa_untag_pointer(entry);
370                 if (!page)
371                         return -ENOENT;
372
373                 tmp = kmap(page);
374                 memcpy(ptr, tmp, PAGE_SIZE);
375                 kunmap(page);
376
377                 ptr += PAGE_SIZE;
378                 bounce->cpages++;
379         }
380
381         return 0;
382 }
383
384 static int dmirror_read(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
385 {
386         struct dmirror_bounce bounce;
387         unsigned long start, end;
388         unsigned long size = cmd->npages << PAGE_SHIFT;
389         int ret;
390
391         start = cmd->addr;
392         end = start + size;
393         if (end < start)
394                 return -EINVAL;
395
396         ret = dmirror_bounce_init(&bounce, start, size);
397         if (ret)
398                 return ret;
399
400         while (1) {
401                 mutex_lock(&dmirror->mutex);
402                 ret = dmirror_do_read(dmirror, start, end, &bounce);
403                 mutex_unlock(&dmirror->mutex);
404                 if (ret != -ENOENT)
405                         break;
406
407                 start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
408                 ret = dmirror_fault(dmirror, start, end, false);
409                 if (ret)
410                         break;
411                 cmd->faults++;
412         }
413
414         if (ret == 0) {
415                 if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
416                                  bounce.size))
417                         ret = -EFAULT;
418         }
419         cmd->cpages = bounce.cpages;
420         dmirror_bounce_fini(&bounce);
421         return ret;
422 }
423
424 static int dmirror_do_write(struct dmirror *dmirror, unsigned long start,
425                             unsigned long end, struct dmirror_bounce *bounce)
426 {
427         unsigned long pfn;
428         void *ptr;
429
430         ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);
431
432         for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
433                 void *entry;
434                 struct page *page;
435                 void *tmp;
436
437                 entry = xa_load(&dmirror->pt, pfn);
438                 page = xa_untag_pointer(entry);
439                 if (!page || xa_pointer_tag(entry) != DPT_XA_TAG_WRITE)
440                         return -ENOENT;
441
442                 tmp = kmap(page);
443                 memcpy(tmp, ptr, PAGE_SIZE);
444                 kunmap(page);
445
446                 ptr += PAGE_SIZE;
447                 bounce->cpages++;
448         }
449
450         return 0;
451 }
452
453 static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
454 {
455         struct dmirror_bounce bounce;
456         unsigned long start, end;
457         unsigned long size = cmd->npages << PAGE_SHIFT;
458         int ret;
459
460         start = cmd->addr;
461         end = start + size;
462         if (end < start)
463                 return -EINVAL;
464
465         ret = dmirror_bounce_init(&bounce, start, size);
466         if (ret)
467                 return ret;
468         if (copy_from_user(bounce.ptr, u64_to_user_ptr(cmd->ptr),
469                            bounce.size)) {
470                 ret = -EFAULT;
471                 goto fini;
472         }
473
474         while (1) {
475                 mutex_lock(&dmirror->mutex);
476                 ret = dmirror_do_write(dmirror, start, end, &bounce);
477                 mutex_unlock(&dmirror->mutex);
478                 if (ret != -ENOENT)
479                         break;
480
481                 start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
482                 ret = dmirror_fault(dmirror, start, end, true);
483                 if (ret)
484                         break;
485                 cmd->faults++;
486         }
487
488 fini:
489         cmd->cpages = bounce.cpages;
490         dmirror_bounce_fini(&bounce);
491         return ret;
492 }
493
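/*
 * Add one DEVMEM_CHUNK_SIZE chunk of ZONE_DEVICE memory to the device. For
 * the device private case a free physical range is reserved from
 * iomem_resource; for the device coherent case the chunk is placed at the
 * SPM address given on the module command line. The chunk is registered
 * with memremap_pages() and all of its struct pages are threaded onto
 * mdevice->free_pages (optionally handing one page back via @ppage).
 */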
494 static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
495                                    struct page **ppage)
496 {
497         struct dmirror_chunk *devmem;
498         struct resource *res = NULL;
499         unsigned long pfn;
500         unsigned long pfn_first;
501         unsigned long pfn_last;
502         void *ptr;
503         int ret = -ENOMEM;
504
505         devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
506         if (!devmem)
507                 return ret;
508
509         switch (mdevice->zone_device_type) {
510         case HMM_DMIRROR_MEMORY_DEVICE_PRIVATE:
511                 res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
512                                               "hmm_dmirror");
513                 if (IS_ERR_OR_NULL(res))
514                         goto err_devmem;
515                 devmem->pagemap.range.start = res->start;
516                 devmem->pagemap.range.end = res->end;
517                 devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
518                 break;
519         case HMM_DMIRROR_MEMORY_DEVICE_COHERENT:
520                 devmem->pagemap.range.start = (MINOR(mdevice->cdevice.dev) - 2) ?
521                                                         spm_addr_dev0 :
522                                                         spm_addr_dev1;
523                 devmem->pagemap.range.end = devmem->pagemap.range.start +
524                                             DEVMEM_CHUNK_SIZE - 1;
525                 devmem->pagemap.type = MEMORY_DEVICE_COHERENT;
526                 break;
527         default:
528                 ret = -EINVAL;
529                 goto err_devmem;
530         }
531
532         devmem->pagemap.nr_range = 1;
533         devmem->pagemap.ops = &dmirror_devmem_ops;
534         devmem->pagemap.owner = mdevice;
535
536         mutex_lock(&mdevice->devmem_lock);
537
538         if (mdevice->devmem_count == mdevice->devmem_capacity) {
539                 struct dmirror_chunk **new_chunks;
540                 unsigned int new_capacity;
541
542                 new_capacity = mdevice->devmem_capacity +
543                                 DEVMEM_CHUNKS_RESERVE;
544                 new_chunks = krealloc(mdevice->devmem_chunks,
545                                 sizeof(new_chunks[0]) * new_capacity,
546                                 GFP_KERNEL);
547                 if (!new_chunks)
548                         goto err_release;
549                 mdevice->devmem_capacity = new_capacity;
550                 mdevice->devmem_chunks = new_chunks;
551         }
552         ptr = memremap_pages(&devmem->pagemap, numa_node_id());
553         if (IS_ERR_OR_NULL(ptr)) {
554                 if (ptr)
555                         ret = PTR_ERR(ptr);
556                 else
557                         ret = -EFAULT;
558                 goto err_release;
559         }
560
561         devmem->mdevice = mdevice;
562         pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT;
563         pfn_last = pfn_first + (range_len(&devmem->pagemap.range) >> PAGE_SHIFT);
564         mdevice->devmem_chunks[mdevice->devmem_count++] = devmem;
565
566         mutex_unlock(&mdevice->devmem_lock);
567
568         pr_info("added new %u MB chunk (total %u chunks, %u MB) PFNs [0x%lx 0x%lx)\n",
569                 DEVMEM_CHUNK_SIZE / (1024 * 1024),
570                 mdevice->devmem_count,
571                 mdevice->devmem_count * (DEVMEM_CHUNK_SIZE / (1024 * 1024)),
572                 pfn_first, pfn_last);
573
574         spin_lock(&mdevice->lock);
575         for (pfn = pfn_first; pfn < pfn_last; pfn++) {
576                 struct page *page = pfn_to_page(pfn);
577
578                 page->zone_device_data = mdevice->free_pages;
579                 mdevice->free_pages = page;
580         }
581         if (ppage) {
582                 *ppage = mdevice->free_pages;
583                 mdevice->free_pages = (*ppage)->zone_device_data;
584                 mdevice->calloc++;
585         }
586         spin_unlock(&mdevice->lock);
587
588         return 0;
589
590 err_release:
591         mutex_unlock(&mdevice->devmem_lock);
592         if (res && devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
593                 release_mem_region(devmem->pagemap.range.start,
594                                    range_len(&devmem->pagemap.range));
595 err_devmem:
596         kfree(devmem);
597
598         return ret;
599 }
600
601 static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
602 {
603         struct page *dpage = NULL;
604         struct page *rpage = NULL;
605
606         /*
607          * For ZONE_DEVICE private type, this is a fake device so we allocate
608          * real system memory to store our device memory.
609          * For ZONE_DEVICE coherent type we use the actual dpage to store the
610          * data and ignore rpage.
611          */
612         if (dmirror_is_private_zone(mdevice)) {
613                 rpage = alloc_page(GFP_HIGHUSER);
614                 if (!rpage)
615                         return NULL;
616         }
617         spin_lock(&mdevice->lock);
618
619         if (mdevice->free_pages) {
620                 dpage = mdevice->free_pages;
621                 mdevice->free_pages = dpage->zone_device_data;
622                 mdevice->calloc++;
623                 spin_unlock(&mdevice->lock);
624         } else {
625                 spin_unlock(&mdevice->lock);
626                 if (dmirror_allocate_chunk(mdevice, &dpage))
627                         goto error;
628         }
629
630         dpage->zone_device_data = rpage;
631         lock_page(dpage);
632         return dpage;
633
634 error:
635         if (rpage)
636                 __free_page(rpage);
637         return NULL;
638 }
639
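/*
 * Allocate a device page for each system page selected for migration and
 * copy (or zero-fill, for pte_none()/zero-page sources) the data into its
 * backing page. The dmirror pointer is stashed in the backing page's
 * zone_device_data so a later device fault can find its way back.
 */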
640 static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
641                                            struct dmirror *dmirror)
642 {
643         struct dmirror_device *mdevice = dmirror->mdevice;
644         const unsigned long *src = args->src;
645         unsigned long *dst = args->dst;
646         unsigned long addr;
647
648         for (addr = args->start; addr < args->end; addr += PAGE_SIZE,
649                                                    src++, dst++) {
650                 struct page *spage;
651                 struct page *dpage;
652                 struct page *rpage;
653
654                 if (!(*src & MIGRATE_PFN_MIGRATE))
655                         continue;
656
657                 /*
658                  * Note that spage might be NULL which is OK since it is an
659                  * unallocated pte_none() or read-only zero page.
660                  */
661                 spage = migrate_pfn_to_page(*src);
662                 if (WARN(spage && is_zone_device_page(spage),
663                      "page already in device spage pfn: 0x%lx\n",
664                      page_to_pfn(spage)))
665                         continue;
666
667                 dpage = dmirror_devmem_alloc_page(mdevice);
668                 if (!dpage)
669                         continue;
670
671                 rpage = BACKING_PAGE(dpage);
672                 if (spage)
673                         copy_highpage(rpage, spage);
674                 else
675                         clear_highpage(rpage);
676
677                 /*
678                  * Normally, a device would use the page->zone_device_data to
679                  * point to the mirror but here we use it to hold the page for
680                  * the simulated device memory and that page holds the pointer
681                  * to the mirror.
682                  */
683                 rpage->zone_device_data = dmirror;
684
685                 pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
686                          page_to_pfn(spage), page_to_pfn(dpage));
687                 *dst = migrate_pfn(page_to_pfn(dpage));
688                 if ((*src & MIGRATE_PFN_WRITE) ||
689                     (!spage && args->vma->vm_flags & VM_WRITE))
690                         *dst |= MIGRATE_PFN_WRITE;
691         }
692 }
693
694 static int dmirror_check_atomic(struct dmirror *dmirror, unsigned long start,
695                              unsigned long end)
696 {
697         unsigned long pfn;
698
699         for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
700                 void *entry;
701
702                 entry = xa_load(&dmirror->pt, pfn);
703                 if (xa_pointer_tag(entry) == DPT_XA_TAG_ATOMIC)
704                         return -EPERM;
705         }
706
707         return 0;
708 }
709
710 static int dmirror_atomic_map(unsigned long start, unsigned long end,
711                               struct page **pages, struct dmirror *dmirror)
712 {
713         unsigned long pfn, mapped = 0;
714         int i;
715
716         /* Map the pages marked for exclusive access into the device's page tables. */
717         mutex_lock(&dmirror->mutex);
718
719         for (i = 0, pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++, i++) {
720                 void *entry;
721
722                 if (!pages[i])
723                         continue;
724
725                 entry = pages[i];
726                 entry = xa_tag_pointer(entry, DPT_XA_TAG_ATOMIC);
727                 entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
728                 if (xa_is_err(entry)) {
729                         mutex_unlock(&dmirror->mutex);
730                         return xa_err(entry);
731                 }
732
733                 mapped++;
734         }
735
736         mutex_unlock(&dmirror->mutex);
737         return mapped;
738 }
739
740 static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
741                                             struct dmirror *dmirror)
742 {
743         unsigned long start = args->start;
744         unsigned long end = args->end;
745         const unsigned long *src = args->src;
746         const unsigned long *dst = args->dst;
747         unsigned long pfn;
748
749         /* Map the migrated pages into the device's page tables. */
750         mutex_lock(&dmirror->mutex);
751
752         for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++,
753                                                                 src++, dst++) {
754                 struct page *dpage;
755                 void *entry;
756
757                 if (!(*src & MIGRATE_PFN_MIGRATE))
758                         continue;
759
760                 dpage = migrate_pfn_to_page(*dst);
761                 if (!dpage)
762                         continue;
763
764                 entry = BACKING_PAGE(dpage);
765                 if (*dst & MIGRATE_PFN_WRITE)
766                         entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
767                 entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
768                 if (xa_is_err(entry)) {
769                         mutex_unlock(&dmirror->mutex);
770                         return xa_err(entry);
771                 }
772         }
773
774         mutex_unlock(&dmirror->mutex);
775         return 0;
776 }
777
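/*
 * Mark a range of pages for exclusive (atomic) device access with
 * make_device_exclusive_range(), record them in the mirror page table with
 * the DPT_XA_TAG_ATOMIC tag, and copy the data back to userspace so the
 * test can verify it.
 */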
778 static int dmirror_exclusive(struct dmirror *dmirror,
779                              struct hmm_dmirror_cmd *cmd)
780 {
781         unsigned long start, end, addr;
782         unsigned long size = cmd->npages << PAGE_SHIFT;
783         struct mm_struct *mm = dmirror->notifier.mm;
784         struct page *pages[64];
785         struct dmirror_bounce bounce;
786         unsigned long next;
787         int ret;
788
789         start = cmd->addr;
790         end = start + size;
791         if (end < start)
792                 return -EINVAL;
793
794         /* Since the mm is for the mirrored process, get a reference first. */
795         if (!mmget_not_zero(mm))
796                 return -EINVAL;
797
798         mmap_read_lock(mm);
799         for (addr = start; addr < end; addr = next) {
800                 unsigned long mapped = 0;
801                 int i;
802
803                 if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT))
804                         next = end;
805                 else
806                         next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT);
807
808                 ret = make_device_exclusive_range(mm, addr, next, pages, NULL);
809                 /*
810                  * Do dmirror_atomic_map() iff all pages are marked for
811                  * exclusive access to avoid accessing uninitialized
812                  * fields of pages.
813                  */
814                 if (ret == (next - addr) >> PAGE_SHIFT)
815                         mapped = dmirror_atomic_map(addr, next, pages, dmirror);
816                 for (i = 0; i < ret; i++) {
817                         if (pages[i]) {
818                                 unlock_page(pages[i]);
819                                 put_page(pages[i]);
820                         }
821                 }
822
823                 if (addr + (mapped << PAGE_SHIFT) < next) {
824                         mmap_read_unlock(mm);
825                         mmput(mm);
826                         return -EBUSY;
827                 }
828         }
829         mmap_read_unlock(mm);
830         mmput(mm);
831
832         /* Return the migrated data for verification. */
833         ret = dmirror_bounce_init(&bounce, start, size);
834         if (ret)
835                 return ret;
836         mutex_lock(&dmirror->mutex);
837         ret = dmirror_do_read(dmirror, start, end, &bounce);
838         mutex_unlock(&dmirror->mutex);
839         if (ret == 0) {
840                 if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
841                                  bounce.size))
842                         ret = -EFAULT;
843         }
844
845         cmd->cpages = bounce.cpages;
846         dmirror_bounce_fini(&bounce);
847         return ret;
848 }
849
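/*
 * Migrate device private/coherent pages back to system memory: allocate a
 * normal page for each migrating source page, copy the data out of its
 * backing page, and drop the now stale entry from the mirror page table.
 */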
850 static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
851                                                       struct dmirror *dmirror)
852 {
853         const unsigned long *src = args->src;
854         unsigned long *dst = args->dst;
855         unsigned long start = args->start;
856         unsigned long end = args->end;
857         unsigned long addr;
858
859         for (addr = start; addr < end; addr += PAGE_SIZE,
860                                        src++, dst++) {
861                 struct page *dpage, *spage;
862
863                 spage = migrate_pfn_to_page(*src);
864                 if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
865                         continue;
866
867                 if (WARN_ON(!is_device_private_page(spage) &&
868                             !is_device_coherent_page(spage)))
869                         continue;
870                 spage = BACKING_PAGE(spage);
871                 dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
872                 if (!dpage)
873                         continue;
874                 pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
875                          page_to_pfn(spage), page_to_pfn(dpage));
876
877                 lock_page(dpage);
878                 xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
879                 copy_highpage(dpage, spage);
880                 *dst = migrate_pfn(page_to_pfn(dpage));
881                 if (*src & MIGRATE_PFN_WRITE)
882                         *dst |= MIGRATE_PFN_WRITE;
883         }
884         return 0;
885 }
886
887 static unsigned long
888 dmirror_successful_migrated_pages(struct migrate_vma *migrate)
889 {
890         unsigned long cpages = 0;
891         unsigned long i;
892
893         for (i = 0; i < migrate->npages; i++) {
894                 if (migrate->src[i] & MIGRATE_PFN_VALID &&
895                     migrate->src[i] & MIGRATE_PFN_MIGRATE)
896                         cpages++;
897         }
898         return cpages;
899 }
900
901 static int dmirror_migrate_to_system(struct dmirror *dmirror,
902                                      struct hmm_dmirror_cmd *cmd)
903 {
904         unsigned long start, end, addr;
905         unsigned long size = cmd->npages << PAGE_SHIFT;
906         struct mm_struct *mm = dmirror->notifier.mm;
907         struct vm_area_struct *vma;
908         unsigned long src_pfns[64] = { 0 };
909         unsigned long dst_pfns[64] = { 0 };
910         struct migrate_vma args;
911         unsigned long next;
912         int ret;
913
914         start = cmd->addr;
915         end = start + size;
916         if (end < start)
917                 return -EINVAL;
918
919         /* Since the mm is for the mirrored process, get a reference first. */
920         if (!mmget_not_zero(mm))
921                 return -EINVAL;
922
923         cmd->cpages = 0;
924         mmap_read_lock(mm);
925         for (addr = start; addr < end; addr = next) {
926                 vma = vma_lookup(mm, addr);
927                 if (!vma || !(vma->vm_flags & VM_READ)) {
928                         ret = -EINVAL;
929                         goto out;
930                 }
931                 next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
932                 if (next > vma->vm_end)
933                         next = vma->vm_end;
934
935                 args.vma = vma;
936                 args.src = src_pfns;
937                 args.dst = dst_pfns;
938                 args.start = addr;
939                 args.end = next;
940                 args.pgmap_owner = dmirror->mdevice;
941                 args.flags = dmirror_select_device(dmirror);
942
943                 ret = migrate_vma_setup(&args);
944                 if (ret)
945                         goto out;
946
947                 pr_debug("Migrating from device mem to sys mem\n");
948                 dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
949
950                 migrate_vma_pages(&args);
951                 cmd->cpages += dmirror_successful_migrated_pages(&args);
952                 migrate_vma_finalize(&args);
953         }
954 out:
955         mmap_read_unlock(mm);
956         mmput(mm);
957
958         return ret;
959 }
960
961 static int dmirror_migrate_to_device(struct dmirror *dmirror,
962                                 struct hmm_dmirror_cmd *cmd)
963 {
964         unsigned long start, end, addr;
965         unsigned long size = cmd->npages << PAGE_SHIFT;
966         struct mm_struct *mm = dmirror->notifier.mm;
967         struct vm_area_struct *vma;
968         unsigned long src_pfns[64] = { 0 };
969         unsigned long dst_pfns[64] = { 0 };
970         struct dmirror_bounce bounce;
971         struct migrate_vma args;
972         unsigned long next;
973         int ret;
974
975         start = cmd->addr;
976         end = start + size;
977         if (end < start)
978                 return -EINVAL;
979
980         /* Since the mm is for the mirrored process, get a reference first. */
981         if (!mmget_not_zero(mm))
982                 return -EINVAL;
983
984         mmap_read_lock(mm);
985         for (addr = start; addr < end; addr = next) {
986                 vma = vma_lookup(mm, addr);
987                 if (!vma || !(vma->vm_flags & VM_READ)) {
988                         ret = -EINVAL;
989                         goto out;
990                 }
991                 next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
992                 if (next > vma->vm_end)
993                         next = vma->vm_end;
994
995                 args.vma = vma;
996                 args.src = src_pfns;
997                 args.dst = dst_pfns;
998                 args.start = addr;
999                 args.end = next;
1000                 args.pgmap_owner = dmirror->mdevice;
1001                 args.flags = MIGRATE_VMA_SELECT_SYSTEM;
1002                 ret = migrate_vma_setup(&args);
1003                 if (ret)
1004                         goto out;
1005
1006                 pr_debug("Migrating from sys mem to device mem\n");
1007                 dmirror_migrate_alloc_and_copy(&args, dmirror);
1008                 migrate_vma_pages(&args);
1009                 dmirror_migrate_finalize_and_map(&args, dmirror);
1010                 migrate_vma_finalize(&args);
1011         }
1012         mmap_read_unlock(mm);
1013         mmput(mm);
1014
1015         /*
1016          * Return the migrated data for verification.
1017          * Only pages that are in the device zone are copied back.
1018          */
1019         ret = dmirror_bounce_init(&bounce, start, size);
1020         if (ret)
1021                 return ret;
1022         mutex_lock(&dmirror->mutex);
1023         ret = dmirror_do_read(dmirror, start, end, &bounce);
1024         mutex_unlock(&dmirror->mutex);
1025         if (ret == 0) {
1026                 if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
1027                                  bounce.size))
1028                         ret = -EFAULT;
1029         }
1030         cmd->cpages = bounce.cpages;
1031         dmirror_bounce_fini(&bounce);
1032         return ret;
1033
1034 out:
1035         mmap_read_unlock(mm);
1036         mmput(mm);
1037         return ret;
1038 }
1039
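/*
 * Convert one hmm_range_fault() PFN entry into the HMM_DMIRROR_PROT_*
 * permission byte reported by the HMM_DMIRROR_SNAPSHOT ioctl (error, none,
 * zero page, local/remote device private or coherent, plus read/write and
 * PMD/PUD mapping size flags).
 */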
1040 static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
1041                             unsigned char *perm, unsigned long entry)
1042 {
1043         struct page *page;
1044
1045         if (entry & HMM_PFN_ERROR) {
1046                 *perm = HMM_DMIRROR_PROT_ERROR;
1047                 return;
1048         }
1049         if (!(entry & HMM_PFN_VALID)) {
1050                 *perm = HMM_DMIRROR_PROT_NONE;
1051                 return;
1052         }
1053
1054         page = hmm_pfn_to_page(entry);
1055         if (is_device_private_page(page)) {
1056                 /* Is the page migrated to this device or some other? */
1057                 if (dmirror->mdevice == dmirror_page_to_device(page))
1058                         *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
1059                 else
1060                         *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
1061         } else if (is_device_coherent_page(page)) {
1062                 /* Is the page migrated to this device or some other? */
1063                 if (dmirror->mdevice == dmirror_page_to_device(page))
1064                         *perm = HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL;
1065                 else
1066                         *perm = HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE;
1067         } else if (is_zero_pfn(page_to_pfn(page)))
1068                 *perm = HMM_DMIRROR_PROT_ZERO;
1069         else
1070                 *perm = HMM_DMIRROR_PROT_NONE;
1071         if (entry & HMM_PFN_WRITE)
1072                 *perm |= HMM_DMIRROR_PROT_WRITE;
1073         else
1074                 *perm |= HMM_DMIRROR_PROT_READ;
1075         if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PMD_SHIFT)
1076                 *perm |= HMM_DMIRROR_PROT_PMD;
1077         else if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PUD_SHIFT)
1078                 *perm |= HMM_DMIRROR_PROT_PUD;
1079 }
1080
1081 static bool dmirror_snapshot_invalidate(struct mmu_interval_notifier *mni,
1082                                 const struct mmu_notifier_range *range,
1083                                 unsigned long cur_seq)
1084 {
1085         struct dmirror_interval *dmi =
1086                 container_of(mni, struct dmirror_interval, notifier);
1087         struct dmirror *dmirror = dmi->dmirror;
1088
1089         if (mmu_notifier_range_blockable(range))
1090                 mutex_lock(&dmirror->mutex);
1091         else if (!mutex_trylock(&dmirror->mutex))
1092                 return false;
1093
1094         /*
1095          * Snapshots only need to set the sequence number since any
1096          * invalidation in the interval invalidates the whole snapshot.
1097          */
1098         mmu_interval_set_seq(mni, cur_seq);
1099
1100         mutex_unlock(&dmirror->mutex);
1101         return true;
1102 }
1103
1104 static const struct mmu_interval_notifier_ops dmirror_mrn_ops = {
1105         .invalidate = dmirror_snapshot_invalidate,
1106 };
1107
1108 static int dmirror_range_snapshot(struct dmirror *dmirror,
1109                                   struct hmm_range *range,
1110                                   unsigned char *perm)
1111 {
1112         struct mm_struct *mm = dmirror->notifier.mm;
1113         struct dmirror_interval notifier;
1114         unsigned long timeout =
1115                 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
1116         unsigned long i;
1117         unsigned long n;
1118         int ret = 0;
1119
1120         notifier.dmirror = dmirror;
1121         range->notifier = &notifier.notifier;
1122
1123         ret = mmu_interval_notifier_insert(range->notifier, mm,
1124                         range->start, range->end - range->start,
1125                         &dmirror_mrn_ops);
1126         if (ret)
1127                 return ret;
1128
1129         while (true) {
1130                 if (time_after(jiffies, timeout)) {
1131                         ret = -EBUSY;
1132                         goto out;
1133                 }
1134
1135                 range->notifier_seq = mmu_interval_read_begin(range->notifier);
1136
1137                 mmap_read_lock(mm);
1138                 ret = hmm_range_fault(range);
1139                 mmap_read_unlock(mm);
1140                 if (ret) {
1141                         if (ret == -EBUSY)
1142                                 continue;
1143                         goto out;
1144                 }
1145
1146                 mutex_lock(&dmirror->mutex);
1147                 if (mmu_interval_read_retry(range->notifier,
1148                                             range->notifier_seq)) {
1149                         mutex_unlock(&dmirror->mutex);
1150                         continue;
1151                 }
1152                 break;
1153         }
1154
1155         n = (range->end - range->start) >> PAGE_SHIFT;
1156         for (i = 0; i < n; i++)
1157                 dmirror_mkentry(dmirror, range, perm + i, range->hmm_pfns[i]);
1158
1159         mutex_unlock(&dmirror->mutex);
1160 out:
1161         mmu_interval_notifier_remove(range->notifier);
1162         return ret;
1163 }
1164
1165 static int dmirror_snapshot(struct dmirror *dmirror,
1166                             struct hmm_dmirror_cmd *cmd)
1167 {
1168         struct mm_struct *mm = dmirror->notifier.mm;
1169         unsigned long start, end;
1170         unsigned long size = cmd->npages << PAGE_SHIFT;
1171         unsigned long addr;
1172         unsigned long next;
1173         unsigned long pfns[64];
1174         unsigned char perm[64];
1175         char __user *uptr;
1176         struct hmm_range range = {
1177                 .hmm_pfns = pfns,
1178                 .dev_private_owner = dmirror->mdevice,
1179         };
1180         int ret = 0;
1181
1182         start = cmd->addr;
1183         end = start + size;
1184         if (end < start)
1185                 return -EINVAL;
1186
1187         /* Since the mm is for the mirrored process, get a reference first. */
1188         if (!mmget_not_zero(mm))
1189                 return -EINVAL;
1190
1191         /*
1192          * Register a temporary notifier to detect invalidations even if it
1193          * overlaps with other mmu_interval_notifiers.
1194          */
1195         uptr = u64_to_user_ptr(cmd->ptr);
1196         for (addr = start; addr < end; addr = next) {
1197                 unsigned long n;
1198
1199                 next = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
1200                 range.start = addr;
1201                 range.end = next;
1202
1203                 ret = dmirror_range_snapshot(dmirror, &range, perm);
1204                 if (ret)
1205                         break;
1206
1207                 n = (range.end - range.start) >> PAGE_SHIFT;
1208                 if (copy_to_user(uptr, perm, n)) {
1209                         ret = -EFAULT;
1210                         break;
1211                 }
1212
1213                 cmd->cpages += n;
1214                 uptr += n;
1215         }
1216         mmput(mm);
1217
1218         return ret;
1219 }
1220
1221 static long dmirror_fops_unlocked_ioctl(struct file *filp,
1222                                         unsigned int command,
1223                                         unsigned long arg)
1224 {
1225         void __user *uarg = (void __user *)arg;
1226         struct hmm_dmirror_cmd cmd;
1227         struct dmirror *dmirror;
1228         int ret;
1229
1230         dmirror = filp->private_data;
1231         if (!dmirror)
1232                 return -EINVAL;
1233
1234         if (copy_from_user(&cmd, uarg, sizeof(cmd)))
1235                 return -EFAULT;
1236
1237         if (cmd.addr & ~PAGE_MASK)
1238                 return -EINVAL;
1239         if (cmd.addr >= (cmd.addr + (cmd.npages << PAGE_SHIFT)))
1240                 return -EINVAL;
1241
1242         cmd.cpages = 0;
1243         cmd.faults = 0;
1244
1245         switch (command) {
1246         case HMM_DMIRROR_READ:
1247                 ret = dmirror_read(dmirror, &cmd);
1248                 break;
1249
1250         case HMM_DMIRROR_WRITE:
1251                 ret = dmirror_write(dmirror, &cmd);
1252                 break;
1253
1254         case HMM_DMIRROR_MIGRATE_TO_DEV:
1255                 ret = dmirror_migrate_to_device(dmirror, &cmd);
1256                 break;
1257
1258         case HMM_DMIRROR_MIGRATE_TO_SYS:
1259                 ret = dmirror_migrate_to_system(dmirror, &cmd);
1260                 break;
1261
1262         case HMM_DMIRROR_EXCLUSIVE:
1263                 ret = dmirror_exclusive(dmirror, &cmd);
1264                 break;
1265
1266         case HMM_DMIRROR_CHECK_EXCLUSIVE:
1267                 ret = dmirror_check_atomic(dmirror, cmd.addr,
1268                                         cmd.addr + (cmd.npages << PAGE_SHIFT));
1269                 break;
1270
1271         case HMM_DMIRROR_SNAPSHOT:
1272                 ret = dmirror_snapshot(dmirror, &cmd);
1273                 break;
1274
1275         default:
1276                 return -EINVAL;
1277         }
1278         if (ret)
1279                 return ret;
1280
1281         if (copy_to_user(uarg, &cmd, sizeof(cmd)))
1282                 return -EFAULT;
1283
1284         return 0;
1285 }
1286
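/*
 * Populate the whole VMA with freshly allocated zeroed pages up front. The
 * put_page() is safe because vm_insert_page() takes its own reference for
 * the mapping, so only the initial allocation reference is dropped here.
 */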
1287 static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
1288 {
1289         unsigned long addr;
1290
1291         for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
1292                 struct page *page;
1293                 int ret;
1294
1295                 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
1296                 if (!page)
1297                         return -ENOMEM;
1298
1299                 ret = vm_insert_page(vma, addr, page);
1300                 if (ret) {
1301                         __free_page(page);
1302                         return ret;
1303                 }
1304                 put_page(page);
1305         }
1306
1307         return 0;
1308 }
1309
1310 static const struct file_operations dmirror_fops = {
1311         .open           = dmirror_fops_open,
1312         .release        = dmirror_fops_release,
1313         .mmap           = dmirror_fops_mmap,
1314         .unlocked_ioctl = dmirror_fops_unlocked_ioctl,
1315         .llseek         = default_llseek,
1316         .owner          = THIS_MODULE,
1317 };
1318
1319 static void dmirror_devmem_free(struct page *page)
1320 {
1321         struct page *rpage = BACKING_PAGE(page);
1322         struct dmirror_device *mdevice;
1323
1324         if (rpage != page)
1325                 __free_page(rpage);
1326
1327         mdevice = dmirror_page_to_device(page);
1328         spin_lock(&mdevice->lock);
1329         mdevice->cfree++;
1330         page->zone_device_data = mdevice->free_pages;
1331         mdevice->free_pages = page;
1332         spin_unlock(&mdevice->lock);
1333 }
1334
1335 static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
1336 {
1337         struct migrate_vma args;
1338         unsigned long src_pfns = 0;
1339         unsigned long dst_pfns = 0;
1340         struct page *rpage;
1341         struct dmirror *dmirror;
1342         vm_fault_t ret;
1343
1344         /*
1345          * Normally, a device would use the page->zone_device_data to point to
1346          * the mirror but here we use it to hold the page for the simulated
1347          * device memory and that page holds the pointer to the mirror.
1348          */
1349         rpage = vmf->page->zone_device_data;
1350         dmirror = rpage->zone_device_data;
1351
1352         /* FIXME demonstrate how we can adjust migrate range */
1353         args.vma = vmf->vma;
1354         args.start = vmf->address;
1355         args.end = args.start + PAGE_SIZE;
1356         args.src = &src_pfns;
1357         args.dst = &dst_pfns;
1358         args.pgmap_owner = dmirror->mdevice;
1359         args.flags = dmirror_select_device(dmirror);
1360
1361         if (migrate_vma_setup(&args))
1362                 return VM_FAULT_SIGBUS;
1363
1364         ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
1365         if (ret)
1366                 return ret;
1367         migrate_vma_pages(&args);
1368         /*
1369          * No device finalize step is needed since
1370          * dmirror_devmem_fault_alloc_and_copy() will have already
1371          * invalidated the device page table.
1372          */
1373         migrate_vma_finalize(&args);
1374         return 0;
1375 }
1376
1377 static const struct dev_pagemap_ops dmirror_devmem_ops = {
1378         .page_free      = dmirror_devmem_free,
1379         .migrate_to_ram = dmirror_devmem_fault,
1380 };
1381
1382 static int dmirror_device_init(struct dmirror_device *mdevice, int id)
1383 {
1384         dev_t dev;
1385         int ret;
1386
1387         dev = MKDEV(MAJOR(dmirror_dev), id);
1388         mutex_init(&mdevice->devmem_lock);
1389         spin_lock_init(&mdevice->lock);
1390
1391         cdev_init(&mdevice->cdevice, &dmirror_fops);
1392         mdevice->cdevice.owner = THIS_MODULE;
1393         ret = cdev_add(&mdevice->cdevice, dev, 1);
1394         if (ret)
1395                 return ret;
1396
1397         /* Build a list of free ZONE_DEVICE struct pages */
1398         return dmirror_allocate_chunk(mdevice, NULL);
1399 }
1400
1401 static void dmirror_device_remove(struct dmirror_device *mdevice)
1402 {
1403         unsigned int i;
1404
1405         if (mdevice->devmem_chunks) {
1406                 for (i = 0; i < mdevice->devmem_count; i++) {
1407                         struct dmirror_chunk *devmem =
1408                                 mdevice->devmem_chunks[i];
1409
1410                         memunmap_pages(&devmem->pagemap);
1411                         if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
1412                                 release_mem_region(devmem->pagemap.range.start,
1413                                                    range_len(&devmem->pagemap.range));
1414                         kfree(devmem);
1415                 }
1416                 kfree(mdevice->devmem_chunks);
1417         }
1418
1419         cdev_del(&mdevice->cdevice);
1420 }
1421
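/*
 * Create the test devices: minors 0 and 1 always use device private memory;
 * minors 2 and 3 use device coherent memory and are only created when both
 * spm_addr_dev0 and spm_addr_dev1 are supplied.
 */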
1422 static int __init hmm_dmirror_init(void)
1423 {
1424         int ret;
1425         int id = 0;
1426         int ndevices = 0;
1427
1428         ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES,
1429                                   "HMM_DMIRROR");
1430         if (ret)
1431                 goto err_unreg;
1432
1433         memset(dmirror_devices, 0, DMIRROR_NDEVICES * sizeof(dmirror_devices[0]));
1434         dmirror_devices[ndevices++].zone_device_type =
1435                                 HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
1436         dmirror_devices[ndevices++].zone_device_type =
1437                                 HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
1438         if (spm_addr_dev0 && spm_addr_dev1) {
1439                 dmirror_devices[ndevices++].zone_device_type =
1440                                         HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
1441                 dmirror_devices[ndevices++].zone_device_type =
1442                                         HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
1443         }
1444         for (id = 0; id < ndevices; id++) {
1445                 ret = dmirror_device_init(dmirror_devices + id, id);
1446                 if (ret)
1447                         goto err_chrdev;
1448         }
1449
1450         pr_info("HMM test module loaded. This is only for testing HMM.\n");
1451         return 0;
1452
1453 err_chrdev:
1454         while (--id >= 0)
1455                 dmirror_device_remove(dmirror_devices + id);
1456         unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
1457 err_unreg:
1458         return ret;
1459 }
1460
1461 static void __exit hmm_dmirror_exit(void)
1462 {
1463         int id;
1464
1465         for (id = 0; id < DMIRROR_NDEVICES; id++)
1466                 if (dmirror_devices[id].zone_device_type)
1467                         dmirror_device_remove(dmirror_devices + id);
1468         unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
1469 }
1470
1471 module_init(hmm_dmirror_init);
1472 module_exit(hmm_dmirror_exit);
1473 MODULE_LICENSE("GPL");