/**
 * kprobe/swap_kprobes_deps.c
 * @author Alexey Gerenkov <a.gerenkov@samsung.com> User-Space Probes initial implementation;
 * Support x86/ARM/MIPS for both user and kernel spaces.
 * @author Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign module for separating core and arch parts
 *
 * @section LICENSE
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * @section COPYRIGHT
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * @section DESCRIPTION
 *
 * SWAP kprobe kernel-dependent dependencies.
 */
#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "swap_kprobes_deps.h"
#include "swap_kdebug.h"

#include <linux/slab.h>
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/* kernel define 'pgd_offset_k' redefinition */
#undef pgd_offset_k
#define pgd_offset_k(addr)	pgd_offset(init_task.active_mm, addr)
#endif

#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
#ifndef is_zero_pfn
static unsigned long swap_zero_pfn;
#endif /* is_zero_pfn */
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *swap_kmap_atomic(struct page *page)
{
	return kmap_atomic(page);
}

static inline void swap_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *swap_kmap_atomic(struct page *page)
{
	return kmap_atomic(page, KM_USER0);
}

static inline void swap_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
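
/*
 * Illustrative sketch (not part of the original module): how the
 * swap_kmap_atomic()/swap_kunmap_atomic() compat pair above is typically
 * used. The hypothetical helper reads one byte from a page while it is
 * temporarily mapped; the wrappers hide the KM_USER0 slot argument that
 * kmap_atomic() required before 2.6.37.
 */
#if 0
static u8 swap_example_peek_byte(struct page *page, unsigned long offset)
{
	u8 *kvaddr = swap_kmap_atomic(page);	/* map page, preemption off */
	u8 val = kvaddr[offset & (PAGE_SIZE - 1)];

	swap_kunmap_atomic(kvaddr);		/* unmap, preemption back on */
	return val;
}
#endif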
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
DECLARE_MOD_FUNC_DEP(do_mmap, unsigned long, struct file *file,
		unsigned long addr, unsigned long len, unsigned long prot,
		unsigned long flags, vm_flags_t vm_flags,
		unsigned long pgoff, unsigned long *populate);
DECLARE_MOD_DEP_WRAPPER(swap_do_mmap,
			unsigned long,
			struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, vm_flags_t vm_flags,
			unsigned long pgoff, unsigned long *populate)
IMP_MOD_DEP_WRAPPER(do_mmap, file, addr, len,
		    prot, flags, vm_flags, pgoff, populate)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file,
		unsigned long addr, unsigned long len, unsigned long prot,
		unsigned long flags, unsigned long pgoff,
		unsigned long *populate);
DECLARE_MOD_DEP_WRAPPER(swap_do_mmap_pgoff,
			unsigned long,
			struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff,
			unsigned long *populate)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len,
		    prot, flags, pgoff, populate)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file,
		unsigned long addr, unsigned long len, unsigned long prot,
		unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(swap_do_mmap_pgoff,
			unsigned long,
			struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
EXPORT_SYMBOL_GPL(swap_do_mmap);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
EXPORT_SYMBOL_GPL(swap_do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) */
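
/*
 * Illustrative sketch (not part of the original module): the wrappers above
 * resolve the unexported do_mmap()/do_mmap_pgoff() symbol at runtime, so a
 * caller wanting an anonymous, writable mapping in the current process could
 * look roughly like this. The helper name and flag choices are assumptions
 * for the example only; since 3.9 the populate out-parameter is mandatory,
 * and the caller is expected to hold mm->mmap_sem for writing.
 */
#if 0
static unsigned long swap_example_map_anon(unsigned long len)
{
	unsigned long populate = 0;

	/* kernel-version-dependent entry point, mirroring the #if ladder */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
	return swap_do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
			    MAP_ANONYMOUS | MAP_PRIVATE, 0, 0, &populate);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
	return swap_do_mmap_pgoff(NULL, 0, len, PROT_READ | PROT_WRITE,
				  MAP_ANONYMOUS | MAP_PRIVATE, 0, &populate);
#endif
}
#endif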
/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma,
		struct page *page, unsigned long uaddr, void *dst,
		const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(swap_copy_to_user_page,
			void,
			struct vm_area_struct *vma, struct page *page,
			unsigned long uaddr, void *dst, const void *src,
			unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#else /* copy_to_user_page */
#define swap_copy_to_user_page copy_to_user_page
#endif /* copy_to_user_page */
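
/*
 * Illustrative sketch (not part of the original module):
 * swap_copy_to_user_page() is what lets probe code patch another process'
 * text page coherently, since it handles D-cache/I-cache maintenance. A
 * hypothetical single-byte poke of an already-pinned, already-mapped page
 * could look like this.
 */
#if 0
static void swap_example_poke_byte(struct vm_area_struct *vma,
				   struct page *page, void *maddr,
				   unsigned long uaddr, u8 val)
{
	/* copy 1 byte into the mapped page and keep caches consistent */
	swap_copy_to_user_page(vma, page, uaddr,
			       maddr + (uaddr & (PAGE_SIZE - 1)), &val, 1);
	set_page_dirty_lock(page);
}
#endif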
static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *,
		struct mm_struct *mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long address,
		int write_access);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long address,
		unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *,
		struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *,
		struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm,
		unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task,
		unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
static DECLARE_MOD_FUNC_DEP(follow_page_mask, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags, \
		unsigned int *page_mask);
DECLARE_MOD_DEP_WRAPPER(swap_follow_page_mask,
			struct page *,
			struct vm_area_struct *vma, unsigned long address,
			unsigned int foll_flags, unsigned int *page_mask)
IMP_MOD_DEP_WRAPPER(follow_page_mask, vma, address, foll_flags, page_mask)
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_DEP_WRAPPER(swap_follow_page,
			struct page *,
			struct vm_area_struct *vma, unsigned long address,
			unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
DECLARE_MOD_DEP_WRAPPER(swap_find_extend_vma,
			struct vm_area_struct *,
			struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(swap_handle_mm_fault,
			int,
			struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, int write_access)
{
	if (in_atomic())
		return VM_FAULT_ERROR | VM_FAULT_OOM;

	IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_DEP_WRAPPER(swap_handle_mm_fault,
			int,
			struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, unsigned int flags)
{
	if (in_atomic())
		return VM_FAULT_ERROR | VM_FAULT_OOM;

	IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
}
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
struct vm_area_struct *swap_get_gate_vma(struct mm_struct *mm)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return get_gate_vma(mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return get_gate_vma(tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */
}
#ifdef CONFIG_HUGETLB_PAGE

#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, \
		int, \
		struct mm_struct *mm, struct vm_area_struct *vma, \
		struct page **pages, struct vm_area_struct **vmas, \
		unsigned long *position, int *length, int i, \
		unsigned int flags);
DECLARE_MOD_DEP_WRAPPER(swap_follow_hugetlb_page,
			int,
			struct mm_struct *mm, struct vm_area_struct *vma,
			struct page **pages, struct vm_area_struct **vmas,
			unsigned long *position, int *length, int i,
			unsigned int flags)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, \
		    mm, vma, pages, vmas, position, length, i, flags)
#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) */
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, \
		long, \
		struct mm_struct *mm, struct vm_area_struct *vma, \
		struct page **pages, struct vm_area_struct **vmas, \
		unsigned long *position, unsigned long *nr_pages, \
		long i, unsigned int flags);
DECLARE_MOD_DEP_WRAPPER(swap_follow_hugetlb_page,
			long,
			struct mm_struct *mm, struct vm_area_struct *vma,
			struct page **pages, struct vm_area_struct **vmas,
			unsigned long *position, unsigned long *nr_pages,
			long i, unsigned int flags)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, \
		    mm, vma, pages, vmas, position, nr_pages, i, flags)
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) */

#else /* CONFIG_HUGETLB_PAGE */
#define swap_follow_hugetlb_page follow_hugetlb_page
#endif /* CONFIG_HUGETLB_PAGE */
static inline int swap_in_gate_area(struct task_struct *task,
				    unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm;

	if (task == NULL)
		return 0;

	mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm;

	if (task == NULL)
		return 0;

	mm = task->mm;
	return in_gate_area(mm, addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return in_gate_area(task, addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */
}
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(swap_in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(swap_in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */
static inline int swap_in_gate_area_no_xxx(unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return swap_in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return swap_in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */
}
DECLARE_MOD_DEP_WRAPPER(swap__flush_anon_page,
			void,
			struct vm_area_struct *vma, struct page *page,
			unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

static inline void swap_flush_anon_page(struct vm_area_struct *vma,
					struct page *page,
					unsigned long vmaddr)
{
#if defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM)
	swap__flush_anon_page(vma, page, vmaddr);
#else /* defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM) */
	flush_anon_page(vma, page, vmaddr);
#endif /* defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM) */
}

DECLARE_MOD_DEP_WRAPPER(swap_vm_normal_page,
			struct page *,
			struct vm_area_struct *vma, unsigned long addr,
			pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)
/**
 * @brief Initializes module dependencies.
 *
 * @return 0.
 */
int init_module_dependencies(void)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
	INIT_MOD_DEP_VAR(follow_page_mask, follow_page_mask);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
	INIT_MOD_DEP_VAR(follow_page, follow_page);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
#ifndef is_zero_pfn
	swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

#if defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM)
	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
#endif /* defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM) */

	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
	INIT_MOD_DEP_VAR(do_mmap, do_mmap);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) */

	return 0;
}
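
/*
 * Illustrative sketch (not part of the original module): the dependency
 * table above is filled by symbol lookup, so it must be populated once,
 * before any of the swap_* wrappers are called - typically from the module
 * init callback. The init function name is hypothetical.
 */
#if 0
static int __init swap_example_init(void)
{
	int ret = init_module_dependencies();

	if (ret)
		return ret;

	/* from here on swap_find_extend_vma(), swap_vm_normal_page(), ...
	 * are safe to use */
	return 0;
}
#endif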
#ifdef CONFIG_ARM64

static int do_access_process_vm(struct task_struct *tsk, struct mm_struct *mm,
				unsigned long addr, void *buf, int len,
				int write)
{
	struct vm_area_struct *vma;
	void *old_buf = buf;

	down_read(&mm->mmap_sem);
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

# if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
		ret = get_user_pages(tsk, mm, addr, 1, write, 1, &page, &vma);
# else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) */
		ret = get_user_pages_remote(tsk, mm, addr, 1,
					    FOLL_WRITE | FOLL_FORCE,
					    &page, &vma);
# endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) */
		if (ret <= 0) {
#ifndef CONFIG_HAVE_IOREMAP_PROT
			break;
#else
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
			vma = find_vma(mm, addr);
			if (!vma || vma->vm_start > addr)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf, len,
							  write);
			if (ret <= 0)
				break;
			bytes = ret;
#endif
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = kmap(page);
			if (write) {
				swap_copy_to_user_page(vma, page, addr,
						       maddr + offset,
						       buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset,
						    bytes);
			}
			kunmap(page);
			put_page(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}
	up_read(&mm->mmap_sem);

	return buf - old_buf;
}
int swap_access_process_vm(struct task_struct *tsk, unsigned long addr,
			   void *buf, int len, int write)
{
	int ret;
	struct mm_struct *mm;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	ret = do_access_process_vm(tsk, mm, addr, buf, len, write);
	mmput(mm);

	return ret;
}
EXPORT_SYMBOL_GPL(swap_access_process_vm);
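
/*
 * Illustrative sketch (not part of the original module): on ARM64 a probe
 * handler would read another task's memory through the exported helper
 * above, e.g. to fetch an instruction word at a probe address. The helper
 * name is an assumption for the example only.
 */
#if 0
static u32 swap_example_read_insn(struct task_struct *task, unsigned long addr)
{
	u32 insn = 0;

	/* returns the number of bytes actually copied (0 on failure) */
	if (swap_access_process_vm(task, addr, &insn, sizeof(insn), 0) !=
	    sizeof(insn))
		return 0;

	return insn;
}
#endif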
#else /* CONFIG_ARM64 */

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE				0x1
#define GUP_FLAGS_FORCE				0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS	0x4
#define GUP_FLAGS_IGNORE_SIGKILL		0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)

#ifdef __HAVE_COLOR_ZERO_PAGE

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;

	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	return pfn == swap_zero_pfn;
}

#endif /* __HAVE_COLOR_ZERO_PAGE */

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
#ifndef is_zero_pfn
	return pfn == swap_zero_pfn;
#else /* is_zero_pfn */
	return is_zero_pfn(pfn);
#endif /* is_zero_pfn */
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
static inline int stack_guard_page(struct vm_area_struct *vma,
				   unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
	       stack_guard_page_end(vma, addr+PAGE_SIZE);
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)

/**
 * @brief Gets user pages uprobe.
 *
 * @param tsk Pointer to the task_struct.
 * @param mm Pointer to the mm_struct.
 * @param start Starting address.
 * @param nr_pages Pages number.
 * @param gup_flags Flags.
 * @param pages Pointer to the array of pointers to the target page structs.
 * @param vmas Pointer to the array of pointers to the target vm_area_struct.
 * @param nonblocking Pointer to int.
 * @return negative error code on error, positive result otherwise.
 */
long __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			     unsigned long start, unsigned long nr_pages,
			     unsigned int gup_flags, struct page **pages,
			     struct vm_area_struct **vmas, int *nonblocking)
{
	long i;
	unsigned long vm_flags;
	unsigned int page_mask;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	/*
	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
	 * would be called on PROT_NONE ranges. We must never invoke
	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
	 * page faults would unprotect the PROT_NONE ranges if
	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
	 * bitflag. So to avoid that, don't set FOLL_NUMA if
	 * FOLL_FORCE is set.
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;
	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = swap_find_extend_vma(mm, start);
		if (!vma && swap_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = swap_get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = swap_vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					     swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			page_mask = 0;
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = swap_follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;
			unsigned int page_increm;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = swap_follow_page_mask(vma, start,
						foll_flags, &page_mask))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
				if (foll_flags & FOLL_POPULATE) {
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0) */
				if (foll_flags & FOLL_MLOCK) {
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0) */
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |=
						(FAULT_FLAG_ALLOW_RETRY |
						 FAULT_FLAG_RETRY_NOWAIT);

				ret = swap_handle_mm_fault(mm, vma, start,
							   fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags &
							 FOLL_HWPOISON)
							return -EHWPOISON;
						else
							return -EFAULT;
					}
					if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				swap_flush_anon_page(vma, page, start);
				flush_dcache_page(page);
				page_mask = 0;
			}
next_page:
			if (vmas) {
				vmas[i] = vma;
				page_mask = 0;
			}
			page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
			if (page_increm > nr_pages)
				page_increm = nr_pages;
			i += page_increm;
			start += page_increm * PAGE_SIZE;
			nr_pages -= page_increm;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static int __get_user_pages_uprobe(struct task_struct *tsk,
				   struct mm_struct *mm, unsigned long start,
				   int nr_pages, unsigned int gup_flags,
				   struct page **pages,
				   struct vm_area_struct **vmas,
				   int *nonblocking)
{
	int i;
	unsigned long vm_flags;

	if (nr_pages <= 0)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = swap_find_extend_vma(mm, start);
		if (!vma && swap_in_gate_area_no_xxx(start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = swap_get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = swap_vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					     swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags)) {
			return i ? : -EFAULT;
		}

		if (is_vm_hugetlb_page(vma)) {
			i = swap_follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = swap_follow_page(vma, start,
							 foll_flags))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |=
						(FAULT_FLAG_ALLOW_RETRY |
						 FAULT_FLAG_RETRY_NOWAIT);

				ret = swap_handle_mm_fault(mm, vma, start,
							   fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags &
							 FOLL_HWPOISON)
							return -EHWPOISON;
						else
							return -EFAULT;
					}
					if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				swap_flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
next_page:
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static int __get_user_pages_uprobe(struct task_struct *tsk,
				   struct mm_struct *mm,
				   unsigned long start, int len, int flags,
				   struct page **pages,
				   struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);

	if (len <= 0)
		return 0;
	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && swap_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma =
				swap_get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page =
					swap_vm_normal_page(gate_vma, start,
							    *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = swap_follow_hugetlb_page(mm, vma, pages, vmas,
						     &start, &len, i, write);
			continue;
		}
		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

		do {
			struct page *page;

			if (write)
				foll_flags |= FOLL_WRITE;

			/* cond_resched(); */

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = swap_follow_page(vma, start,
							 foll_flags))) {
				int ret;

				ret = swap_handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18) */
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18) */

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}

			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;
				swap_flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);

	return i;
}

#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
/**
 * @brief Gets user pages uprobe.
 *
 * @param tsk Pointer to the task_struct.
 * @param mm Pointer to the mm_struct.
 * @param start Starting address.
 * @param len Length.
 * @param write Write flag.
 * @param force Force flag.
 * @param pages Pointer to the array of pointers to the target page structs.
 * @param vmas Pointer to the array of pointers to the target vm_area_struct.
 * @return negative error code on error, positive result otherwise.
 */
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			  unsigned long start, int len, int write, int force,
			  struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
	int flags = FOLL_TOUCH;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

	return __get_user_pages_uprobe(tsk, mm,
				       start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
				       pages, vmas, NULL);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
				       pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
}
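
/*
 * Illustrative sketch (not part of the original module): pinning a single
 * page of another process the way access_process_vm_atomic() below does.
 * The helper name is an assumption for the example; error handling is
 * reduced to the bare minimum.
 */
#if 0
static struct page *swap_example_pin_page(struct task_struct *task,
					  struct mm_struct *mm,
					  unsigned long addr, int write)
{
	struct page *page = NULL;
	struct vm_area_struct *vma = NULL;

	/* force = 1: probes may need to write pages mapped read-only */
	if (get_user_pages_uprobe(task, mm, addr, 1, write, 1,
				  &page, &vma) <= 0)
		return NULL;

	return page;	/* caller must put_page() when done */
}
#endif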
#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)
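
/*
 * Added note (not in the original source): GET_STEP_4() picks the widest
 * access that still fits the remaining length. For len = 7 it yields 4
 * (one 32-bit access), then 7 - 4 = 3 remaining gives 3 % 4 = 3, which the
 * switch below handles as a 16-bit access, and the last byte goes as an
 * 8-bit access - so a 7-byte copy is performed as 4 + 2 + 1.
 */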
static void read_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			get_user(*(u8 *)(buf + pos),
				 (unsigned long *)(addr + pos));
			step = 1;
			break;

		case 2:
		case 3:
			get_user(*(u16 *)(buf + pos),
				 (unsigned long *)(addr + pos));
			step = 2;
			break;

		case 4:
			get_user(*(u32 *)(buf + pos),
				 (unsigned long *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}
static void write_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			put_user(*(u8 *)(buf + pos),
				 (unsigned long *)(addr + pos));
			step = 1;
			break;

		case 2:
		case 3:
			put_user(*(u16 *)(buf + pos),
				 (unsigned long *)(addr + pos));
			step = 2;
			break;

		case 4:
			put_user(*(u32 *)(buf + pos),
				 (unsigned long *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}
#endif /* ACCESS_PROCESS_OPTIMIZATION */
/**
 * @brief Read-write task memory.
 *
 * @param tsk Pointer to the target task task_struct.
 * @param addr Address to read-write.
 * @param buf Pointer to buffer where to put-get data.
 * @param len Buffer length.
 * @param write Write flag. If 0 - reading, if 1 - writing.
 * @return Read-write size, error code on error.
 */
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr,
			     void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;
	int atomic;

	if (len <= 0)
		return -1;

#if ACCESS_PROCESS_OPTIMIZATION
	if (write == 0 && tsk == current) {
		read_data_current(addr, buf, len);
		return len;
	}
#endif

	mm = tsk->mm; /* function 'get_task_mm' is to be called */
	if (!mm)
		return 0;

	/* FIXME: danger: write memory in atomic context */
	atomic = in_atomic();

	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
					    write, 1, &page, &vma);
		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma || vma->vm_start > addr)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = atomic ? swap_kmap_atomic(page) : kmap(page);

			if (write) {
				swap_copy_to_user_page(vma, page, addr,
						       maddr + offset,
						       buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset,
						    bytes);
			}

			atomic ? swap_kunmap_atomic(maddr) : kunmap(page);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}

	return buf - old_buf;
}
EXPORT_SYMBOL_GPL(access_process_vm_atomic);
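
/*
 * Illustrative sketch (not part of the original module): unlike the plain
 * kernel access_process_vm(), the helper above may be called from atomic
 * context (it switches to kmap_atomic() and does not take mmap_sem or a
 * reference on the mm). A probe handler could patch a breakpoint byte like
 * this; names and the payload byte are assumptions for the example only.
 */
#if 0
static int swap_example_write_bp(struct task_struct *task, unsigned long addr)
{
	u8 bp = 0xcc;	/* x86 INT3, just as an example payload */

	/* returns the number of bytes written; 1 on success here */
	return access_process_vm_atomic(task, addr, &bp, sizeof(bp), 1);
}
#endif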
#endif /* CONFIG_ARM64 */
/**
 * @brief Page present.
 *
 * @param mm Pointer to the target mm_struct.
 * @param address Address.
 * @return Boolean.
 */
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (pte_none(*ptep)) {
		pte_unmap(ptep);
		goto out;
	}

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn))
			return 1;
	}

out:
	return 0;
}
EXPORT_SYMBOL_GPL(page_present);
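
/*
 * Illustrative sketch (not part of the original module): page_present() is
 * the cheap "is this address already faulted in?" test - a page-table walk
 * with no faulting and no page pinning - so a probe can decide whether a
 * location is safe to touch without sleeping. The helper name is
 * hypothetical.
 */
#if 0
static bool swap_example_addr_mapped(struct task_struct *task,
				     unsigned long addr)
{
	return task->mm && page_present(task->mm, addr & PAGE_MASK);
}
#endif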