/**
 * kprobe/swap_kprobes_deps.c
 * @author Alexey Gerenkov <a.gerenkov@samsung.com>: User-Space Probes initial
 * implementation; support of x86/ARM/MIPS for both user and kernel spaces.
 * @author Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign of the
 * module to separate the core and architecture-specific parts
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * @section DESCRIPTION
 *
 * SWAP kprobes kernel-dependent definitions.
 */

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>

#include <asm/pgtable.h>

#include "swap_kprobes_deps.h"
#include "swap_kdebug.h"

/* redefinition of the kernel 'pgd_offset_k' macro */
#undef pgd_offset_k
#define pgd_offset_k(addr)	pgd_offset(init_task.active_mm, addr)

#ifndef is_zero_pfn
/* pfn of the zero page, resolved at init time when is_zero_pfn() is absent */
static unsigned long swap_zero_pfn;
#endif /* is_zero_pfn */

static inline void *swap_kmap_atomic(struct page *page)
{
	return kmap_atomic(page);
}

static inline void swap_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr);
}
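
/*
 * The DECLARE_MOD_FUNC_DEP / DECLARE_MOD_DEP_WRAPPER / IMP_MOD_DEP_WRAPPER
 * macros below come from swap_kprobes_deps.h.  Judging by their use, each
 * declares a pointer to a kernel function that is not exported to modules,
 * plus a swap_*-prefixed wrapper with the same signature that forwards the
 * call through that pointer once the dependencies have been initialized.
 */
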
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file,
		     unsigned long addr, unsigned long len, unsigned long prot,
		     unsigned long flags, unsigned long pgoff,
		     unsigned long *populate);
DECLARE_MOD_DEP_WRAPPER(swap_do_mmap_pgoff,
			unsigned long,
			struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff,
			unsigned long *populate)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len,
		    prot, flags, pgoff, populate)

EXPORT_SYMBOL_GPL(swap_do_mmap_pgoff);
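
/*
 * Note: for the kernel versions this file targets, do_mmap_pgoff() must be
 * called with mm->mmap_sem held for writing, so callers of
 * swap_do_mmap_pgoff() are presumably expected to take that lock themselves.
 */
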
/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma,
			    struct page *page, unsigned long uaddr, void *dst,
			    const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(swap_copy_to_user_page,
			void,
			struct vm_area_struct *vma, struct page *page,
			unsigned long uaddr, void *dst, const void *src,
			unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#else /* copy_to_user_page */
#define swap_copy_to_user_page copy_to_user_page
#endif /* copy_to_user_page */

static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *,
			    struct mm_struct *mm, unsigned long addr);

static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm,
			    struct vm_area_struct *vma, unsigned long address,
			    unsigned int flags);

static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *,
			    struct mm_struct *mm);

#ifdef __HAVE_ARCH_GATE_AREA
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm,
		     unsigned long addr);
#endif /* __HAVE_ARCH_GATE_AREA */

static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);

static DECLARE_MOD_FUNC_DEP(follow_page_mask,
			    struct page *, struct vm_area_struct *vma,
			    unsigned long address, unsigned int foll_flags,
			    unsigned int *page_mask);
DECLARE_MOD_DEP_WRAPPER(swap_follow_page_mask,
			struct page *,
			struct vm_area_struct *vma, unsigned long address,
			unsigned int foll_flags, unsigned int *page_mask)
IMP_MOD_DEP_WRAPPER(follow_page_mask, vma, address, foll_flags, page_mask)

static DECLARE_MOD_FUNC_DEP(__flush_anon_page,
			    void, struct vm_area_struct *vma,
			    struct page *page, unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page,
			    struct page *, struct vm_area_struct *vma,
			    unsigned long addr, pte_t pte);

static DECLARE_MOD_FUNC_DEP(put_task_struct,
			    void, struct task_struct *tsk);

DECLARE_MOD_DEP_WRAPPER(swap_find_extend_vma,
			struct vm_area_struct *,
			struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

DECLARE_MOD_DEP_WRAPPER(swap_handle_mm_fault,
			int,
			struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, unsigned int flags)
{
	/* handle_mm_fault() may sleep, so do not fault from atomic context */
	if (in_atomic())
		return VM_FAULT_ERROR | VM_FAULT_OOM;

	IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
}

DECLARE_MOD_DEP_WRAPPER(swap_get_gate_vma,
			struct vm_area_struct *,
			struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)

#ifdef CONFIG_HUGETLB_PAGE

DECLARE_MOD_FUNC_DEP(follow_hugetlb_page,
		     long,
		     struct mm_struct *mm, struct vm_area_struct *vma,
		     struct page **pages, struct vm_area_struct **vmas,
		     unsigned long *position, unsigned long *nr_pages,
		     long i, unsigned int flags);
DECLARE_MOD_DEP_WRAPPER(swap_follow_hugetlb_page,
			long,
			struct mm_struct *mm, struct vm_area_struct *vma,
			struct page **pages, struct vm_area_struct **vmas,
			unsigned long *position, unsigned long *nr_pages,
			long i, unsigned int flags)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page,
		    mm, vma, pages, vmas, position, nr_pages, i, flags)

#else /* CONFIG_HUGETLB_PAGE */
#define swap_follow_hugetlb_page follow_hugetlb_page
#endif /* CONFIG_HUGETLB_PAGE */

static inline int swap_in_gate_area(struct task_struct *task,
				    unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
	struct mm_struct *mm;

	mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* __HAVE_ARCH_GATE_AREA */
	return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}

DECLARE_MOD_DEP_WRAPPER(swap_in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)

static inline int swap_in_gate_area_no_xxx(unsigned long addr)
{
	return swap_in_gate_area_no_mm(addr);
}

DECLARE_MOD_DEP_WRAPPER(swap__flush_anon_page,
			void,
			struct vm_area_struct *vma, struct page *page,
			unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

static inline void swap_flush_anon_page(struct vm_area_struct *vma,
					struct page *page,
					unsigned long vmaddr)
{
#if defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM)
	swap__flush_anon_page(vma, page, vmaddr);
#else /* defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM) */
	flush_anon_page(vma, page, vmaddr);
#endif /* defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM) */
}

DECLARE_MOD_DEP_WRAPPER(swap_vm_normal_page,
			struct page *,
			struct vm_area_struct *vma, unsigned long addr,
			pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)
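
/*
 * INIT_MOD_DEP_VAR (also from swap_kprobes_deps.h) presumably looks up the
 * named kernel symbol (for example via a kallsyms-based helper) and stores
 * its address in the matching dependency pointer declared above.
 */
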
/**
 * @brief Initializes module dependencies.
 *
 * @return 0.
 */
int init_module_dependencies(void)
{
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif /* __HAVE_ARCH_GATE_AREA */

	INIT_MOD_DEP_VAR(follow_page_mask, follow_page_mask);

#ifndef is_zero_pfn
	swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */

	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);

#if defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM)
	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
#endif /* defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM) */

	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);

	return 0;
}
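
/*
 * A minimal usage sketch (hypothetical module init, names invented for the
 * example): resolve the dependencies once, before any swap_* wrapper or the
 * access helpers below are used.
 *
 *	static int __init swap_deps_demo_init(void)
 *	{
 *		init_module_dependencies();
 *		return 0;
 *	}
 */
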
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * page.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;

	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}

#ifdef __HAVE_COLOR_ZERO_PAGE

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;

	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	return pfn == swap_zero_pfn;
}

#endif /* __HAVE_COLOR_ZERO_PAGE */
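
/*
 * swap_is_zero_pfn() mirrors the kernel's is_zero_pfn(): it reports whether a
 * pfn belongs to the (possibly coloured) zero page, so the GUP loop below can
 * hand back the zero page instead of failing when FOLL_DUMP is not set.
 */
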
static inline int stack_guard_page(struct vm_area_struct *vma,
				   unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
	       stack_guard_page_end(vma, addr + PAGE_SIZE);
}

/**
 * @brief Uprobe analogue of __get_user_pages(): pins pages of user memory.
 *
 * @param tsk Pointer to the task_struct.
 * @param mm Pointer to the mm_struct.
 * @param start Starting address.
 * @param nr_pages Number of pages.
 * @param gup_flags FOLL_* flags.
 * @param pages Pointer to the array of pointers to the target page structs.
 * @param vmas Pointer to the array of pointers to the target vm_area_struct.
 * @param nonblocking Pointer to int.
 * @return Negative error code on error, number of pinned pages otherwise.
 */
long __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			     unsigned long start, unsigned long nr_pages,
			     unsigned int gup_flags, struct page **pages,
			     struct vm_area_struct **vmas, int *nonblocking)
{
	long i;
	unsigned long vm_flags;
	unsigned int page_mask;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	/*
	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
	 * would be called on PROT_NONE ranges. We must never invoke
	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
	 * page faults would unprotect the PROT_NONE ranges if
	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
	 * bitflag. So to avoid that, don't set FOLL_NUMA if
	 * FOLL_FORCE is set.
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = swap_find_extend_vma(mm, start);
		if (!vma && swap_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = swap_get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = swap_vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					    swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			page_mask = 0;
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = swap_follow_hugetlb_page(mm, vma, pages, vmas,
						     &start, &nr_pages, i,
						     gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;
			unsigned int page_increm;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = swap_follow_page_mask(vma, start,
						foll_flags, &page_mask))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |=
						(FAULT_FLAG_ALLOW_RETRY |
						 FAULT_FLAG_RETRY_NOWAIT);

				ret = swap_handle_mm_fault(mm, vma, start,
							   fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags &
							 FOLL_HWPOISON)
							return -EHWPOISON;
						else
							return -EFAULT;
					}
					if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				swap_flush_anon_page(vma, page, start);
				flush_dcache_page(page);
				page_mask = 0;
			}
next_page:
			if (vmas) {
				vmas[i] = vma;
				page_mask = 0;
			}
			page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
			if (page_increm > nr_pages)
				page_increm = nr_pages;
			i += page_increm;
			start += page_increm * PAGE_SIZE;
			nr_pages -= page_increm;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}

/**
 * @brief Uprobe analogue of get_user_pages(): pins pages of user memory.
 *
 * @param tsk Pointer to the task_struct.
 * @param mm Pointer to the mm_struct.
 * @param start Starting address.
 * @param len Number of pages.
 * @param write Write flag.
 * @param force Force flag.
 * @param pages Pointer to the array of pointers to the target page structs.
 * @param vmas Pointer to the array of pointers to the target vm_area_struct.
 * @return Negative error code on error, number of pinned pages otherwise.
 */
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			  unsigned long start, int len, int write, int force,
			  struct page **pages, struct vm_area_struct **vmas)
{
	int flags = FOLL_TOUCH;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;

	return __get_user_pages_uprobe(tsk, mm,
				       start, len, flags,
				       pages, vmas, NULL);
}
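
/*
 * When ACCESS_PROCESS_OPTIMIZATION is enabled (it is off by default), accesses
 * that target the current task are performed directly with
 * get_user()/put_user() instead of pinning pages, as the helpers below show.
 */
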
#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)

static void read_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			get_user(*(u8 *)(buf + pos),
				 (u8 __user *)(addr + pos));
			step = 1;
			break;

		case 2:
		case 3:
			get_user(*(u16 *)(buf + pos),
				 (u16 __user *)(addr + pos));
			step = 2;
			break;

		case 4:
			get_user(*(u32 *)(buf + pos),
				 (u32 __user *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}

static void write_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			put_user(*(u8 *)(buf + pos),
				 (u8 __user *)(addr + pos));
			step = 1;
			break;

		case 2:
		case 3:
			put_user(*(u16 *)(buf + pos),
				 (u16 __user *)(addr + pos));
			step = 2;
			break;

		case 4:
			put_user(*(u32 *)(buf + pos),
				 (u32 __user *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}

#endif /* ACCESS_PROCESS_OPTIMIZATION */

/**
 * @brief Reads from or writes to the memory of a task.
 *
 * @param tsk Pointer to the target task's task_struct.
 * @param addr Address to read from or write to.
 * @param buf Pointer to the buffer the data is copied to or taken from.
 * @param len Buffer length.
 * @param write Write flag: 0 means read, 1 means write.
 * @return Number of bytes transferred, or an error code on error.
 */
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr,
			     void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;
	int atomic;

#if ACCESS_PROCESS_OPTIMIZATION
	if (write == 0 && tsk == current) {
		read_data_current(addr, buf, len);
		return len;
	}
#endif /* ACCESS_PROCESS_OPTIMIZATION */

	mm = tsk->mm; /* 'get_task_mm' should be used here */
	if (!mm)
		return 0;

	/* FIXME: danger: write memory in atomic context */
	atomic = in_atomic();

	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
					    write, 1, &page, &vma);

		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE - 1);
			if (bytes > PAGE_SIZE - offset)
				bytes = PAGE_SIZE - offset;

			maddr = atomic ? swap_kmap_atomic(page) : kmap(page);

			if (write) {
				swap_copy_to_user_page(vma, page, addr,
						       maddr + offset,
						       buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset,
						    bytes);
			}

			atomic ? swap_kunmap_atomic(maddr) : kunmap(page);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}

	return buf - old_buf;
}
EXPORT_SYMBOL_GPL(access_process_vm_atomic);
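
/*
 * A minimal usage sketch (hypothetical caller, names invented for the
 * example): reading one word from another task, e.g. from a probe handler.
 *
 *	unsigned long word;
 *
 *	if (access_process_vm_atomic(task, user_addr, &word,
 *				     sizeof(word), 0) != sizeof(word))
 *		pr_warn("swap: cannot read probed address\n");
 */
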
/**
 * @brief Checks whether the page containing an address is present.
 *
 * @param mm Pointer to the target mm_struct.
 * @param address Address.
 */
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		return 0;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		return 0;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		return 0;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		return 0;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn))
			return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(page_present);
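
/*
 * page_present() lets code running in atomic context check whether an access
 * would fault before touching user memory.  A hypothetical caller:
 *
 *	if (!page_present(task->mm, user_addr))
 *		return -EFAULT;
 */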