/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: initial
 *              implementation of user-space probes; x86/ARM/MIPS support
 *              for both user and kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign of
 *              the module to separate the core and architecture-specific
 *              parts.
 *
 */

#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

#include <linux/slab.h>
#include <linux/mm.h>

unsigned long sched_addr;
unsigned long fork_addr;
unsigned long exit_addr;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/* redefine the kernel's 'pgd_offset_k' to walk init_task's active mm */
#undef pgd_offset_k
#define pgd_offset_k(addr)      pgd_offset(init_task.active_mm, addr)
#endif

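/*
 * Kernels newer than 2.6.38 normally provide is_zero_pfn(); when they do
 * not, the zero page's PFN is cached here by init_module_dependencies()
 * and compared against in swap_is_zero_pfn() below.
 */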
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
#ifndef is_zero_pfn

static unsigned long swap_zero_pfn = 0;

#endif /* is_zero_pfn */
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

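/*
 * kmap_atomic()/kunmap_atomic() dropped their KM_USER0 slot argument in
 * newer kernels; these wrappers hide the difference so the code below can
 * use a single calling convention on all supported versions.
 */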
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *dbi_kmap_atomic(struct page *page)
{
        return kmap_atomic(page);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
        kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *dbi_kmap_atomic(struct page *page)
{
        return kmap_atomic(page, KM_USER0);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
        kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */

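/*
 * The DECLARE_MOD_FUNC_DEP / DECLARE_MOD_DEP_WRAPPER / IMP_MOD_DEP_WRAPPER /
 * INIT_MOD_DEP_VAR macros come from dbi_kprobes_deps.h.  As used below they
 * appear to declare a pointer for each non-exported kernel symbol we depend
 * on, emit a local wrapper that forwards its arguments through that pointer,
 * and let init_module_dependencies() resolve the real address at load time.
 * The exact semantics live in the header; this summary is only a guide.
 */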
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif /* copy_to_user_page */

static DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif
#else
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, \
                struct vm_area_struct *vma, struct page **pages, \
                struct vm_area_struct **vmas, unsigned long *position, int *length, \
                int i, int write);
#endif

#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static DECLARE_MOD_FUNC_DEP(follow_page, \
                struct page *, struct vm_area_struct *vma, \
                unsigned long address, unsigned int foll_flags);
static DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
                void, struct vm_area_struct *vma, struct page *page, \
                unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page, \
                struct page *, struct vm_area_struct *vma, \
                unsigned long addr, pte_t pte);
static DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
                void, struct vm_area_struct *vma, struct page *page, \
                unsigned long uaddr, void *kaddr, unsigned long len, int write);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
                void, struct task_struct *tsk);
#else
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
                void, struct rcu_head *rhp);
#endif

DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
                int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif
#else
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
                int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
                struct vm_area_struct *, struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
                struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif

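/*
 * dbi_in_gate_area(): version- and arch-independent check for whether
 * 'addr' falls in the task's gate area.  Kernels newer than 2.6.38 take an
 * mm_struct, older ones a task_struct; without __HAVE_ARCH_GATE_AREA the
 * plain in_gate_area() helper is used directly.
 */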
static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
        struct mm_struct *mm = task->mm;
        IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
        IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
        return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}


#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

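/*
 * dbi_in_gate_area_no_xxx(): hides the in_gate_area_no_mm() /
 * in_gate_area_no_task() rename so callers need no version checks.
 */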
static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
        return in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
        return in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}


#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 11))
DECLARE_MOD_DEP_WRAPPER(follow_page, \
                        struct page *, struct vm_area_struct *vma, \
                        unsigned long address, unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif
DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
                        void, struct vm_area_struct *vma, \
                        struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
                        struct page *, struct vm_area_struct *vma, \
                        unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
        void, struct vm_area_struct *vma, struct page *page, \
        unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)

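/*
 * init_module_dependencies(): resolve every non-exported kernel symbol the
 * wrappers above rely on, and cache the zero-page PFN where the kernel does
 * not provide is_zero_pfn().  Always returns 0 in the current implementation.
 */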
int init_module_dependencies(void)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
        INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif

#ifndef copy_to_user_page
        INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

        INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
        INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
        INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);

#ifdef CONFIG_HUGETLB_PAGE
        INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif

#ifdef __HAVE_ARCH_GATE_AREA
        INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif

#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))

#ifndef is_zero_pfn
        swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */

        INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
        INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

        INIT_MOD_DEP_VAR(follow_page, follow_page);

        INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
        INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
        INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
        INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else
        INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif
#else /* 2.6.16 */
        INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0))
        INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif

        return 0;
}

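/*
 * On the older kernels covered by the check below (the FIXME in the original
 * condition notes the cut-off should really be 2.6.32), get_user_pages-style
 * behaviour is driven by internal GUP_FLAGS_* values rather than FOLL_*;
 * they are reproduced here for the legacy __get_user_pages_uprobe() variant
 * further down.
 */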
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE                  0x1
#define GUP_FLAGS_FORCE                  0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
#define GUP_FLAGS_IGNORE_SIGKILL         0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
        /*
         * We don't want to optimize FOLL_ANON for make_pages_present()
         * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
         * we want to get the page from the page tables to make sure
         * that we serialize and update with any other user of that
         * mapping.
         */
        if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
                return 0;
        /*
         * And if we have a fault routine, it's not an anonymous region.
         */
        return !vma->vm_ops || !vma->vm_ops->fault;
}


#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)

#ifdef __HAVE_COLOR_ZERO_PAGE

static inline int swap_is_zero_pfn(unsigned long pfn)
{
        unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;
        return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
        return pfn == swap_zero_pfn;
}
#endif /* __HAVE_COLOR_ZERO_PAGE */

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
#ifndef is_zero_pfn
        return pfn == swap_zero_pfn;
#else /* is_zero_pfn */
        return is_zero_pfn(pfn);
#endif /* is_zero_pfn */
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
        return stack_guard_page_start(vma, addr) ||
                        stack_guard_page_end(vma, addr + PAGE_SIZE);
}

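/*
 * __get_user_pages_uprobe(), variant for kernels newer than 2.6.38.  It
 * closely mirrors the kernel's own __get_user_pages() logic: gate-area
 * handling, hugetlb VMAs, and faulting pages in via handle_mm_fault(); the
 * cond_resched() calls are deliberately left commented out.  Returns the
 * number of pages pinned, or a negative error code if nothing was pinned.
 */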
static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
                        unsigned long start, int nr_pages, unsigned int gup_flags,
                        struct page **pages, struct vm_area_struct **vmas,
                        int *nonblocking)
{
        int i;
        unsigned long vm_flags;

        if (nr_pages <= 0) {
                return 0;
        }

        VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

        /*
         * Require read or write permissions.
         * If FOLL_FORCE is set, we only require the "MAY" flags.
         */
        vm_flags  = (gup_flags & FOLL_WRITE) ?
                        (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
        vm_flags &= (gup_flags & FOLL_FORCE) ?
                        (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
        i = 0;

        do {
                struct vm_area_struct *vma;

                vma = find_extend_vma(mm, start);
                if (!vma && dbi_in_gate_area_no_xxx(start)) {
                        unsigned long pg = start & PAGE_MASK;
                        pgd_t *pgd;
                        pud_t *pud;
                        pmd_t *pmd;
                        pte_t *pte;

                        /* user gate pages are read-only */
                        if (gup_flags & FOLL_WRITE) {
                                return i ? : -EFAULT;
                        }
                        if (pg > TASK_SIZE)
                                pgd = pgd_offset_k(pg);
                        else
                                pgd = pgd_offset_gate(mm, pg);
                        BUG_ON(pgd_none(*pgd));
                        pud = pud_offset(pgd, pg);
                        BUG_ON(pud_none(*pud));
                        pmd = pmd_offset(pud, pg);
                        if (pmd_none(*pmd)) {
                                return i ? : -EFAULT;
                        }
                        VM_BUG_ON(pmd_trans_huge(*pmd));
                        pte = pte_offset_map(pmd, pg);
                        if (pte_none(*pte)) {
                                pte_unmap(pte);
                                return i ? : -EFAULT;
                        }
                        vma = get_gate_vma(mm);
                        if (pages) {
                                struct page *page;

                                page = vm_normal_page(vma, start, *pte);
                                if (!page) {
                                        if (!(gup_flags & FOLL_DUMP) &&
                                                swap_is_zero_pfn(pte_pfn(*pte)))
                                                page = pte_page(*pte);
                                        else {
                                                pte_unmap(pte);
                                                return i ? : -EFAULT;
                                        }
                                }
                                pages[i] = page;
                                get_page(page);
                        }
                        pte_unmap(pte);
                        goto next_page;
                }

                if (!vma ||
                        (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
                        !(vm_flags & vma->vm_flags)) {
                        return i ? : -EFAULT;
                }

                if (is_vm_hugetlb_page(vma)) {
                        i = follow_hugetlb_page(mm, vma, pages, vmas,
                                        &start, &nr_pages, i, gup_flags);
                        continue;
                }

                do {
                        struct page *page;
                        unsigned int foll_flags = gup_flags;

                        /*
                         * If we have a pending SIGKILL, don't keep faulting
                         * pages and potentially allocating memory.
                         */
                        if (unlikely(fatal_signal_pending(current))) {
                                return i ? i : -ERESTARTSYS;
                        }

                        /* cond_resched(); */
                        while (!(page = follow_page(vma, start, foll_flags))) {
                                int ret;
                                unsigned int fault_flags = 0;

                                /* For mlock, just skip the stack guard page. */
                                if (foll_flags & FOLL_MLOCK) {
                                        if (stack_guard_page(vma, start))
                                                goto next_page;
                                }
                                if (foll_flags & FOLL_WRITE)
                                        fault_flags |= FAULT_FLAG_WRITE;
                                if (nonblocking)
                                        fault_flags |= FAULT_FLAG_ALLOW_RETRY;
                                if (foll_flags & FOLL_NOWAIT)
                                        fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

                                ret = handle_mm_fault(mm, vma, start,
                                                        fault_flags);

                                if (ret & VM_FAULT_ERROR) {
                                        if (ret & VM_FAULT_OOM) {
                                                return i ? i : -ENOMEM;
                                        }
                                        if (ret & (VM_FAULT_HWPOISON |
                                                                VM_FAULT_HWPOISON_LARGE)) {
                                                if (i) {
                                                        return i;
                                                } else if (gup_flags & FOLL_HWPOISON) {
                                                        return -EHWPOISON;
                                                } else {
                                                        return -EFAULT;
                                                }
                                        }
                                        if (ret & VM_FAULT_SIGBUS) {
                                                return i ? i : -EFAULT;
                                        }
                                        BUG();
                                }

                                if (tsk) {
                                        if (ret & VM_FAULT_MAJOR)
                                                tsk->maj_flt++;
                                        else
                                                tsk->min_flt++;
                                }

                                if (ret & VM_FAULT_RETRY) {
                                        if (nonblocking)
                                                *nonblocking = 0;
                                        return i;
                                }

                                /*
                                 * The VM_FAULT_WRITE bit tells us that
                                 * do_wp_page has broken COW when necessary,
                                 * even if maybe_mkwrite decided not to set
                                 * pte_write. We can thus safely do subsequent
                                 * page lookups as if they were reads. But only
                                 * do so when looping for pte_write is futile:
                                 * in some cases userspace may also be wanting
                                 * to write to the gotten user page, which a
                                 * read fault here might prevent (a readonly
                                 * page might get reCOWed by userspace write).
                                 */
                                if ((ret & VM_FAULT_WRITE) &&
                                        !(vma->vm_flags & VM_WRITE))
                                        foll_flags &= ~FOLL_WRITE;

                                /* cond_resched(); */
                        }
                        if (IS_ERR(page)) {
                                return i ? i : PTR_ERR(page);
                        }
                        if (pages) {
                                pages[i] = page;

                                flush_anon_page(vma, page, start);
                                flush_dcache_page(page);
                        }
next_page:
                        if (vmas)
                                vmas[i] = vma;
                        i++;
                        start += PAGE_SIZE;
                        nr_pages--;
                } while (nr_pages && start < vma->vm_end);
        } while (nr_pages);

        return i;
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

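/*
 * Legacy __get_user_pages_uprobe() for kernels up to 2.6.38, driven by the
 * GUP_FLAGS_* values defined above rather than FOLL_*.  Structure and
 * behaviour broadly follow the newer variant.
 */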
static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
                unsigned long start, int len, int flags,
                struct page **pages, struct vm_area_struct **vmas)
{
        int i;
        unsigned int vm_flags = 0;
        int write = !!(flags & GUP_FLAGS_WRITE);
        int force = !!(flags & GUP_FLAGS_FORCE);
        int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);

        if (len <= 0)
                return 0;
        /*
         * Require read or write permissions.
         * If 'force' is set, we only require the "MAY" flags.
         */
        vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
        vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
        i = 0;

        do {
                struct vm_area_struct *vma;
                unsigned int foll_flags;

                vma = find_vma(mm, start);
                if (!vma && dbi_in_gate_area(tsk, start)) {
                        unsigned long pg = start & PAGE_MASK;
                        struct vm_area_struct *gate_vma = get_gate_vma(tsk);
                        pgd_t *pgd;
                        pud_t *pud;
                        pmd_t *pmd;
                        pte_t *pte;

                        /* user gate pages are read-only */
                        if (!ignore && write)
                                return i ? : -EFAULT;
                        if (pg > TASK_SIZE)
                                pgd = pgd_offset_k(pg);
                        else
                                pgd = pgd_offset_gate(mm, pg);
                        BUG_ON(pgd_none(*pgd));
                        pud = pud_offset(pgd, pg);
                        BUG_ON(pud_none(*pud));
                        pmd = pmd_offset(pud, pg);
                        if (pmd_none(*pmd))
                                return i ? : -EFAULT;
                        pte = pte_offset_map(pmd, pg);
                        if (pte_none(*pte)) {
                                pte_unmap(pte);
                                return i ? : -EFAULT;
                        }
                        if (pages) {
                                struct page *page = vm_normal_page(gate_vma, start, *pte);
                                pages[i] = page;
                                if (page)
                                        get_page(page);
                        }
                        pte_unmap(pte);
                        if (vmas)
                                vmas[i] = gate_vma;
                        i++;
                        start += PAGE_SIZE;
                        len--;
                        continue;
                }

                if (!vma ||
                        (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
                        (!ignore && !(vm_flags & vma->vm_flags)))
                        return i ? : -EFAULT;

                if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
                        i = follow_hugetlb_page(mm, vma, pages, vmas,
                                                &start, &len, i);
#else
                        i = follow_hugetlb_page(mm, vma, pages, vmas,
                                                &start, &len, i, write);
#endif
                        continue;
                }

                foll_flags = FOLL_TOUCH;
                if (pages)
                        foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
                if (!write && use_zero_page(vma))
                        foll_flags |= FOLL_ANON;
#endif
#endif

                do {
                        struct page *page;

                        if (write)
                                foll_flags |= FOLL_WRITE;

                        /* cond_resched(); */

                        DBPRINTF("pages = %p vma = %p\n", pages, vma);
                        while (!(page = follow_page(vma, start, foll_flags))) {
                                int ret;
                                ret = handle_mm_fault(mm, vma, start,
                                                foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
                                if (ret & VM_FAULT_WRITE)
                                        foll_flags &= ~FOLL_WRITE;

                                switch (ret & ~VM_FAULT_WRITE) {
                                case VM_FAULT_MINOR:
                                        tsk->min_flt++;
                                        break;
                                case VM_FAULT_MAJOR:
                                        tsk->maj_flt++;
                                        break;
                                case VM_FAULT_SIGBUS:
                                        return i ? i : -EFAULT;
                                case VM_FAULT_OOM:
                                        return i ? i : -ENOMEM;
                                default:
                                        BUG();
                                }

#else
                                if (ret & VM_FAULT_ERROR) {
                                        if (ret & VM_FAULT_OOM)
                                                return i ? i : -ENOMEM;
                                        else if (ret & VM_FAULT_SIGBUS)
                                                return i ? i : -EFAULT;
                                        BUG();
                                }
                                if (ret & VM_FAULT_MAJOR)
                                        tsk->maj_flt++;
                                else
                                        tsk->min_flt++;

                                /*
                                 * The VM_FAULT_WRITE bit tells us that
                                 * do_wp_page has broken COW when necessary,
                                 * even if maybe_mkwrite decided not to set
                                 * pte_write. We can thus safely do subsequent
                                 * page lookups as if they were reads. But only
                                 * do so when looping for pte_write is futile:
                                 * in some cases userspace may also be wanting
                                 * to write to the gotten user page, which a
                                 * read fault here might prevent (a readonly
                                 * page might get reCOWed by userspace write).
                                 */
                                if ((ret & VM_FAULT_WRITE) &&
                                                !(vma->vm_flags & VM_WRITE))
                                        foll_flags &= ~FOLL_WRITE;

                                /* cond_resched(); */
#endif

                        }

                        if (IS_ERR(page))
                                return i ? i : PTR_ERR(page);
                        if (pages) {
                                pages[i] = page;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
                                flush_anon_page(page, start);
#else
                                flush_anon_page(vma, page, start);
#endif
                                flush_dcache_page(page);
                        }
                        if (vmas)
                                vmas[i] = vma;
                        i++;
                        start += PAGE_SIZE;
                        len--;
                } while (len && start < vma->vm_end);
        } while (len);
        return i;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

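/*
 * get_user_pages_uprobe(): public entry point with the classic
 * get_user_pages() argument list.  It translates write/force into the flag
 * set the selected __get_user_pages_uprobe() variant expects, or falls back
 * to plain get_user_pages() on kernels older than 2.6.29.
 */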
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
                unsigned long start, int len, int write, int force,
                struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
        int flags = FOLL_TOUCH;

        if (pages)
                flags |= FOLL_GET;
        if (write)
                flags |= FOLL_WRITE;
        if (force)
                flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
        int flags = 0;

        if (write)
                flags |= GUP_FLAGS_WRITE;
        if (force)
                flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

        return __get_user_pages_uprobe(tsk, mm,
                                start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
                                pages, vmas, NULL);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
                                pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else
        return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif
}

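/*
 * Optional fast path for accessing the current task's memory with
 * get_user()/put_user() instead of pinning pages.  It is disabled by
 * default (ACCESS_PROCESS_OPTIMIZATION == 0), and the original code notes
 * that the write path does not work.
 */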
#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)

static void read_data_current(unsigned long addr, void *buf, int len)
{
        int step;
        int pos = 0;

        for (step = GET_STEP_4(len); len; len -= step) {
                switch (GET_STEP_4(len)) {
                case 1:
                        get_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
                        step = 1;
                        break;

                case 2:
                case 3:
                        get_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
                        step = 2;
                        break;

                case 4:
                        get_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));
                        step = 4;
                        break;
                }

                pos += step;
        }
}

/* not working */
static void write_data_current(unsigned long addr, void *buf, int len)
{
        int step;
        int pos = 0;

        for (step = GET_STEP_4(len); len; len -= step) {
                switch (GET_STEP_4(len)) {
                case 1:
                        put_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
                        step = 1;
                        break;

                case 2:
                case 3:
                        put_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
                        step = 2;
                        break;

                case 4:
                        put_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));
                        step = 4;
                        break;
                }

                pos += step;
        }
}
#endif

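/*
 * access_process_vm_atomic(): read from or write to another task's address
 * space, page by page, using get_user_pages_uprobe() and the atomic kmap
 * wrappers above.  Returns the number of bytes actually transferred (which
 * may be less than 'len'), 0 if the task has no mm, or -1 for len <= 0.
 *
 * Hypothetical usage sketch ('task' and 'addr' are illustrative only):
 *
 *      unsigned long word;
 *      if (access_process_vm_atomic(task, addr, &word,
 *                                   sizeof(word), 0) != sizeof(word))
 *              printk("short read at %lx\n", addr);
 */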
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        void *old_buf = buf;

        if (len <= 0) {
                return -1;
        }

#if ACCESS_PROCESS_OPTIMIZATION
        if (write == 0 && tsk == current) {
                read_data_current(addr, buf, len);
                return len;
        }
#endif

        mm = tsk->mm; /* FIXME: should use get_task_mm() here */
        if (!mm)
                return 0;

        /* ignore errors, just check how much was successfully transferred */
        while (len) {
                int bytes, ret, offset;
                void *maddr;
                struct page *page = NULL;

                ret = get_user_pages_uprobe(tsk, mm, addr, 1,
                                                write, 1, &page, &vma);

                if (ret <= 0) {
                        /*
                         * Check if this is a VM_IO | VM_PFNMAP VMA, which
                         * we can access using slightly different code.
                         */
#ifdef CONFIG_HAVE_IOREMAP_PROT
                        vma = find_vma(mm, addr);
                        if (!vma)
                                break;
                        if (vma->vm_ops && vma->vm_ops->access)
                                ret = vma->vm_ops->access(vma, addr, buf,
                                                        len, write);
                        if (ret <= 0)
#endif
                                break;
                        bytes = ret;
                } else {
                        bytes = len;
                        offset = addr & (PAGE_SIZE - 1);
                        if (bytes > PAGE_SIZE - offset)
                                bytes = PAGE_SIZE - offset;

                        maddr = dbi_kmap_atomic(page);

                        if (write) {
                                copy_to_user_page(vma, page, addr,
                                                        maddr + offset, buf, bytes);
                                set_page_dirty_lock(page);
                        } else {
                                copy_from_user_page(vma, page, addr,
                                                        buf, maddr + offset, bytes);
                        }

                        dbi_kunmap_atomic(maddr);
                        page_cache_release(page);
                }
                len -= bytes;
                buf += bytes;
                addr += bytes;
        }

        return buf - old_buf;
}

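/*
 * page_present(): walk the page tables of 'mm' and report whether the page
 * backing 'address' is currently present with a valid PFN.  Returns 1 if
 * so, 0 otherwise; it never faults the page in.
 */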
int page_present(struct mm_struct *mm, unsigned long address)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *ptep, pte;
        unsigned long pfn;

        pgd = pgd_offset(mm, address);
        if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
                goto out;

        pud = pud_offset(pgd, address);
        if (pud_none(*pud) || unlikely(pud_bad(*pud)))
                goto out;

        pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
                goto out;

        ptep = pte_offset_map(pmd, address);
        if (!ptep)
                goto out;

        pte = *ptep;
        pte_unmap(ptep);
        if (pte_present(pte)) {
                pfn = pte_pfn(pte);
                if (pfn_valid(pfn)) {
                        return 1;
                }
        }

out:
        return 0;
}


EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);