fix optimization access_process_vm_atomic
[kernel/swap-modules.git] / kprobe / dbi_kprobes_deps.c
/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: initial implementation
 *              of user-space probes; support for x86/ARM/MIPS in both user and
 *              kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign of the
 *              module to separate the core and arch-specific parts.
 *
 */

#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"


#include <linux/slab.h>
#include <linux/mm.h>

unsigned long sched_addr;
unsigned long fork_addr;

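/*
 * Kernels after 2.6.29 no longer export init_mm to modules, so a local
 * copy is kept here; it is filled in from the real init_mm (looked up via
 * kallsyms) in init_module_dependencies() below.
 */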
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
struct mm_struct *init_mm_ptr;
struct mm_struct init_mm;
#endif


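/*
 * The DECLARE_MOD_FUNC_DEP/DECLARE_MOD_DEP_WRAPPER/IMP_MOD_DEP_WRAPPER
 * macros (defined in dbi_kprobes_deps.h) declare function pointers for
 * kernel symbols that are not exported to modules and emit small wrapper
 * functions that call through those pointers.  The pointers are resolved
 * at load time in init_module_dependencies() via kallsyms_search().
 */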
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

/* copy_to_user_page */
#ifndef copy_to_user_page
DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif /* copy_to_user_page */


DECLARE_MOD_CB_DEP(kallsyms_search, unsigned long, const char *name);
DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif
#else
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, int write);
#endif

#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */


DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);

DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);


#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif

DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif
#else
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif

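/*
 * Version-independent wrapper around in_gate_area(): kernels after 2.6.38
 * take an mm_struct, older ones take the task itself.
 */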
static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
	return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}


#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

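/*
 * Version-independent wrapper around in_gate_area_no_mm(), which was
 * called in_gate_area_no_task() before 2.6.39.
 */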
static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}


#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 11))
DECLARE_MOD_DEP_WRAPPER(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif

DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)


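/*
 * Resolve all of the kernel-symbol dependencies declared above and store
 * the addresses in the corresponding function pointers.  Must run before
 * any of the wrappers are used.
 */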
int init_module_dependencies(void)
{

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
	init_mm_ptr = (struct mm_struct *)kallsyms_search("init_mm");
	/* init_mm is not exported here, so keep a local copy of it */
	memcpy(&init_mm, init_mm_ptr, sizeof(struct mm_struct));
#endif

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);
	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif

#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

	INIT_MOD_DEP_VAR(follow_page, follow_page);

	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif
#else /* 2.6.16 */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0))
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif

	return 0;
}

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE                  0x1
#define GUP_FLAGS_FORCE                  0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
#define GUP_FLAGS_IGNORE_SIGKILL         0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}


#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
unsigned long zero_pfn __read_mostly;

#ifndef is_zero_pfn
static inline int is_zero_pfn(unsigned long pfn)
{
	return pfn == zero_pfn;
}
#endif

static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
	       stack_guard_page_end(vma, addr + PAGE_SIZE);
}

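/*
 * Local copy of __get_user_pages() for kernels after 2.6.38.  It differs
 * from the mainline version mainly in that the cond_resched() calls are
 * commented out, and it reaches the unexported helpers (follow_page(),
 * handle_mm_fault(), ...) through the wrappers declared above.
 */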
int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			unsigned long start, int nr_pages, unsigned int gup_flags,
			struct page **pages, struct vm_area_struct **vmas,
			int *nonblocking)
{
	int i;
	unsigned long vm_flags;

	if (nr_pages <= 0) {
		return 0;
	}

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area_no_xxx(start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE) {
				return i ? : -EFAULT;
			}
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd)) {
				return i ? : -EFAULT;
			}
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					     is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags)) {
			return i ? : -EFAULT;
		}

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				return i ? i : -ERESTARTSYS;
			}

			/* cond_resched(); */
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						return i ? i : -ENOMEM;
					}
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i) {
							return i;
						} else if (gup_flags & FOLL_HWPOISON) {
							return -EHWPOISON;
						} else {
							return -EFAULT;
						}
					}
					if (ret & VM_FAULT_SIGBUS) {
						return i ? i : -EFAULT;
					}
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page)) {
				return i ? i : PTR_ERR(page);
			}
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
next_page:
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

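/*
 * Variant of __get_user_pages_uprobe() for kernels up to 2.6.38, modelled
 * on the older __get_user_pages() interface that takes GUP_FLAGS_* instead
 * of FOLL_* flags.
 */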
int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			unsigned long start, int len, int flags,
			struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
	int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);

	if (len <= 0)
		return 0;
	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i);
#else
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i, write);
#endif
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif
#endif

		do {
			struct page *page;

#if 0
			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory, unless
			 * current is handling munlock--e.g., on exit. In
			 * that case, we are not allocating memory.  Rather,
			 * we're only unlocking already resident/mapped pages.
			 */
			if (unlikely(!ignore_sigkill &&
					fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;
#endif

			if (write)
				foll_flags |= FOLL_WRITE;


			/* cond_resched(); */

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}

#else
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
#endif

			}

			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
				flush_anon_page(page, start);
#else
				flush_anon_page(vma, page, start);
#endif
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);
	return i;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

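/*
 * get_user_pages_uprobe() mirrors get_user_pages(): it converts the
 * write/force arguments to the flag format expected by the local
 * __get_user_pages_uprobe() above.  On kernels older than 2.6.29 it simply
 * falls back to the stock get_user_pages().
 */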
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
	int flags = FOLL_TOUCH;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

	return __get_user_pages_uprobe(tsk, mm,
				start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
				       pages, vmas, 0);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
				       pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif
}

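/*
 * Optional fast path for access_process_vm_atomic(): when the target task
 * is current, data can be copied directly with get_user()/put_user()
 * instead of pinning pages.  Disabled by default; the write path is marked
 * as not working.
 */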
#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)

static void read_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			get_user(*(u8 *)(buf + pos), (u8 __user *)(addr + pos));
			step = 1;
			break;

		case 2:
		case 3:
			get_user(*(u16 *)(buf + pos), (u16 __user *)(addr + pos));
			step = 2;
			break;

		case 4:
			get_user(*(u32 *)(buf + pos), (u32 __user *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}

/* not working */
static void write_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			put_user(*(u8 *)(buf + pos), (u8 __user *)(addr + pos));
			step = 1;
			break;

		case 2:
		case 3:
			put_user(*(u16 *)(buf + pos), (u16 __user *)(addr + pos));
			step = 2;
			break;

		case 4:
			put_user(*(u32 *)(buf + pos), (u32 __user *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}
#endif

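/*
 * Counterpart of access_process_vm(): read from or write to another task's
 * address space, pinning pages through get_user_pages_uprobe() and mapping
 * them with kmap_atomic().  Returns the number of bytes transferred, or -1
 * if len is invalid.
 */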
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

	if (len <= 0) {
		return -1;
	}

#if ACCESS_PROCESS_OPTIMIZATION
	if (write == 0 && tsk == current) {
		read_data_current(addr, buf, len);
		return len;
	}
#endif

	mm = tsk->mm; /* FIXME: should take a reference with get_task_mm()/mmput() */
	if (!mm)
		return 0;

	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
					    write, 1, &page, &vma);

		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE - 1);
			if (bytes > PAGE_SIZE - offset)
				bytes = PAGE_SIZE - offset;

			maddr = kmap_atomic(page);

			if (write) {
				copy_to_user_page(vma, page, addr,
						  maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset, bytes);
			}

			kunmap_atomic(maddr);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}

	return buf - old_buf;
}

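/*
 * Walk the page tables of @mm and return 1 if @address is mapped by a
 * present pte with a valid pfn, 0 otherwise.
 */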
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn)) {
			return 1;
		}
	}

out:
	return 0;
}


EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);