1 /*
2  *  Dynamic Binary Instrumentation Module based on KProbes
3  *  modules/kprobe/dbi_kprobes_deps.c
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18  *
19  * Copyright (C) Samsung Electronics, 2006-2010
20  *
21  * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: initial implementation of
22  *              user-space probes; support for x86/ARM/MIPS in both user and kernel space.
23  * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesigned the module to separate the core and architecture-specific parts
24  *
25  */
26
27 #include <linux/module.h>
28 #include <linux/sched.h>
29
30 #include <asm/pgtable.h>
31
32 #include "dbi_kprobes_deps.h"
33 #include "dbi_kdebug.h"
34
35
36 #include <linux/slab.h>
37 #include <linux/mm.h>
38
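/*
 * Addresses of the scheduler/fork/exit entry points.  They are only defined
 * here; the probe core is expected to fill them in elsewhere (presumably via
 * a run-time symbol lookup such as swap_ksyms()).
 */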
39 unsigned long sched_addr;
40 unsigned long fork_addr;
41 unsigned long exit_addr;
42
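/*
 * init_mm is not reliably available to modules on newer kernels, so on
 * kernels past the guard below its address is resolved at run time in
 * init_module_dependencies() (see the swap_ksyms("init_mm") lookup there).
 */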
43 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
44 static struct mm_struct* init_mm_ptr;
45 struct mm_struct init_mm;
46 #endif
47
48
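/*
 * kmap_atomic()/kunmap_atomic() lost their KM_* slot argument after 2.6.36,
 * so the dbi_* wrappers below hide that signature difference from the rest
 * of the file.  Illustrative pairing (a sketch of what
 * access_process_vm_atomic() does further down; "kbuf" is a placeholder):
 *
 *      void *maddr = dbi_kmap_atomic(page);
 *      memcpy(kbuf, maddr + offset, bytes);    /@ no sleeping while mapped @/
 *      dbi_kunmap_atomic(maddr);
 */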
49 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
50 static inline void *dbi_kmap_atomic(struct page *page)
51 {
52         return kmap_atomic(page);
53 }
54 static inline void dbi_kunmap_atomic(void *kvaddr)
55 {
56         kunmap_atomic(kvaddr);
57 }
58 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
59 static inline void *dbi_kmap_atomic(struct page *page)
60 {
61         return kmap_atomic(page, KM_USER0);
62 }
63
64 static inline void dbi_kunmap_atomic(void *kvaddr)
65 {
66         kunmap_atomic(kvaddr, KM_USER0);
67 }
68 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
69
70
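/*
 * Dependency plumbing: the DECLARE_MOD_FUNC_DEP / DECLARE_MOD_DEP_WRAPPER /
 * IMP_MOD_DEP_WRAPPER / INIT_MOD_DEP_VAR macros are defined in
 * dbi_kprobes_deps.h.  As used in this file, they declare a pointer for a
 * kernel function that is not exported to modules, emit a stub with the
 * original signature that calls through that pointer, and let
 * init_module_dependencies() below fill the pointer in at load time by
 * resolving the symbol at run time.  A sketch for a hypothetical symbol
 * "foo" (not a real dependency of this module):
 *
 *      DECLARE_MOD_FUNC_DEP(foo, int, unsigned long addr);
 *      DECLARE_MOD_DEP_WRAPPER(foo, int, unsigned long addr)
 *      IMP_MOD_DEP_WRAPPER(foo, addr)
 *      ...
 *      INIT_MOD_DEP_VAR(foo, foo);     // inside init_module_dependencies()
 */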
71 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
72 DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
73 DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
74 IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
75 #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */
76
77 /* copy_to_user_page */
78 #ifndef copy_to_user_page
79 static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
80 DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
81 IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
82 #endif /* copy_to_user_page */
83
84
85 static DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct * tsk, unsigned long addr, void *buf, int len, int write);
86
87 static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct * mm, unsigned long addr);
88
89 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
90 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
91 static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
92 #endif
93 #else
94 static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
95 #endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
96
97 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
98 static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
99 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
100 static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
101 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
102
103 #ifdef CONFIG_HUGETLB_PAGE
104 DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, \
105                 struct vm_area_struct *vma, struct page **pages, \
106                 struct vm_area_struct **vmas, unsigned long *position, int *length, \
107                 int i, unsigned int write);
108 #endif
109
110 #ifdef __HAVE_ARCH_GATE_AREA
111 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
112 DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
113 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
114 DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
115 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
116 #endif /* __HAVE_ARCH_GATE_AREA */
117
118 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
119 static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
120 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
121 static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
122 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
123
124
125 static DECLARE_MOD_FUNC_DEP(follow_page, \
126                 struct page *, struct vm_area_struct * vma, \
127                 unsigned long address, unsigned int foll_flags);
128 static DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
129                 void, struct vm_area_struct *vma, struct page *page, \
130                 unsigned long vmaddr);
131 static DECLARE_MOD_FUNC_DEP(vm_normal_page, \
132                 struct page *, struct vm_area_struct *vma, \
133                 unsigned long addr, pte_t pte);
134 static DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
135                 void, struct vm_area_struct *vma, struct page *page, \
136                 unsigned long uaddr, void *kaddr, unsigned long len, int write);
137
138
139 #if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
140 static DECLARE_MOD_FUNC_DEP(put_task_struct, \
141                 void, struct task_struct *tsk);
142 #else
143 static DECLARE_MOD_FUNC_DEP(put_task_struct, \
144                 void, struct rcu_head * rhp);
145 #endif
146
147 DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
148 IMP_MOD_DEP_WRAPPER (access_process_vm, tsk, addr, buf, len, write)
149
150 DECLARE_MOD_DEP_WRAPPER (find_extend_vma, struct vm_area_struct *, struct mm_struct * mm, unsigned long addr)
151 IMP_MOD_DEP_WRAPPER (find_extend_vma, mm, addr)
152
153 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
154 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
155 DECLARE_MOD_DEP_WRAPPER (handle_mm_fault, \
156                         int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
157 IMP_MOD_DEP_WRAPPER (handle_mm_fault, mm, vma, address, write_access)
158 #endif
159 #else
160 DECLARE_MOD_DEP_WRAPPER (handle_mm_fault, \
161                         int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
162 IMP_MOD_DEP_WRAPPER (handle_mm_fault, mm, vma, address, flags)
163 #endif
164
165 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
166 DECLARE_MOD_DEP_WRAPPER (get_gate_vma, \
167                         struct vm_area_struct *, struct mm_struct *mm)
168 IMP_MOD_DEP_WRAPPER (get_gate_vma, mm)
169 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
170 DECLARE_MOD_DEP_WRAPPER (get_gate_vma, \
171                         struct vm_area_struct *, struct task_struct *tsk)
172 IMP_MOD_DEP_WRAPPER (get_gate_vma, tsk)
173 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
174
175 #ifdef CONFIG_HUGETLB_PAGE
176 DECLARE_MOD_DEP_WRAPPER (follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
177 IMP_MOD_DEP_WRAPPER (follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
178 #endif
179
180 static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
181 {
182 #ifdef __HAVE_ARCH_GATE_AREA
183 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
184         struct mm_struct *mm = task->mm;
185         IMP_MOD_DEP_WRAPPER (in_gate_area, mm, addr)
186 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
187         IMP_MOD_DEP_WRAPPER (in_gate_area, task, addr)
188 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
189 #else /*__HAVE_ARCH_GATE_AREA */
190 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
        return in_gate_area(task->mm, addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
        return in_gate_area(task, addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
191 #endif/*__HAVE_ARCH_GATE_AREA */
192 }
193
194
195 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
196 DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
197 IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
198 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
199 DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
200 IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
201 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
202
203 static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
204 {
205 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
206         return in_gate_area_no_mm(addr);
207 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
208         return in_gate_area_no_task(addr);
209 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
210 }
211
212
213 #if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 11))
214 DECLARE_MOD_DEP_WRAPPER (follow_page, \
215                         struct page *, struct vm_area_struct * vma, \
216                         unsigned long address, unsigned int foll_flags)
217 IMP_MOD_DEP_WRAPPER (follow_page, vma, address, foll_flags)
218 #endif
219 DECLARE_MOD_DEP_WRAPPER (__flush_anon_page, \
220                         void, struct vm_area_struct *vma, \
221                         struct page *page, unsigned long vmaddr)
222 IMP_MOD_DEP_WRAPPER (__flush_anon_page, vma, page, vmaddr)
223
224 DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
225                         struct page *, struct vm_area_struct *vma, \
226                         unsigned long addr, pte_t pte)
227 IMP_MOD_DEP_WRAPPER (vm_normal_page, vma, addr, pte)
228
229 DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
230         void, struct vm_area_struct *vma, struct page *page, \
231         unsigned long uaddr, void *kaddr, unsigned long len, int write)
232 IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)
233
234
235 int init_module_dependencies(void)
236 {
237
238 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
239         init_mm_ptr = (struct mm_struct*)swap_ksyms("init_mm");
240 //      memcmp(init_mm_ptr, &init_mm, sizeof(struct mm_struct));
241 #endif
242
243 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
244         INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
245 #endif
246
247 #ifndef copy_to_user_page
248         INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
249 #endif /* copy_to_user_page */
250
251         INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
252         INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
253         INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);
254
255 #ifdef CONFIG_HUGETLB_PAGE
256         INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
257 #endif
258
259 #ifdef  __HAVE_ARCH_GATE_AREA
260         INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
261 #endif
262
263 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
264         INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
265 #else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))  */
266         INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
267 #endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))  */
268
269         INIT_MOD_DEP_VAR(follow_page, follow_page);
270
271         INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
272         INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
273         INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);
274
275 #if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
276 # if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
277         INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
278 # else
279         INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
280 # endif
281 #else /*2.6.16 */
282         INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
283 #endif
284
285 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0))
286         INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
287 #endif
288
289         return 0;
290 }
291
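/*
 * These mirror the internal GUP_FLAGS_* values that older kernels kept in
 * mm/internal.h before the FOLL_* flags took over; they are only consumed by
 * the legacy __get_user_pages_uprobe() variant further down.
 */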
292 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
293 #define GUP_FLAGS_WRITE                  0x1
294 #define GUP_FLAGS_FORCE                  0x2
295 #define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
296 #define GUP_FLAGS_IGNORE_SIGKILL         0x8
297 #endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
298
299 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
300 static inline int use_zero_page(struct vm_area_struct *vma)
301 {
302         /*
303          * We don't want to optimize FOLL_ANON for make_pages_present()
304          * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
305          * we want to get the page from the page tables to make sure
306          * that we serialize and update with any other user of that
307          * mapping.
308          */
309         if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
310                 return 0;
311         /*
312          * And if we have a fault routine, it's not an anonymous region.
313          */
314         return !vma->vm_ops || !vma->vm_ops->fault;
315 }
316
317
318 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
319 static unsigned long zero_pfn __read_mostly;
320
321 #ifndef is_zero_pfn
322 static inline int is_zero_pfn(unsigned long pfn)
323 {
324         return pfn == zero_pfn;
325 }
326 #endif
327
328 static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
329 {
330         return stack_guard_page_start(vma, addr) ||
331                stack_guard_page_end(vma, addr+PAGE_SIZE);
332 }
333
334 static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
335                      unsigned long start, int nr_pages, unsigned int gup_flags,
336                      struct page **pages, struct vm_area_struct **vmas,
337                      int *nonblocking)
338 {
339         int i;
340         unsigned long vm_flags;
341
342         if (nr_pages <= 0) {
343                 return 0;
344         }
345
346         VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
347
348         /*
349          * Require read or write permissions.
350          * If FOLL_FORCE is set, we only require the "MAY" flags.
351          */
352         vm_flags  = (gup_flags & FOLL_WRITE) ?
353                         (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
354         vm_flags &= (gup_flags & FOLL_FORCE) ?
355                         (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
356         i = 0;
357
358         do {
359                 struct vm_area_struct *vma;
360
361                 vma = find_extend_vma(mm, start);
362                 if (!vma && dbi_in_gate_area_no_xxx(start)) {
363                         unsigned long pg = start & PAGE_MASK;
364                         pgd_t *pgd;
365                         pud_t *pud;
366                         pmd_t *pmd;
367                         pte_t *pte;
368
369                         /* user gate pages are read-only */
370                         if (gup_flags & FOLL_WRITE) {
371                                 return i ? : -EFAULT;
372                         }
373                         if (pg > TASK_SIZE)
374                                 pgd = pgd_offset_k(pg);
375                         else
376                                 pgd = pgd_offset_gate(mm, pg);
377                         BUG_ON(pgd_none(*pgd));
378                         pud = pud_offset(pgd, pg);
379                         BUG_ON(pud_none(*pud));
380                         pmd = pmd_offset(pud, pg);
381                         if (pmd_none(*pmd)) {
382                                 return i ? : -EFAULT;
383                         }
384                         VM_BUG_ON(pmd_trans_huge(*pmd));
385                         pte = pte_offset_map(pmd, pg);
386                         if (pte_none(*pte)) {
387                                 pte_unmap(pte);
388                                 return i ? : -EFAULT;
389                         }
390                         vma = get_gate_vma(mm);
391                         if (pages) {
392                                 struct page *page;
393
394                                 page = vm_normal_page(vma, start, *pte);
395                                 if (!page) {
396                                         if (!(gup_flags & FOLL_DUMP) &&
397                                              is_zero_pfn(pte_pfn(*pte)))
398                                                 page = pte_page(*pte);
399                                         else {
400                                                 pte_unmap(pte);
401                                                 return i ? : -EFAULT;
402                                         }
403                                 }
404                                 pages[i] = page;
405                                 get_page(page);
406                         }
407                         pte_unmap(pte);
408                         goto next_page;
409                 }
410
411                 if (!vma ||
412                     (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
413                     !(vm_flags & vma->vm_flags)) {
414                         return i ? : -EFAULT;
415                 }
416
417                 if (is_vm_hugetlb_page(vma)) {
418                         i = follow_hugetlb_page(mm, vma, pages, vmas,
419                                         &start, &nr_pages, i, gup_flags);
420                         continue;
421                 }
422
423                 do {
424                         struct page *page;
425                         unsigned int foll_flags = gup_flags;
426
427                         /*
428                          * If we have a pending SIGKILL, don't keep faulting
429                          * pages and potentially allocating memory.
430                          */
431                         if (unlikely(fatal_signal_pending(current))) {
432                                 return i ? i : -ERESTARTSYS;
433                         }
434
435                         /* cond_resched(); */
436                         while (!(page = follow_page(vma, start, foll_flags))) {
437                                 int ret;
438                                 unsigned int fault_flags = 0;
439
440                                 /* For mlock, just skip the stack guard page. */
441                                 if (foll_flags & FOLL_MLOCK) {
442                                         if (stack_guard_page(vma, start))
443                                                 goto next_page;
444                                 }
445                                 if (foll_flags & FOLL_WRITE)
446                                         fault_flags |= FAULT_FLAG_WRITE;
447                                 if (nonblocking)
448                                         fault_flags |= FAULT_FLAG_ALLOW_RETRY;
449                                 if (foll_flags & FOLL_NOWAIT)
450                                         fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);
451
452                                 ret = handle_mm_fault(mm, vma, start,
453                                                         fault_flags);
454
455                                 if (ret & VM_FAULT_ERROR) {
456                                         if (ret & VM_FAULT_OOM) {
457                                                 return i ? i : -ENOMEM;
458                                         }
459                                         if (ret & (VM_FAULT_HWPOISON |
460                                                    VM_FAULT_HWPOISON_LARGE)) {
461                                                 if (i) {
462                                                         return i;
463                                                 }
464                                                 else if (gup_flags & FOLL_HWPOISON) {
465                                                         return -EHWPOISON;
466                                                 }
467                                                 else {
468                                                         return -EFAULT;
469                                                 }
470                                         }
471                                         if (ret & VM_FAULT_SIGBUS) {
472                                                 return i ? i : -EFAULT;
473                                         }
474                                         BUG();
475                                 }
476
477                                 if (tsk) {
478                                         if (ret & VM_FAULT_MAJOR)
479                                                 tsk->maj_flt++;
480                                         else
481                                                 tsk->min_flt++;
482                                 }
483
484                                 if (ret & VM_FAULT_RETRY) {
485                                         if (nonblocking)
486                                                 *nonblocking = 0;
487                                         return i;
488                                 }
489
490                                 /*
491                                  * The VM_FAULT_WRITE bit tells us that
492                                  * do_wp_page has broken COW when necessary,
493                                  * even if maybe_mkwrite decided not to set
494                                  * pte_write. We can thus safely do subsequent
495                                  * page lookups as if they were reads. But only
496                                  * do so when looping for pte_write is futile:
497                                  * in some cases userspace may also be wanting
498                                  * to write to the gotten user page, which a
499                                  * read fault here might prevent (a readonly
500                                  * page might get reCOWed by userspace write).
501                                  */
502                                 if ((ret & VM_FAULT_WRITE) &&
503                                     !(vma->vm_flags & VM_WRITE))
504                                         foll_flags &= ~FOLL_WRITE;
505
506                                 /* cond_resched(); */
507                         }
508                         if (IS_ERR(page)) {
509                                 return i ? i : PTR_ERR(page);
510                         }
511                         if (pages) {
512                                 pages[i] = page;
513
514                                 flush_anon_page(vma, page, start);
515                                 flush_dcache_page(page);
516                         }
517 next_page:
518                         if (vmas)
519                                 vmas[i] = vma;
520                         i++;
521                         start += PAGE_SIZE;
522                         nr_pages--;
523                 } while (nr_pages && start < vma->vm_end);
524         } while (nr_pages);
525
526         return i;
527 }
528 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
529
530 static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
531                      unsigned long start, int len, int flags,
532                 struct page **pages, struct vm_area_struct **vmas)
533 {
534         int i;
535         unsigned int vm_flags = 0;
536         int write = !!(flags & GUP_FLAGS_WRITE);
537         int force = !!(flags & GUP_FLAGS_FORCE);
538         int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
539         int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);
540
541         if (len <= 0)
542                 return 0;
543         /*
544          * Require read or write permissions.
545          * If 'force' is set, we only require the "MAY" flags.
546          */
547         vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
548         vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
549         i = 0;
550
551         do {
552                 struct vm_area_struct *vma;
553                 unsigned int foll_flags;
554
555                 vma = find_vma(mm, start);
556                 if (!vma && dbi_in_gate_area(tsk, start)) {
557                         unsigned long pg = start & PAGE_MASK;
558                         struct vm_area_struct *gate_vma = get_gate_vma(tsk);
559                         pgd_t *pgd;
560                         pud_t *pud;
561                         pmd_t *pmd;
562                         pte_t *pte;
563
564                         /* user gate pages are read-only */
565                         if (!ignore && write)
566                                 return i ? : -EFAULT;
567                         if (pg > TASK_SIZE)
568                                 pgd = pgd_offset_k(pg);
569                         else
570                                 pgd = pgd_offset_gate(mm, pg);
571                         BUG_ON(pgd_none(*pgd));
572                         pud = pud_offset(pgd, pg);
573                         BUG_ON(pud_none(*pud));
574                         pmd = pmd_offset(pud, pg);
575                         if (pmd_none(*pmd))
576                                 return i ? : -EFAULT;
577                         pte = pte_offset_map(pmd, pg);
578                         if (pte_none(*pte)) {
579                                 pte_unmap(pte);
580                                 return i ? : -EFAULT;
581                         }
582                         if (pages) {
583                                 struct page *page = vm_normal_page(gate_vma, start, *pte);
584                                 pages[i] = page;
585                                 if (page)
586                                         get_page(page);
587                         }
588                         pte_unmap(pte);
589                         if (vmas)
590                                 vmas[i] = gate_vma;
591                         i++;
592                         start += PAGE_SIZE;
593                         len--;
594                         continue;
595                 }
596
597                 if (!vma ||
598                     (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
599                     (!ignore && !(vm_flags & vma->vm_flags)))
600                         return i ? : -EFAULT;
601
602                 if (is_vm_hugetlb_page(vma)) {
603 #if  LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
604                         i = follow_hugetlb_page(mm, vma, pages, vmas,
605                                                 &start, &len, i);
606 #else
607                         i = follow_hugetlb_page(mm, vma, pages, vmas,
608                                                 &start, &len, i, write);
609 #endif
610                         continue;
611                 }
612
613                 foll_flags = FOLL_TOUCH;
614                 if (pages)
615                         foll_flags |= FOLL_GET;
616
617 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
618 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
619                 if (!write && use_zero_page(vma))
620                   foll_flags |= FOLL_ANON;
621 #endif
622 #endif
623
624                 do {
625                         struct page *page;
626
627 #if 0
628                         /*
629                          * If we have a pending SIGKILL, don't keep faulting
630                          * pages and potentially allocating memory, unless
631                          * current is handling munlock--e.g., on exit. In
632                          * that case, we are not allocating memory.  Rather,
633                          * we're only unlocking already resident/mapped pages.
634                          */
635                         if (unlikely(!ignore_sigkill &&
636                                         fatal_signal_pending(current)))
637                                 return i ? i : -ERESTARTSYS;
638 #endif
639
640                         if (write)
641                                 foll_flags |= FOLL_WRITE;
642
643
644                         //cond_resched();
645
646                         DBPRINTF ("pages = %p vma = %p\n", pages, vma);
647                         while (!(page = follow_page(vma, start, foll_flags))) {
648                                 int ret;
649                                 ret = handle_mm_fault(mm, vma, start,
650                                                 foll_flags & FOLL_WRITE);
651
652 #if  LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
653                                 if (ret & VM_FAULT_WRITE)
654                                   foll_flags &= ~FOLL_WRITE;
655
656                                 switch (ret & ~VM_FAULT_WRITE) {
657                                 case VM_FAULT_MINOR:
658                                   tsk->min_flt++;
659                                   break;
660                                 case VM_FAULT_MAJOR:
661                                   tsk->maj_flt++;
662                                   break;
663                                 case VM_FAULT_SIGBUS:
664                                   return i ? i : -EFAULT;
665                                 case VM_FAULT_OOM:
666                                   return i ? i : -ENOMEM;
667                                 default:
668                                   BUG();
669                                 }
670
671 #else
672                                 if (ret & VM_FAULT_ERROR) {
673                                   if (ret & VM_FAULT_OOM)
674                                     return i ? i : -ENOMEM;
675                                   else if (ret & VM_FAULT_SIGBUS)
676                                     return i ? i : -EFAULT;
677                                   BUG();
678                                 }
679                                 if (ret & VM_FAULT_MAJOR)
680                                   tsk->maj_flt++;
681                                 else
682                                   tsk->min_flt++;
683
684                                 /*
685                                  * The VM_FAULT_WRITE bit tells us that
686                                  * do_wp_page has broken COW when necessary,
687                                  * even if maybe_mkwrite decided not to set
688                                  * pte_write. We can thus safely do subsequent
689                                  * page lookups as if they were reads. But only
690                                  * do so when looping for pte_write is futile:
691                                  * in some cases userspace may also be wanting
692                                  * to write to the gotten user page, which a
693                                  * read fault here might prevent (a readonly
694                                  * page might get reCOWed by userspace write).
695                                  */
696                                 if ((ret & VM_FAULT_WRITE) &&
697                                     !(vma->vm_flags & VM_WRITE))
698                                   foll_flags &= ~FOLL_WRITE;
699
700                                 //cond_resched();
701 #endif
702
703                         }
704
705                         if (IS_ERR(page))
706                                 return i ? i : PTR_ERR(page);
707                         if (pages) {
708                                 pages[i] = page;
709
710 #if  LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
711                                 flush_anon_page(page, start);
712 #else
713                                 flush_anon_page(vma, page, start);
714 #endif
715                                 flush_dcache_page(page);
716                         }
717                         if (vmas)
718                                 vmas[i] = vma;
719                         i++;
720                         start += PAGE_SIZE;
721                         len--;
722                 } while (len && start < vma->vm_end);
723         } while (len);
724         return i;
725 }
726 #endif
727 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
728
729 int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
730                 unsigned long start, int len, int write, int force,
731                 struct page **pages, struct vm_area_struct **vmas)
732 {
733 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
734 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
735         int flags = FOLL_TOUCH;
736
737         if (pages)
738                 flags |= FOLL_GET;
739         if (write)
740                 flags |= FOLL_WRITE;
741         if (force)
742                 flags |= FOLL_FORCE;
743 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
744         int flags = 0;
745
746         if (write)
747                 flags |= GUP_FLAGS_WRITE;
748         if (force)
749                 flags |= GUP_FLAGS_FORCE;
750 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
751
752         return __get_user_pages_uprobe(tsk, mm,
753                                 start, len, flags,
754 #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
755                                        pages, vmas, NULL);
756 #else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
757                                        pages, vmas);
758 #endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
759 #else
760         return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
761 #endif
762 }
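/*
 * Illustrative use of get_user_pages_uprobe() (a sketch; "task" and "vaddr"
 * are placeholders for the traced task and a user virtual address, and
 * checking tsk->mm is the caller's job, as in access_process_vm_atomic()
 * below):
 *
 *      struct page *page;
 *      struct vm_area_struct *vma;
 *
 *      // pin one page for reading (write = 0, force = 1)
 *      if (get_user_pages_uprobe(task, task->mm, vaddr, 1, 0, 1,
 *                                &page, &vma) == 1)
 *              page_cache_release(page);
 */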
763
764 #define ACCESS_PROCESS_OPTIMIZATION 0
765
766 #if ACCESS_PROCESS_OPTIMIZATION
767
768 #define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
769 #define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)
770
771 static void read_data_current(unsigned long addr, void *buf, int len)
772 {
773         int step;
774         int pos = 0;
775
776         for (step = GET_STEP_4(len); len; len -= step) {
777                 switch (GET_STEP_4(len)) {
778                 case 1:
779                         get_user(*(u8 *)(buf + pos), (u8 *)(addr + pos));
780                         step = 1;
781                         break;
782
783                 case 2:
784                 case 3:
785                         get_user(*(u16 *)(buf + pos), (u16 *)(addr + pos));
786                         step = 2;
787                         break;
788
789                 case 4:
790                         get_user(*(u32 *)(buf + pos), (u32 *)(addr + pos));
791                         step = 4;
792                         break;
793                 }
794
795                 pos += step;
796         }
797 }
798
799 // not working
800 static void write_data_current(unsigned long addr, void *buf, int len)
801 {
802         int step;
803         int pos = 0;
804
805         for (step = GET_STEP_4(len); len; len -= step) {
806                 switch (GET_STEP_4(len)) {
807                 case 1:
808                         put_user(*(u8 *)(buf + pos), (u8 *)(addr + pos));
809                         step = 1;
810                         break;
811
812                 case 2:
813                 case 3:
814                         put_user(*(u16 *)(buf + pos), (u16 *)(addr + pos));
815                         step = 2;
816                         break;
817
818                 case 4:
819                         put_user(*(u32 *)(buf + pos), (u32 *)(addr + pos));
820                         step = 4;
821                         break;
822                 }
823
824                 pos += step;
825         }
826 }
827 #endif
828
829 int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
830 {
831         struct mm_struct *mm;
832         struct vm_area_struct *vma;
833         void *old_buf = buf;
834
835         if (len <= 0) {
836                 return -1;
837         }
838
839 #if ACCESS_PROCESS_OPTIMIZATION
840         if (write == 0 && tsk == current) {
841                 read_data_current(addr, buf, len);
842                 return len;
843         }
844 #endif
845
846         mm = tsk->mm; /* FIXME: get_task_mm()/mmput() should be used to take a reference on the mm */
847         if (!mm)
848                 return 0;
849
850         /* ignore errors, just check how much was successfully transferred */
851         while (len) {
852                 int bytes, ret, offset;
853                 void *maddr;
854                 struct page *page = NULL;
855
856                 ret = get_user_pages_uprobe(tsk, mm, addr, 1,
857                                             write, 1, &page, &vma);
858
859                 if (ret <= 0) {
860                         /*
861                          * Check if this is a VM_IO | VM_PFNMAP VMA, which
862                          * we can access using slightly different code.
863                          */
864 #ifdef CONFIG_HAVE_IOREMAP_PROT
865                         vma = find_vma(mm, addr);
866                         if (!vma)
867                                 break;
868                         if (vma->vm_ops && vma->vm_ops->access)
869                                 ret = vma->vm_ops->access(vma, addr, buf,
870                                                           len, write);
871                         if (ret <= 0)
872 #endif
873                                 break;
874                         bytes = ret;
875                 } else {
876                         bytes = len;
877                         offset = addr & (PAGE_SIZE-1);
878                         if (bytes > PAGE_SIZE-offset)
879                                 bytes = PAGE_SIZE-offset;
880
881                         maddr = dbi_kmap_atomic(page);
882
883                         if (write) {
884                                 copy_to_user_page(vma, page, addr,
885                                                   maddr + offset, buf, bytes);
886                                 set_page_dirty_lock(page);
887                         } else {
888                                 copy_from_user_page(vma, page, addr,
889                                                     buf, maddr + offset, bytes);
890                         }
891
892                         dbi_kunmap_atomic(maddr);
893                         page_cache_release(page);
894                 }
895                 len -= bytes;
896                 buf += bytes;
897                 addr += bytes;
898         }
899
900         return buf - old_buf;
901 }
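/*
 * Illustrative use of access_process_vm_atomic() (a sketch; "task" and
 * "uaddr" are placeholders): read one long from another task's address
 * space.
 *
 *      unsigned long val;
 *
 *      if (access_process_vm_atomic(task, uaddr, &val, sizeof(val), 0)
 *                      != sizeof(val))
 *              return -EFAULT;         // partial read or no mm
 */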
902
903 int page_present (struct mm_struct *mm, unsigned long address)
904 {
905         pgd_t *pgd;
906         pud_t *pud;
907         pmd_t *pmd;
908         pte_t *ptep, pte;
909         unsigned long pfn;
910
911         pgd = pgd_offset(mm, address);
912         if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
913                 goto out;
914
915         pud = pud_offset(pgd, address);
916         if (pud_none(*pud) || unlikely(pud_bad(*pud)))
917                 goto out;
918
919         pmd = pmd_offset(pud, address);
920         if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
921                 goto out;
922
923         ptep = pte_offset_map(pmd, address);
924         if (!ptep)
925                 goto out;
926
927         pte = *ptep;
928         pte_unmap(ptep);
929         if (pte_present(pte)) {
930                 pfn = pte_pfn(pte);
931                 if (pfn_valid(pfn)) {
932                         return 1;
933                 }
934         }
935
936 out:
937         return 0;
938 }
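/*
 * Illustrative use of page_present() (a sketch; "task" and "uaddr" are
 * placeholders): make sure a user address is backed by a resident page
 * before poking at it.
 *
 *      if (!page_present(task->mm, uaddr))
 *              return -EFAULT;
 */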
939
940
941 EXPORT_SYMBOL_GPL (page_present);
942 EXPORT_SYMBOL_GPL (get_user_pages_uprobe);
943 EXPORT_SYMBOL_GPL (access_process_vm_atomic);
944