/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: User-Space Probes
 *              initial implementation; support x86/ARM/MIPS for both user and kernel spaces.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign module for separating core and arch parts
 */
#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

#include <linux/slab.h>
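
/*
 * Addresses of scheduler- and exit-related kernel functions.  They are only
 * declared here; presumably the probe-registration code resolves and fills
 * them in at run time.
 */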
unsigned long sched_addr;
unsigned long fork_addr;
unsigned long exit_addr;
unsigned long sys_exit_group_addr;
unsigned long do_group_exit_addr;
unsigned long sys_exit_addr;
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/* redefine the kernel's pgd_offset_k() to walk init_task's active_mm */
#undef pgd_offset_k
#define pgd_offset_k(addr)	pgd_offset(init_task.active_mm, addr)
#endif
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
#ifndef is_zero_pfn
static unsigned long swap_zero_pfn = 0;
#endif /* is_zero_pfn */
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
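
/*
 * kmap_atomic()/kunmap_atomic() dropped their KM_USER0 slot argument in
 * newer kernels, so the dbi_* wrappers below hide that difference for the
 * kernel version range this module supports.
 */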
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page, KM_USER0);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
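
/*
 * The DECLARE_MOD_FUNC_DEP/DECLARE_MOD_DEP_WRAPPER/IMP_MOD_DEP_WRAPPER
 * macros come from dbi_kprobes_deps.h.  They declare a function pointer for
 * a kernel symbol that is not exported to modules and emit a small wrapper
 * that forwards to it; the pointers are presumably filled in by
 * INIT_MOD_DEP_VAR() in init_module_dependencies() below via a kernel
 * symbol lookup.
 */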
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff, populate)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */
/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif /* copy_to_user_page */

static DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, \
		struct vm_area_struct *vma, struct page **pages, \
		struct vm_area_struct **vmas, unsigned long *position, int *length, \
		int i, unsigned int write);
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
static DECLARE_MOD_FUNC_DEP(follow_page_mask, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags, \
		unsigned int *page_mask);
DECLARE_MOD_DEP_WRAPPER(follow_page_mask, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags, \
		unsigned int *page_mask)
IMP_MOD_DEP_WRAPPER(follow_page_mask, vma, address, foll_flags, page_mask)
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_DEP_WRAPPER(follow_page, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);
static DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);
#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif /* CONFIG_HUGETLB_PAGE */
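
/*
 * dbi_in_gate_area() hides an API change: on kernels after 2.6.38
 * in_gate_area() takes a struct mm_struct *, on older kernels it takes a
 * struct task_struct *.
 */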
static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
	return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
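
/*
 * Same idea for the "no task/mm" variant: in_gate_area_no_task() became
 * in_gate_area_no_mm() after 2.6.38, so pick whichever this kernel provides.
 */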
static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}
DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)
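
/*
 * Resolve all dependencies declared above.  Each INIT_MOD_DEP_VAR(dep, name)
 * presumably looks the symbol "name" up in the kernel symbol table and stores
 * its address in the corresponding function pointer (see dbi_kprobes_deps.h
 * for the exact mechanism).
 */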
int init_module_dependencies(void)
{

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
	INIT_MOD_DEP_VAR(follow_page_mask, follow_page_mask);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
	INIT_MOD_DEP_VAR(follow_page, follow_page);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#ifndef is_zero_pfn
	swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */

	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

	return 0;
}
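
/*
 * Legacy get_user_pages() flag bits, copied here for the pre-2.6.38 variant
 * of __get_user_pages_uprobe() below; newer kernels use the FOLL_* flags
 * directly.
 */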
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE			0x1
#define GUP_FLAGS_FORCE			0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS	0x4
#define GUP_FLAGS_IGNORE_SIGKILL	0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * page.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)

#ifdef __HAVE_COLOR_ZERO_PAGE

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	return pfn == swap_zero_pfn;
}

#endif /* __HAVE_COLOR_ZERO_PAGE */

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
#ifndef is_zero_pfn
	return pfn == swap_zero_pfn;
#else /* is_zero_pfn */
	return is_zero_pfn(pfn);
#endif /* is_zero_pfn */
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
	       stack_guard_page_end(vma, addr + PAGE_SIZE);
}
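
/*
 * The __get_user_pages_uprobe() variants below closely follow the kernel's
 * __get_user_pages() for the matching kernel versions.  The cond_resched()
 * calls are commented out, presumably so the function can be used from
 * atomic (probe handler) context where sleeping is not allowed.
 */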
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)

static inline struct page *follow_page_uprobe(struct vm_area_struct *vma,
		unsigned long address, unsigned int foll_flags)
{
	unsigned int unused_page_mask;
	return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
}
long __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
	unsigned long vm_flags;
	unsigned int page_mask;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	/*
	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
	 * would be called on PROT_NONE ranges. We must never invoke
	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
	 * page faults would unprotect the PROT_NONE ranges if
	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
	 * bitflag. So to avoid that, don't set FOLL_NUMA if
	 * FOLL_FORCE is set.
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			pgd = pgd_offset_k(pg);
			pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				return i ? : -EFAULT;
			vma = get_gate_vma(mm);

				page = vm_normal_page(vma, start, *pte);
					if (!(gup_flags & FOLL_DUMP) &&
					    swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
						return i ? : -EFAULT;

		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);

			unsigned int foll_flags = gup_flags;
			unsigned int page_increm;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = follow_page_mask(vma, start,
						foll_flags, &page_mask))) {
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						else if (gup_flags & FOLL_HWPOISON)
					if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;

					if (ret & VM_FAULT_MAJOR)

				if (ret & VM_FAULT_RETRY) {

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
				return i ? i : PTR_ERR(page);

			flush_anon_page(vma, page, start);
			flush_dcache_page(page);

			page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
			if (page_increm > nr_pages)
				page_increm = nr_pages;
			start += page_increm * PAGE_SIZE;
			nr_pages -= page_increm;
		} while (nr_pages && start < vma->vm_end);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int nr_pages, unsigned int gup_flags,
		struct page **pages, struct vm_area_struct **vmas,
	unsigned long vm_flags;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area_no_xxx(start)) {
			unsigned long pg = start & PAGE_MASK;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE) {
				return i ? : -EFAULT;
			pgd = pgd_offset_k(pg);
			pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd)) {
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				return i ? : -EFAULT;
			vma = get_gate_vma(mm);

				page = vm_normal_page(vma, start, *pte);
					if (!(gup_flags & FOLL_DUMP) &&
					    swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
						return i ? : -EFAULT;

		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags)) {
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);

			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = follow_page(vma, start, foll_flags))) {
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						else if (gup_flags & FOLL_HWPOISON) {
					if (ret & VM_FAULT_SIGBUS) {
						return i ? i : -EFAULT;

					if (ret & VM_FAULT_MAJOR)

				if (ret & VM_FAULT_RETRY) {

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
				return i ? i : PTR_ERR(page);

			flush_anon_page(vma, page, start);
			flush_dcache_page(page);

		} while (nr_pages && start < vma->vm_end);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);

	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			pgd = pgd_offset_k(pg);
			pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				return i ? : -EFAULT;
				struct page *page = vm_normal_page(gate_vma, start, *pte);

		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &len, i, write);

		foll_flags = FOLL_TOUCH;
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
			foll_flags |= FOLL_WRITE;

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
					return i ? i : -ENOMEM;

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;

				if (ret & VM_FAULT_MAJOR)

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				return i ? i : PTR_ERR(page);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				flush_anon_page(page, start);
				flush_anon_page(vma, page, start);
				flush_dcache_page(page);

		} while (len && start < vma->vm_end);

#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
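
/*
 * Public wrapper: build the right flag set for this kernel version and call
 * the local __get_user_pages_uprobe() copy; on kernels older than 2.6.29 it
 * simply falls back to the exported get_user_pages().
 */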
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
	int flags = FOLL_TOUCH;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
		flags |= GUP_FLAGS_WRITE;
		flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

	return __get_user_pages_uprobe(tsk, mm,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
}
#define ACCESS_PROCESS_OPTIMIZATION 0
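
/*
 * Optional fast path (disabled by the 0 above): when the target is the
 * current task, read/write its memory directly with get_user()/put_user()
 * in 4/2/1 byte steps instead of going through get_user_pages.
 */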
#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)
static void read_data_current(unsigned long addr, void *buf, int len)
	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
			get_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
			get_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
			get_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));

static void write_data_current(unsigned long addr, void *buf, int len)
	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
			put_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
			put_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
			put_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));
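
/*
 * access_process_vm_atomic() mirrors the kernel's access_process_vm(), but
 * uses the local get_user_pages_uprobe() and dbi_kmap_atomic() so that,
 * presumably, it can be called from atomic context.  It returns the number
 * of bytes actually transferred.
 */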
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

#if ACCESS_PROCESS_OPTIMIZATION
	if (write == 0 && tsk == current) {
		read_data_current(addr, buf, len);

	mm = tsk->mm; /* function 'get_task_mm' is to be called */

	/* ignore errors, just check how much was successfully transferred */
		int bytes, ret, offset;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
				write, 1, &page, &vma);
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,

			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = dbi_kmap_atomic(page);
				copy_to_user_page(vma, page, addr,
						maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
				copy_from_user_page(vma, page, addr,
						buf, maddr + offset, bytes);

			dbi_kunmap_atomic(maddr);
			page_cache_release(page);

	return buf - old_buf;
}
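
/*
 * page_present() walks the page tables (pgd -> pud -> pmd -> pte) and
 * reports whether the given address is currently mapped to a valid,
 * present page frame.
 */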
int page_present(struct mm_struct *mm, unsigned long address)
	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))

	ptep = pte_offset_map(pmd, address);

	if (pte_present(pte)) {
		if (pfn_valid(pfn)) {
EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);