/*
 * Dynamic Binary Instrumentation Module based on KProbes
 * modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009 Alexey Gerenkov <a.gerenkov@samsung.com>: initial implementation
 *           of user-space probes; x86/ARM/MIPS support for both user and
 *           kernel space.
 * 2010      Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesigned the
 *           module to separate the core and architecture-specific parts.
 */
#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

#include <linux/slab.h>
unsigned long sched_addr;
unsigned long fork_addr;
unsigned long exit_addr;
unsigned long sys_exit_group_addr;
unsigned long do_group_exit_addr;
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/* redefine the kernel's 'pgd_offset_k' to walk init_task's active mm */
#undef pgd_offset_k
#define pgd_offset_k(addr)	pgd_offset(init_task.active_mm, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
#ifndef is_zero_pfn
static unsigned long swap_zero_pfn = 0;
#endif /* is_zero_pfn */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *dbi_kmap_atomic(struct page *page)
{
	return kmap_atomic(page, KM_USER0);
}

static inline void dbi_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
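
/*
 * Illustrative sketch only (not part of the module): the wrappers above give
 * callers a single signature regardless of whether kmap_atomic() still takes
 * a KM_* slot argument. A hypothetical helper using them could look like:
 *
 *	void *kaddr = dbi_kmap_atomic(page);
 *	memcpy(dst, kaddr + offset, len);
 *	dbi_kunmap_atomic(kaddr);
 *
 * i.e. map, touch the page through the returned kernel address, then unmap
 * the same address, exactly as access_process_vm_atomic() does further down.
 */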
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff, populate)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(do_mmap_pgoff, unsigned long, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */
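
/*
 * Illustrative sketch only (an assumption, not the real macro bodies, which
 * live in dbi_kprobes_deps.h): the DECLARE_MOD_FUNC_DEP /
 * DECLARE_MOD_DEP_WRAPPER / IMP_MOD_DEP_WRAPPER trio used throughout this
 * file roughly follows the pattern
 *
 *	static unsigned long (*__ref_do_mmap_pgoff)(struct file *, unsigned long, ...);
 *	unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, ...)
 *	{
 *		return __ref_do_mmap_pgoff(file, addr, ...);
 *	}
 *
 * i.e. a function pointer that INIT_MOD_DEP_VAR() resolves at init time plus
 * a thin wrapper that forwards its arguments through that pointer.
 */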
/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif /* copy_to_user_page */

static DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, \
		struct vm_area_struct *vma, struct page **pages, \
		struct vm_area_struct **vmas, unsigned long *position, int *length, \
		int i, unsigned int write);
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
static DECLARE_MOD_FUNC_DEP(follow_page_mask, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags, \
		unsigned int *page_mask);
DECLARE_MOD_DEP_WRAPPER(follow_page_mask, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags, \
		unsigned int *page_mask)
IMP_MOD_DEP_WRAPPER(follow_page_mask, vma, address, foll_flags, page_mask)
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_DEP_WRAPPER(follow_page, struct page *, \
		struct vm_area_struct *vma, \
		unsigned long address, \
		unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

static DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);
static DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);
#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
static DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, unsigned int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif /* CONFIG_HUGETLB_PAGE */
static inline int dbi_in_gate_area(struct task_struct *task, unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
	return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static inline int dbi_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}
DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)
int init_module_dependencies(void)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
	INIT_MOD_DEP_VAR(follow_page_mask, follow_page_mask);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
	INIT_MOD_DEP_VAR(follow_page, follow_page);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
#ifndef is_zero_pfn
	swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */
	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif /* (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)) */
#else /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif /* (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16)) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

	return 0;
}
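
/*
 * Illustrative sketch only (hypothetical caller): init_module_dependencies()
 * is expected to run once from the module init path, before any wrapped
 * symbol above is used, e.g.:
 */
#if 0
static int __init dbi_deps_example_init(void)
{
	int ret = init_module_dependencies();

	if (ret)
		return ret;	/* a dependency could not be resolved */
	/* ... safe to register probes and use the wrappers from here on ... */
	return 0;
}
#endif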
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE				0x1
#define GUP_FLAGS_FORCE				0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS	0x4
#define GUP_FLAGS_IGNORE_SIGKILL		0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */
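
/*
 * Example (illustrative): a plain anonymous mapping such as a process heap is
 * neither VM_LOCKED nor VM_SHARED and has no vm_ops->fault, so use_zero_page()
 * returns non-zero and the legacy gup path below may add FOLL_ANON for
 * read-only lookups; a file-backed or locked VMA keeps the normal path.
 */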
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)

#ifdef __HAVE_COLOR_ZERO_PAGE

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	return pfn == swap_zero_pfn;
}

#endif /* __HAVE_COLOR_ZERO_PAGE */

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
#ifndef is_zero_pfn
	return pfn == swap_zero_pfn;
#else /* is_zero_pfn */
	return is_zero_pfn(pfn);
#endif /* is_zero_pfn */
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
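
/*
 * Illustrative sketch only: whichever variant is compiled in, the intended
 * question is "does this pte point at the (possibly per-colour) zero page?",
 * so a caller typically writes something like
 *
 *	if (swap_is_zero_pfn(pte_pfn(*pte)))
 *		page = pte_page(*pte);
 *
 * which is how the gup loops below treat zero-page mappings.
 */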
static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
	       stack_guard_page_end(vma, addr + PAGE_SIZE);
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)

static inline struct page *follow_page_uprobe(struct vm_area_struct *vma,
		unsigned long address, unsigned int foll_flags)
{
	unsigned int unused_page_mask;

	return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
}
long __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
{
	long i;
	unsigned long vm_flags;
	unsigned int page_mask;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	/*
	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
	 * would be called on PROT_NONE ranges. We must never invoke
	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
	 * page faults would unprotect the PROT_NONE ranges if
	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
	 * bitflag. So to avoid that, don't set FOLL_NUMA if
	 * FOLL_FORCE is set.
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;
	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					    swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			page_mask = 0;
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}
		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;
			unsigned int page_increm;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = follow_page_mask(vma, start,
						foll_flags, &page_mask))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags & FOLL_HWPOISON)
							return -EHWPOISON;
						else
							return -EFAULT;
					}
					if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}

				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
				page_mask = 0;
			}
next_page:
			if (vmas) {
				vmas[i] = vma;
				page_mask = 0;
			}
			page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
			if (page_increm > nr_pages)
				page_increm = nr_pages;
			i += page_increm;
			start += page_increm * PAGE_SIZE;
			nr_pages -= page_increm;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			unsigned long start, int nr_pages, unsigned int gup_flags,
			struct page **pages, struct vm_area_struct **vmas,
			int *nonblocking)
{
	int i;
	unsigned long vm_flags;

	if (nr_pages <= 0)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && dbi_in_gate_area_no_xxx(start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE) {
				return i ? : -EFAULT;
			}
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd)) {
				return i ? : -EFAULT;
			}
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					    swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags)) {
			return i ? : -EFAULT;
		}

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			continue;
		}
		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				return i ? i : -ERESTARTSYS;
			}

			/* cond_resched(); */
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

				ret = handle_mm_fault(mm, vma, start,
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						return i ? i : -ENOMEM;
					}
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i) {
							return i;
						}
						else if (gup_flags & FOLL_HWPOISON) {
							return -EHWPOISON;
						}
						else {
							return -EFAULT;
						}
					}
					if (ret & VM_FAULT_SIGBUS) {
						return i ? i : -EFAULT;
					}
					BUG();
				}

				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
next_page:
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

static int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);

	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && dbi_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i);
#else
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i, write);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
			continue;
		}
		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) */

		do {
			struct page *page;

			if (write)
				foll_flags |= FOLL_WRITE;

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;

				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
				flush_anon_page(page, start);
#else
				flush_anon_page(vma, page, start);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) */
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);

	return i;
}

#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
	int flags = FOLL_TOUCH;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

	return __get_user_pages_uprobe(tsk, mm,
			start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
			pages, vmas, NULL);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
			pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
}
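
/*
 * Illustrative sketch only (hypothetical helper): pinning a single user page
 * through the version-independent wrapper above and releasing it afterwards.
 */
#if 0
static int example_peek_user_page(struct task_struct *task,
				  unsigned long vaddr)
{
	struct page *page;
	struct vm_area_struct *vma;
	int ret;

	/* one page, read-only (write = 0), force = 1 as used further below */
	ret = get_user_pages_uprobe(task, task->mm, vaddr, 1, 0, 1,
				    &page, &vma);
	if (ret <= 0)
		return -EFAULT;

	/* ... inspect the page here (e.g. via dbi_kmap_atomic()) ... */

	page_cache_release(page);
	return 0;
}
#endif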
#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)
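
/*
 * Worked example: GET_STEP_4(7) evaluates to 4 (since 7 >= 4); after len -= 4
 * the next pass sees GET_STEP_4(3) == 3 % 4 == 3, and the remaining tail is
 * copied with the smaller access sizes handled by the switch below.
 */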
static void read_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			get_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 1;
			break;

		case 2:
		case 3:
			get_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 2;
			break;

		case 4:
			get_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}

static void write_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			put_user(*(u8 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 1;
			break;

		case 2:
		case 3:
			put_user(*(u16 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 2;
			break;

		case 4:
			put_user(*(u32 *)(buf + pos), (unsigned long *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}
#endif /* ACCESS_PROCESS_OPTIMIZATION */
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

#if ACCESS_PROCESS_OPTIMIZATION
	if (write == 0 && tsk == current) {
		read_data_current(addr, buf, len);
		return len;
	}
#endif /* ACCESS_PROCESS_OPTIMIZATION */

	mm = tsk->mm; /* function 'get_task_mm' is to be called */
	if (!mm)
		return 0;

	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
					    write, 1, &page, &vma);
		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = dbi_kmap_atomic(page);

			if (write) {
				copy_to_user_page(vma, page, addr,
						  maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset, bytes);
			}

			dbi_kunmap_atomic(maddr);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}

	return buf - old_buf;
}
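
/*
 * Illustrative sketch only (hypothetical caller): reading a word from another
 * task's address space with the copier defined above.
 */
#if 0
static int example_read_user_long(struct task_struct *task,
				  unsigned long vaddr, unsigned long *val)
{
	int copied = access_process_vm_atomic(task, vaddr, val,
					      sizeof(*val), 0 /* read */);

	return (copied == sizeof(*val)) ? 0 : -EFAULT;
}
#endif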
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn)) {
			return 1;
		}
	}

out:
	return 0;
}
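
/*
 * Illustrative sketch only: page_present() is a lightweight "is this address
 * currently backed by a valid frame?" check, e.g. before deciding whether a
 * probe handler may touch the address directly or must fall back to
 * access_process_vm_atomic():
 *
 *	if (page_present(task->mm, vaddr))
 *		... fast path ...
 *	else
 *		... fault it in / copy through access_process_vm_atomic() ...
 */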
EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);