/*
 * Dynamic Binary Instrumentation Module based on KProbes
 * modules/kprobe/swap_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: User-Space Probes
 *              initial implementation; support for x86/ARM/MIPS in both
 *              user and kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesigned
 *              the module to separate the core and arch parts.
 */
#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "swap_kprobes_deps.h"
#include "swap_kdebug.h"

#include <linux/slab.h>
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/*
 * Redefinition of the kernel's 'pgd_offset_k': walk the kernel page tables
 * through init_task.active_mm, since init_mm is not exported to modules.
 */
#undef pgd_offset_k
#define pgd_offset_k(addr)	pgd_offset(init_task.active_mm, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29) */
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38))
#ifndef is_zero_pfn

/* PFN of the zero page, resolved in init_module_dependencies(). */
static unsigned long swap_zero_pfn;

#endif /* is_zero_pfn */
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36)
static inline void *swap_kmap_atomic(struct page *page)
{
	return kmap_atomic(page);
}

static inline void swap_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr);
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
static inline void *swap_kmap_atomic(struct page *page)
{
	return kmap_atomic(page, KM_USER0);
}

static inline void swap_kunmap_atomic(void *kvaddr)
{
	kunmap_atomic(kvaddr, KM_USER0);
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 36) */
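
/*
 * Usage sketch (illustrative only, not compiled): kmap_atomic() lost its
 * KM_USER0 slot argument in 2.6.37, which is all the wrappers above hide.
 * They are meant for short, non-sleeping single-page mappings, as done in
 * access_process_vm_atomic() further down:
 */
#if 0
static void example_peek_page(struct page *page)
{
	void *kvaddr = swap_kmap_atomic(page);	/* map page, no sleeping */
	/* ... read or write up to PAGE_SIZE bytes at kvaddr ... */
	swap_kunmap_atomic(kvaddr);		/* unmap again */
}
#endif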
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file,
		     unsigned long addr, unsigned long len, unsigned long prot,
		     unsigned long flags, unsigned long pgoff,
		     unsigned long *populate);
DECLARE_MOD_DEP_WRAPPER(swap_do_mmap_pgoff,
			unsigned long,
			struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff,
			unsigned long *populate)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff,
		    populate)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
DECLARE_MOD_FUNC_DEP(do_mmap_pgoff, unsigned long, struct file *file,
		     unsigned long addr, unsigned long len, unsigned long prot,
		     unsigned long flags, unsigned long pgoff);
DECLARE_MOD_DEP_WRAPPER(swap_do_mmap_pgoff,
			unsigned long,
			struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff)
IMP_MOD_DEP_WRAPPER(do_mmap_pgoff, file, addr, len, prot, flags, pgoff)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
EXPORT_SYMBOL_GPL(swap_do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */
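
/*
 * The DECLARE_/IMP_MOD_DEP_* macros (from swap_kprobes_deps.h) implement a
 * simple "module dependency" pattern for unexported kernel symbols: a
 * function pointer is declared per symbol, resolved once at load time (see
 * init_module_dependencies() below), and a wrapper forwards its arguments
 * to it.  A minimal hand-written sketch of what the macros boil down to,
 * assuming a kallsyms-style lookup (illustrative only, not the actual
 * macro expansion):
 */
#if 0
static unsigned long (*do_mmap_pgoff_p)(struct file *, unsigned long,
					unsigned long, unsigned long,
					unsigned long, unsigned long,
					unsigned long *);

unsigned long swap_do_mmap_pgoff(struct file *file, unsigned long addr,
				 unsigned long len, unsigned long prot,
				 unsigned long flags, unsigned long pgoff,
				 unsigned long *populate)
{
	/* forward to the kernel symbol resolved at module load time */
	return do_mmap_pgoff_p(file, addr, len, prot, flags, pgoff, populate);
}
#endif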
/* copy_to_user_page */
#ifndef copy_to_user_page
static DECLARE_MOD_FUNC_DEP(copy_to_user_page, void,
			    struct vm_area_struct *vma, struct page *page,
			    unsigned long uaddr, void *dst, const void *src,
			    unsigned long len);
DECLARE_MOD_DEP_WRAPPER(swap_copy_to_user_page,
			void,
			struct vm_area_struct *vma, struct page *page,
			unsigned long uaddr, void *dst, const void *src,
			unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#else /* copy_to_user_page */
#define swap_copy_to_user_page copy_to_user_page
#endif /* copy_to_user_page */
static DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *,
			    struct mm_struct *mm, unsigned long addr);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm,
			    struct vm_area_struct *vma, unsigned long address,
			    int write_access);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
static DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm,
			    struct vm_area_struct *vma, unsigned long address,
			    unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *,
			    struct mm_struct *mm);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *,
			    struct task_struct *tsk);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct mm_struct *mm,
		     unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *task,
		     unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#endif /* __HAVE_ARCH_GATE_AREA */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_mm, int, unsigned long addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
static DECLARE_MOD_FUNC_DEP(follow_page_mask,
			    struct page *, struct vm_area_struct *vma,
			    unsigned long address, unsigned int foll_flags,
			    unsigned int *page_mask);
DECLARE_MOD_DEP_WRAPPER(swap_follow_page_mask,
			struct page *,
			struct vm_area_struct *vma, unsigned long address,
			unsigned int foll_flags, unsigned int *page_mask)
IMP_MOD_DEP_WRAPPER(follow_page_mask, vma, address, foll_flags, page_mask)
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(follow_page,
			    struct page *, struct vm_area_struct *vma,
			    unsigned long address, unsigned int foll_flags);
DECLARE_MOD_DEP_WRAPPER(swap_follow_page,
			struct page *,
			struct vm_area_struct *vma, unsigned long address,
			unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static DECLARE_MOD_FUNC_DEP(__flush_anon_page,
			    void, struct vm_area_struct *vma,
			    struct page *page, unsigned long vmaddr);
static DECLARE_MOD_FUNC_DEP(vm_normal_page,
			    struct page *, struct vm_area_struct *vma,
			    unsigned long addr, pte_t pte);
#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
static DECLARE_MOD_FUNC_DEP(put_task_struct,
			    void, struct task_struct *tsk);
#else /* LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16) */
static DECLARE_MOD_FUNC_DEP(put_task_struct,
			    void, struct rcu_head *rhp);
#endif /* LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16) */
DECLARE_MOD_DEP_WRAPPER(swap_find_extend_vma,
			struct vm_area_struct *,
			struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(swap_handle_mm_fault,
			int,
			struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
DECLARE_MOD_DEP_WRAPPER(swap_handle_mm_fault,
			int,
			struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(swap_get_gate_vma,
			struct vm_area_struct *,
			struct mm_struct *mm)
IMP_MOD_DEP_WRAPPER(get_gate_vma, mm)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(swap_get_gate_vma,
			struct vm_area_struct *,
			struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#ifdef CONFIG_HUGETLB_PAGE

#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page,
		     int,
		     struct mm_struct *mm, struct vm_area_struct *vma,
		     struct page **pages, struct vm_area_struct **vmas,
		     unsigned long *position, int *length, int i,
		     unsigned int flags);
DECLARE_MOD_DEP_WRAPPER(swap_follow_hugetlb_page,
			int,
			struct mm_struct *mm, struct vm_area_struct *vma,
			struct page **pages, struct vm_area_struct **vmas,
			unsigned long *position, int *length, int i,
			unsigned int flags)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page,
		    mm, vma, pages, vmas, position, length, i, flags)
#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) */
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page,
		     long,
		     struct mm_struct *mm, struct vm_area_struct *vma,
		     struct page **pages, struct vm_area_struct **vmas,
		     unsigned long *position, unsigned long *nr_pages,
		     long i, unsigned int flags);
DECLARE_MOD_DEP_WRAPPER(swap_follow_hugetlb_page,
			long,
			struct mm_struct *mm, struct vm_area_struct *vma,
			struct page **pages, struct vm_area_struct **vmas,
			unsigned long *position, unsigned long *nr_pages,
			long i, unsigned int flags)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page,
		    mm, vma, pages, vmas, position, nr_pages, i, flags)
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) */

#else /* CONFIG_HUGETLB_PAGE */
#define swap_follow_hugetlb_page follow_hugetlb_page
#endif /* CONFIG_HUGETLB_PAGE */
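
/*
 * Note: kernel 3.9 changed follow_hugetlb_page() to take page counts as
 * 'unsigned long *nr_pages' (instead of 'int *length') and to return
 * 'long', which is why the two wrapper variants above differ in prototype.
 */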
static inline int swap_in_gate_area(struct task_struct *task,
				    unsigned long addr)
{
#ifdef __HAVE_ARCH_GATE_AREA
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	struct mm_struct *mm = task->mm;
	IMP_MOD_DEP_WRAPPER(in_gate_area, mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	IMP_MOD_DEP_WRAPPER(in_gate_area, task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* __HAVE_ARCH_GATE_AREA */
	return in_gate_area(task, addr);
#endif /* __HAVE_ARCH_GATE_AREA */
}
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
DECLARE_MOD_DEP_WRAPPER(swap_in_gate_area_no_mm, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_mm, addr)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
DECLARE_MOD_DEP_WRAPPER(swap_in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static inline int swap_in_gate_area_no_xxx(unsigned long addr)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
	return swap_in_gate_area_no_mm(addr);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	return swap_in_gate_area_no_task(addr);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
}
DECLARE_MOD_DEP_WRAPPER(swap__flush_anon_page,
			void,
			struct vm_area_struct *vma, struct page *page,
			unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)
static inline void swap_flush_anon_page(struct vm_area_struct *vma,
					struct page *page,
					unsigned long vmaddr)
{
#if defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM)
	swap__flush_anon_page(vma, page, vmaddr);
#else /* defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM) */
	flush_anon_page(vma, page, vmaddr);
#endif /* defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM) */
}
DECLARE_MOD_DEP_WRAPPER(swap_vm_normal_page,
			struct page *,
			struct vm_area_struct *vma, unsigned long addr,
			pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)
int init_module_dependencies(void)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

#ifndef copy_to_user_page
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif /* copy_to_user_page */

	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#endif /* __HAVE_ARCH_GATE_AREA */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
	INIT_MOD_DEP_VAR(follow_page_mask, follow_page_mask);
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
	INIT_MOD_DEP_VAR(follow_page, follow_page);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#ifndef is_zero_pfn
	swap_zero_pfn = page_to_pfn(ZERO_PAGE(0));
#endif /* is_zero_pfn */

	INIT_MOD_DEP_VAR(in_gate_area_no_mm, in_gate_area_no_mm);
#else /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)) */

#if defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM)
	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
#endif /* defined(ARCH_HAS_FLUSH_ANON_PAGE) && defined(CONFIG_ARM) */

	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11) */
#else /* LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16) */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif /* LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16) */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
	INIT_MOD_DEP_VAR(do_mmap_pgoff, do_mmap_pgoff);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) */

	return 0;
}
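
/*
 * Usage sketch (illustrative only, not compiled): callers are expected to
 * resolve the dependencies once, from their module init path, before any
 * of the swap_* wrappers above are used:
 */
#if 0
static int __init example_init(void)
{
	int ret = init_module_dependencies();

	if (ret)
		return ret;
	/* ... the swap_* wrappers may be called from here on ... */
	return 0;
}
#endif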
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) /* FIXME: must be < 32 */
#define GUP_FLAGS_WRITE				0x1
#define GUP_FLAGS_FORCE				0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS	0x4
#define GUP_FLAGS_IGNORE_SIGKILL		0x8
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 38) */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)

#ifdef __HAVE_COLOR_ZERO_PAGE

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	unsigned long offset_from_zero_pfn = pfn - swap_zero_pfn;

	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#else /* __HAVE_COLOR_ZERO_PAGE */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
	return pfn == swap_zero_pfn;
}

#endif /* __HAVE_COLOR_ZERO_PAGE */

#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */

static inline int swap_is_zero_pfn(unsigned long pfn)
{
#ifndef is_zero_pfn
	return pfn == swap_zero_pfn;
#else /* is_zero_pfn */
	return is_zero_pfn(pfn);
#endif /* is_zero_pfn */
}

#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
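
/*
 * Note: with __HAVE_COLOR_ZERO_PAGE (e.g. on MIPS or s390) the kernel keeps
 * several zero pages at consecutive PFNs to avoid cache aliasing, so the
 * first variant above accepts the whole PFN range covered by
 * zero_page_mask rather than a single PFN.
 */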
static inline int stack_guard_page(struct vm_area_struct *vma,
				   unsigned long addr)
{
	return stack_guard_page_start(vma, addr) ||
	       stack_guard_page_end(vma, addr + PAGE_SIZE);
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)

static long __get_user_pages_uprobe(struct task_struct *tsk,
				    struct mm_struct *mm,
				    unsigned long start,
				    unsigned long nr_pages,
				    unsigned int gup_flags,
				    struct page **pages,
				    struct vm_area_struct **vmas,
				    int *nonblocking)
{
	long i;
	unsigned long vm_flags;
	unsigned int page_mask;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	/*
	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
	 * would be called on PROT_NONE ranges. We must never invoke
	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
	 * page faults would unprotect the PROT_NONE ranges if
	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
	 * bitflag. So to avoid that, don't set FOLL_NUMA if
	 * FOLL_FORCE is set.
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = swap_find_extend_vma(mm, start);
		if (!vma && swap_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = swap_get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = swap_vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					    swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			page_mask = 0;
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = swap_follow_hugetlb_page(mm, vma, pages, vmas,
						     &start, &nr_pages, i,
						     gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;
			unsigned int page_increm;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			/* cond_resched(); */
			while (!(page = swap_follow_page_mask(vma, start,
						foll_flags, &page_mask))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY |
							FAULT_FLAG_RETRY_NOWAIT);

				ret = swap_handle_mm_fault(mm, vma, start,
							   fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags &
							 FOLL_HWPOISON)
							return -EHWPOISON;
						else
							return -EFAULT;
					}
					if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				swap_flush_anon_page(vma, page, start);
				flush_dcache_page(page);
				page_mask = 0;
			}
next_page:
			if (vmas) {
				vmas[i] = vma;
				page_mask = 0;
			}
			page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
			if (page_increm > nr_pages)
				page_increm = nr_pages;
			i += page_increm;
			start += page_increm * PAGE_SIZE;
			nr_pages -= page_increm;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */
static int __get_user_pages_uprobe(struct task_struct *tsk,
				   struct mm_struct *mm,
				   unsigned long start, int nr_pages,
				   unsigned int gup_flags,
				   struct page **pages,
				   struct vm_area_struct **vmas,
				   int *nonblocking)
{
	int i;
	unsigned long vm_flags;

	if (nr_pages <= 0)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = swap_find_extend_vma(mm, start);
		if (!vma && swap_in_gate_area_no_xxx(start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = swap_get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = swap_vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					    swap_is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			goto next_page;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags)) {
			return i ? : -EFAULT;
		}

		if (is_vm_hugetlb_page(vma)) {
			i = swap_follow_hugetlb_page(mm, vma, pages, vmas,
						     &start, &nr_pages, i,
						     gup_flags);
			continue;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				return i ? i : -ERESTARTSYS;
			}

			/* cond_resched(); */
			while (!(page = swap_follow_page(vma, start,
							 foll_flags))) {
				int ret;
				unsigned int fault_flags = 0;

				/* For mlock, just skip the stack guard page. */
				if (foll_flags & FOLL_MLOCK) {
					if (stack_guard_page(vma, start))
						goto next_page;
				}
				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY |
							FAULT_FLAG_RETRY_NOWAIT);

				ret = swap_handle_mm_fault(mm, vma, start,
							   fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						return i ? i : -ENOMEM;
					}
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags &
							 FOLL_HWPOISON) {
							return -EHWPOISON;
						} else {
							return -EFAULT;
						}
					}
					if (ret & VM_FAULT_SIGBUS) {
						return i ? i : -EFAULT;
					}
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				swap_flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
next_page:
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) */

#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
static int __get_user_pages_uprobe(struct task_struct *tsk,
				   struct mm_struct *mm,
				   unsigned long start, int len, int flags,
				   struct page **pages,
				   struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);

	if (len <= 0)
		return 0;

	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		vma = find_vma(mm, start);
		if (!vma && swap_in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma =
				swap_get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page =
					swap_vm_normal_page(gate_vma,
							    start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = swap_follow_hugetlb_page(mm, vma, pages, vmas,
						     &start, &len, i, write);
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

		do {
			struct page *page;

			if (write)
				foll_flags |= FOLL_WRITE;

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = swap_follow_page(vma, start,
							 foll_flags))) {
				int ret;

				ret = swap_handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18) */
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18) */
			}

			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				swap_flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);

	return i;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			  unsigned long start, int len, int write, int force,
			  struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) /* FIXME: must be >= 32! */
	int flags = FOLL_TOUCH;

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */

	return __get_user_pages_uprobe(tsk, mm,
				       start, len, flags,
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38)
				       pages, vmas, NULL);
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
				       pages, vmas);
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 38) */
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
}
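
/*
 * Usage sketch (illustrative only, not compiled): pin one page of a traced
 * task for reading and release it afterwards, mirroring what
 * access_process_vm_atomic() does below:
 */
#if 0
static int example_pin_one_page(struct task_struct *task, unsigned long vaddr)
{
	struct vm_area_struct *vma;
	struct page *page;
	int ret;

	ret = get_user_pages_uprobe(task, task->mm, vaddr, 1,
				    0 /* read */, 1 /* force */, &page, &vma);
	if (ret <= 0)
		return ret ? ret : -EFAULT;

	/* ... map with swap_kmap_atomic()/kmap() and copy the data ... */
	page_cache_release(page);	/* put_page() on newer kernels */
	return 0;
}
#endif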
#define ACCESS_PROCESS_OPTIMIZATION 0

#if ACCESS_PROCESS_OPTIMIZATION

#define GET_STEP_X(LEN, STEP) (((LEN) >= (STEP)) ? (STEP) : (LEN) % (STEP))
#define GET_STEP_4(LEN) GET_STEP_X((LEN), 4)

static void read_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			get_user(*(u8 *)(buf + pos),
				 (u8 __user *)(addr + pos));
			step = 1;
			break;
		case 2:
		case 3:
			get_user(*(u16 *)(buf + pos),
				 (u16 __user *)(addr + pos));
			step = 2;
			break;
		case 4:
			get_user(*(u32 *)(buf + pos),
				 (u32 __user *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}

static void write_data_current(unsigned long addr, void *buf, int len)
{
	int step;
	int pos = 0;

	for (step = GET_STEP_4(len); len; len -= step) {
		switch (GET_STEP_4(len)) {
		case 1:
			put_user(*(u8 *)(buf + pos),
				 (u8 __user *)(addr + pos));
			step = 1;
			break;
		case 2:
		case 3:
			put_user(*(u16 *)(buf + pos),
				 (u16 __user *)(addr + pos));
			step = 2;
			break;
		case 4:
			put_user(*(u32 *)(buf + pos),
				 (u32 __user *)(addr + pos));
			step = 4;
			break;
		}

		pos += step;
	}
}
#endif /* ACCESS_PROCESS_OPTIMIZATION */
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr,
			     void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;
	int atomic;

	if (len <= 0)
		return -1;

#if ACCESS_PROCESS_OPTIMIZATION
	if (write == 0 && tsk == current) {
		read_data_current(addr, buf, len);
		return len;
	}
#endif /* ACCESS_PROCESS_OPTIMIZATION */

	mm = tsk->mm; /* TODO: take a reference via get_task_mm() */
	if (!mm)
		return 0;

	/* FIXME: danger: write memory in atomic context */
	atomic = in_atomic();

	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
					    write, 1, &page, &vma);

		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif /* CONFIG_HAVE_IOREMAP_PROT */
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE - 1);
			if (bytes > PAGE_SIZE - offset)
				bytes = PAGE_SIZE - offset;

			maddr = atomic ? swap_kmap_atomic(page) : kmap(page);

			if (write) {
				swap_copy_to_user_page(vma, page, addr,
						       maddr + offset,
						       buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset,
						    bytes);
			}

			atomic ? swap_kunmap_atomic(maddr) : kunmap(page);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}

	return buf - old_buf;
}
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn))
			return 1;
	}

out:
	return 0;
}

EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);
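
/*
 * Usage sketch (illustrative only, not compiled): read one word of a traced
 * task's memory from a probe handler, where sleeping is not allowed. The
 * page_present() check avoids triggering a page fault from atomic context:
 */
#if 0
static void example_dump_word(struct task_struct *task, unsigned long vaddr)
{
	unsigned long word;

	if (!task->mm || !page_present(task->mm, vaddr))
		return;		/* page not mapped, don't fault it in */

	if (access_process_vm_atomic(task, vaddr, &word,
				     sizeof(word), 0) == sizeof(word))
		printk(KERN_INFO "0x%lx: 0x%lx\n", vaddr, word);
}
#endif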