/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: User-Space Probes
 *              initial implementation; support for x86/ARM/MIPS in both user and kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign module for separating core and arch parts
 *
 */
27
28 #include <linux/module.h>
29
30 #include "dbi_kprobes_deps.h"
31 #include "dbi_kdebug.h"
32
33
34 unsigned int *sched_addr;
35 unsigned int *fork_addr;
36
37
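/*
 * Local copies of the internal get_user_pages() flag values.  These appear
 * to mirror the GUP_FLAGS_* constants from mm/internal.h of ~2.6.29-2.6.31
 * kernels, which are not exported to modules.
 */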
#define GUP_FLAGS_WRITE                  0x1
#define GUP_FLAGS_FORCE                  0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
#define GUP_FLAGS_IGNORE_SIGKILL         0x8

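/*
 * The DECLARE_MOD_*() / IMP_MOD_DEP_WRAPPER() / INIT_MOD_DEP_VAR() macros
 * come from dbi_kprobes_deps.h.  As used here they appear to declare, for
 * each kernel symbol that is not exported to modules, a function pointer
 * plus a wrapper with the original signature that calls through that
 * pointer; init_module_dependencies() below fills in the pointers,
 * presumably by resolving the symbol names at run time (see the
 * kallsyms_search callback).
 */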
DECLARE_MOD_CB_DEP(kallsyms_search, unsigned long, const char *name);

DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct * tsk, unsigned long addr, void *buf, int len, int write);

DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct * mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#else
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct * vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);

DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);


#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head * rhp);
#endif

DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct * mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#else
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif

DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)

DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)

DECLARE_MOD_DEP_WRAPPER(follow_page, \
		struct page *, struct vm_area_struct * vma, \
		unsigned long address, unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)

DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)

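/*
 * Resolve all of the run-time dependencies declared above.  Intended to be
 * called once, before any of the wrappers in this file are used.  Always
 * returns 0 in the current implementation.
 */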
int init_module_dependencies(void)
{
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);
	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
	INIT_MOD_DEP_VAR(follow_page, follow_page);
	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif
#else /* 2.6.16 */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif

	return 0;
}
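
/*
 * Illustrative usage only (not part of this file): a client module would
 * typically resolve the dependencies from its init routine before arming
 * any probes.  The names below are hypothetical.
 *
 *	static int __init my_probe_module_init(void)
 *	{
 *		int ret = init_module_dependencies();
 *		if (ret)
 *			return ret;
 *		return register_my_probes();
 *	}
 */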


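/*
 * This appears to be a local copy of the static use_zero_page() helper from
 * mm/memory.c of pre-2.6.31 kernels (the only kernels where FOLL_ANON is
 * defined); it is only used under the version check further below.
 */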
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}

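/*
 * This appears to be a copy of the kernel's internal __get_user_pages()
 * (circa 2.6.29/2.6.30) with the cond_resched() calls commented out and
 * find_extend_vma() replaced by find_vma(), presumably to avoid those side
 * effects when called from probe context.  Returns the number of pages
 * pinned, or a negative errno if nothing was pinned.
 */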
int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
	int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);

	if (len <= 0)
		return 0;
	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		//vma = find_extend_vma(mm, start);
		vma = find_vma(mm, start);
		if (!vma && in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
				(vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
				(!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &len, i, write);
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif

		do {
			struct page *page;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory, unless
			 * current is handling munlock--e.g., on exit. In
			 * that case, we are not allocating memory.  Rather,
			 * we're only unlocking already resident/mapped pages.
			 */
			if (unlikely(!ignore_sigkill &&
						fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			if (write)
				foll_flags |= FOLL_WRITE;

			//cond_resched();

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
						!(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				//cond_resched();
			}
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);

	return i;
}

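/*
 * Wrapper that selects between the local __get_user_pages_uprobe() copy
 * (2.6.29 and newer, where the flag-based internal interface exists) and
 * the exported get_user_pages() on older kernels.
 */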
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;

	return __get_user_pages_uprobe(tsk, mm,
			start, len, flags,
			pages, vmas);
#else
	return get_user_pages(tsk, mm,
			      start, len, write, force,
			      pages, vmas);
#endif
}

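/*
 * Read from or write to another process' address space.  Mirrors the
 * kernel's access_process_vm(), but goes through get_user_pages_uprobe()
 * above instead of the stock get_user_pages().  Returns the number of bytes
 * actually transferred (0 if the task's mm could not be grabbed).
 *
 * Illustrative call (hypothetical caller, not part of this file):
 *
 *	unsigned long insn;
 *	if (access_process_vm_atomic(task, probe_addr, &insn,
 *				     sizeof(insn), 0) != sizeof(insn))
 *		return -EFAULT;
 */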
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	down_read(&mm->mmap_sem);
	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
				write, 1, &page, &vma);
		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
						len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = kmap(page);
			if (write) {
				copy_to_user_page(vma, page, addr,
						maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						buf, maddr + offset, bytes);
			}
			kunmap(page);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}
	up_read(&mm->mmap_sem);
	mmput(mm);

	return buf - old_buf;
}

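/*
 * Walk the page tables of @mm and report whether a page is currently
 * present (mapped) at @addr.  Returns 1 if the PTE is present, 0 otherwise.
 * The walk is done under mmap_sem (read) and mm->page_table_lock.
 */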
int page_present(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	int ret = 0;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
	pud_t *pud;
#endif

	down_read(&mm->mmap_sem);
	spin_lock(&mm->page_table_lock);

	pgd = pgd_offset(mm, addr);
	if ((pgd != NULL) && pgd_present(*pgd)) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
		pud = pud_offset(pgd, addr);
		if ((pud != NULL) && pud_present(*pud)) {
			pmd = pmd_offset(pud, addr);
#else
		/* no pud level before 2.6.11: go straight from pgd to pmd */
		{
			pmd = pmd_offset(pgd, addr);
#endif
			if ((pmd != NULL) && pmd_present(*pmd)) {
				pte = pte_offset_map(pmd, addr);
				if ((pte != NULL) && pte_present(*pte))
					ret = 1;
				pte_unmap(pte);
			}
		}
	}

	spin_unlock(&mm->page_table_lock);
	up_read(&mm->mmap_sem);

	return ret;
}


EXPORT_SYMBOL_GPL(access_process_vm_atomic);
EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);