/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: initial implementation
 *              of user-space probes; support for x86/ARM/MIPS in both user and
 *              kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign of the
 *              module to separate the core and architecture-specific parts
 */

#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"


unsigned int *sched_addr;
unsigned int *fork_addr;


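/*
 * These values mirror the kernel-internal GUP_FLAGS_* constants used by
 * __get_user_pages() (mm/internal.h) on the 2.6.29-2.6.31 kernels this code
 * was written against; they are not exported to modules, hence the local
 * copies.
 */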
#define GUP_FLAGS_WRITE                  0x1
#define GUP_FLAGS_FORCE                  0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
#define GUP_FLAGS_IGNORE_SIGKILL         0x8

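/*
 * init_mm is not reliably accessible to modules on kernels after 2.6.29, so
 * its address is looked up via kallsyms and its contents are copied into this
 * local instance by init_module_dependencies() below.
 */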
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
struct mm_struct *init_mm_ptr;
struct mm_struct init_mm;
#endif


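/*
 * DECLARE_MOD_CB_DEP() and DECLARE_MOD_FUNC_DEP() (see dbi_kprobes_deps.h)
 * declare function-pointer variables for kernel symbols that are not exported
 * to modules; the pointers are filled in at runtime by
 * init_module_dependencies() below.
 */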
DECLARE_MOD_CB_DEP(kallsyms_search, unsigned long, const char *name);

DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif
#else
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
DECLARE_MOD_FUNC_DEP(follow_page, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long address, unsigned int foll_flags);
DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
        void, struct vm_area_struct *vma, struct page *page, \
        unsigned long vmaddr);
DECLARE_MOD_FUNC_DEP(vm_normal_page, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long addr, pte_t pte);

DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
        void, struct vm_area_struct *vma, struct page *page, \
        unsigned long uaddr, void *kaddr, unsigned long len, int write);


#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
DECLARE_MOD_FUNC_DEP(put_task_struct, \
        void, struct task_struct *tsk);
#else
DECLARE_MOD_FUNC_DEP(put_task_struct, \
        void, struct rcu_head *rhp);
#endif

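/*
 * Each DECLARE_MOD_DEP_WRAPPER()/IMP_MOD_DEP_WRAPPER() pair below (again, see
 * dbi_kprobes_deps.h) emits a wrapper with the original kernel signature that
 * simply forwards its arguments through the corresponding resolved pointer.
 */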
DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
        int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif
#else
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
        int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif

DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
        struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)

DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)

DECLARE_MOD_DEP_WRAPPER(follow_page, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long address, unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)

DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
        void, struct vm_area_struct *vma, \
        struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
        void, struct vm_area_struct *vma, struct page *page, \
        unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)

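/*
 * init_module_dependencies() resolves the unexported kernel symbols declared
 * above through kallsyms and fills in the corresponding function pointers.
 * It is meant to be called once, early in the module's init path, before any
 * probe code uses the wrappers. Illustrative sketch only (the surrounding
 * init function is hypothetical):
 *
 *      static int __init dbi_module_init(void)
 *      {
 *              int ret = init_module_dependencies();
 *              if (ret)
 *                      return ret;
 *              ...
 *              return 0;
 *      }
 */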
int init_module_dependencies(void)
{

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
        init_mm_ptr = (struct mm_struct *)kallsyms_search("init_mm");
        /* keep a local copy of the kernel's init_mm */
        memcpy(&init_mm, init_mm_ptr, sizeof(struct mm_struct));
#endif

//#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
        INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif
//#endif

        INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);
        INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
        INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
        INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
        INIT_MOD_DEP_VAR(follow_page, follow_page);
        INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
        INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
        INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
        INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else
        INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif
#else /* 2.6.16 */
        INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif

        return 0;
}

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
        /*
         * We don't want to optimize FOLL_ANON for make_pages_present()
         * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
         * we want to get the page from the page tables to make sure
         * that we serialize and update with any other user of that
         * mapping.
         */
        if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
                return 0;
        /*
         * And if we have a fault routine, it's not an anonymous region.
         */
        return !vma->vm_ops || !vma->vm_ops->fault;
}

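/*
 * __get_user_pages_uprobe() closely follows the kernel's __get_user_pages()
 * of the same era; the cond_resched() calls and the pending-SIGKILL check are
 * compiled out below, presumably so the function can be used on the
 * access_process_vm_atomic() path.
 */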
int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
                            unsigned long start, int len, int flags,
                            struct page **pages, struct vm_area_struct **vmas)
{
        int i;
        unsigned int vm_flags = 0;
        int write = !!(flags & GUP_FLAGS_WRITE);
        int force = !!(flags & GUP_FLAGS_FORCE);
        int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
        int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);

        if (len <= 0)
                return 0;
        /*
         * Require read or write permissions.
         * If 'force' is set, we only require the "MAY" flags.
         */
        vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
        vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
        i = 0;

        do {
                struct vm_area_struct *vma;
                unsigned int foll_flags;

                //vma = find_extend_vma(mm, start);
                vma = find_vma(mm, start);
                if (!vma && in_gate_area(tsk, start)) {
                        unsigned long pg = start & PAGE_MASK;
                        struct vm_area_struct *gate_vma = get_gate_vma(tsk);
                        pgd_t *pgd;
                        pud_t *pud;
                        pmd_t *pmd;
                        pte_t *pte;

                        /* user gate pages are read-only */
                        if (!ignore && write)
                                return i ? : -EFAULT;
                        if (pg > TASK_SIZE)
                                pgd = pgd_offset_k(pg);
                        else
                                pgd = pgd_offset_gate(mm, pg);
                        BUG_ON(pgd_none(*pgd));
                        pud = pud_offset(pgd, pg);
                        BUG_ON(pud_none(*pud));
                        pmd = pmd_offset(pud, pg);
                        if (pmd_none(*pmd))
                                return i ? : -EFAULT;
                        pte = pte_offset_map(pmd, pg);
                        if (pte_none(*pte)) {
                                pte_unmap(pte);
                                return i ? : -EFAULT;
                        }
                        if (pages) {
                                struct page *page = vm_normal_page(gate_vma, start, *pte);
                                pages[i] = page;
                                if (page)
                                        get_page(page);
                        }
                        pte_unmap(pte);
                        if (vmas)
                                vmas[i] = gate_vma;
                        i++;
                        start += PAGE_SIZE;
                        len--;
                        continue;
                }

                if (!vma ||
                    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
                    (!ignore && !(vm_flags & vma->vm_flags)))
                        return i ? : -EFAULT;

                if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
                        i = follow_hugetlb_page(mm, vma, pages, vmas,
                                                &start, &len, i);
#else
                        i = follow_hugetlb_page(mm, vma, pages, vmas,
                                                &start, &len, i, write);
#endif
                        continue;
                }

                foll_flags = FOLL_TOUCH;
                if (pages)
                        foll_flags |= FOLL_GET;

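                /*
                 * On 2.6.19 - 2.6.30 kernels FOLL_ANON lets follow_page()
                 * return the shared zero page for anonymous mappings instead
                 * of faulting real pages in, so it is only used for read-only
                 * lookups (see use_zero_page() above).
                 */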
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
                if (!write && use_zero_page(vma))
                        foll_flags |= FOLL_ANON;
#endif
#endif

                do {
                        struct page *page;

#if 0
                        /*
                         * If we have a pending SIGKILL, don't keep faulting
                         * pages and potentially allocating memory, unless
                         * current is handling munlock--e.g., on exit. In
                         * that case, we are not allocating memory.  Rather,
                         * we're only unlocking already resident/mapped pages.
                         */
                        if (unlikely(!ignore_sigkill &&
                                        fatal_signal_pending(current)))
                                return i ? i : -ERESTARTSYS;
#endif

                        if (write)
                                foll_flags |= FOLL_WRITE;

                        //cond_resched();

                        DBPRINTF("pages = %p vma = %p\n", pages, vma);
                        while (!(page = follow_page(vma, start, foll_flags))) {
                                int ret;
                                ret = handle_mm_fault(mm, vma, start,
                                                foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
                                if (ret & VM_FAULT_WRITE)
                                        foll_flags &= ~FOLL_WRITE;

                                switch (ret & ~VM_FAULT_WRITE) {
                                case VM_FAULT_MINOR:
                                        tsk->min_flt++;
                                        break;
                                case VM_FAULT_MAJOR:
                                        tsk->maj_flt++;
                                        break;
                                case VM_FAULT_SIGBUS:
                                        return i ? i : -EFAULT;
                                case VM_FAULT_OOM:
                                        return i ? i : -ENOMEM;
                                default:
                                        BUG();
                                }

#else
                                if (ret & VM_FAULT_ERROR) {
                                        if (ret & VM_FAULT_OOM)
                                                return i ? i : -ENOMEM;
                                        else if (ret & VM_FAULT_SIGBUS)
                                                return i ? i : -EFAULT;
                                        BUG();
                                }
                                if (ret & VM_FAULT_MAJOR)
                                        tsk->maj_flt++;
                                else
                                        tsk->min_flt++;

                                /*
                                 * The VM_FAULT_WRITE bit tells us that
                                 * do_wp_page has broken COW when necessary,
                                 * even if maybe_mkwrite decided not to set
                                 * pte_write. We can thus safely do subsequent
                                 * page lookups as if they were reads. But only
                                 * do so when looping for pte_write is futile:
                                 * in some cases userspace may also be wanting
                                 * to write to the gotten user page, which a
                                 * read fault here might prevent (a readonly
                                 * page might get reCOWed by userspace write).
                                 */
                                if ((ret & VM_FAULT_WRITE) &&
                                    !(vma->vm_flags & VM_WRITE))
                                        foll_flags &= ~FOLL_WRITE;

                                //cond_resched();
#endif

                        }

                        if (IS_ERR(page))
                                return i ? i : PTR_ERR(page);
                        if (pages) {
                                pages[i] = page;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
                                flush_anon_page(page, start);
#else
                                flush_anon_page(vma, page, start);
#endif
                                flush_dcache_page(page);
                        }
                        if (vmas)
                                vmas[i] = vma;
                        i++;
                        start += PAGE_SIZE;
                        len--;
                } while (len && start < vma->vm_end);
        } while (len);
        return i;
}
#endif


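/*
 * get_user_pages_uprobe() selects between the local __get_user_pages_uprobe()
 * on >= 2.6.29 kernels and the exported get_user_pages() on older ones.
 */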
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
                unsigned long start, int len, int write, int force,
                struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
        int flags = 0;

        if (write)
                flags |= GUP_FLAGS_WRITE;
        if (force)
                flags |= GUP_FLAGS_FORCE;

        return __get_user_pages_uprobe(tsk, mm,
                                start, len, flags,
                                pages, vmas);
#else
        return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif
}

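/*
 * access_process_vm_atomic() - read from or write to another process' address
 * space; a variant of the kernel's access_process_vm() that goes through
 * get_user_pages_uprobe() above. @write selects the direction (non-zero =
 * copy @buf into the target, zero = copy from it). Returns the number of
 * bytes actually transferred.
 *
 * Illustrative use only (task and probe_addr are hypothetical):
 *
 *      char insn[4];
 *      if (access_process_vm_atomic(task, probe_addr, insn, sizeof(insn), 0)
 *                      != sizeof(insn))
 *              return -EFAULT;
 */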
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        void *old_buf = buf;

        mm = get_task_mm(tsk);
        if (!mm)
                return 0;

        down_read(&mm->mmap_sem);
        /* ignore errors, just check how much was successfully transferred */
        while (len) {
                int bytes, ret, offset;
                void *maddr;
                struct page *page = NULL;

                ret = get_user_pages_uprobe(tsk, mm, addr, 1,
                                write, 1, &page, &vma);
                if (ret <= 0) {
                        /*
                         * Check if this is a VM_IO | VM_PFNMAP VMA, which
                         * we can access using slightly different code.
                         */
#ifdef CONFIG_HAVE_IOREMAP_PROT
                        vma = find_vma(mm, addr);
                        if (!vma)
                                break;
                        if (vma->vm_ops && vma->vm_ops->access)
                                ret = vma->vm_ops->access(vma, addr, buf,
                                                          len, write);
                        if (ret <= 0)
#endif
                                break;
                        bytes = ret;
                } else {
                        bytes = len;
                        offset = addr & (PAGE_SIZE-1);
                        if (bytes > PAGE_SIZE-offset)
                                bytes = PAGE_SIZE-offset;

                        maddr = kmap(page);
                        if (write) {
                                copy_to_user_page(vma, page, addr,
                                                  maddr + offset, buf, bytes);
                                set_page_dirty_lock(page);
                        } else {
                                copy_from_user_page(vma, page, addr,
                                                    buf, maddr + offset, bytes);
                        }
                        kunmap(page);
                        page_cache_release(page);
                }
                len -= bytes;
                buf += bytes;
                addr += bytes;
        }
        up_read(&mm->mmap_sem);
        mmput(mm);

        return buf - old_buf;
}

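/*
 * page_present() - walk the page tables of @mm and report whether a valid
 * page frame is currently mapped at @address (1 if present, 0 otherwise).
 * Nothing is faulted in; the check is purely on the existing mappings.
 */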
int page_present(struct mm_struct *mm, unsigned long address)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *ptep, pte;
        unsigned long pfn;

        pgd = pgd_offset(mm, address);
        if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
                goto out;

        pud = pud_offset(pgd, address);
        if (pud_none(*pud) || unlikely(pud_bad(*pud)))
                goto out;

        pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
                goto out;

        ptep = pte_offset_map(pmd, address);
        if (!ptep)
                goto out;

        pte = *ptep;
        pte_unmap(ptep);
        if (pte_present(pte)) {
                pfn = pte_pfn(pte);
                if (pfn_valid(pfn)) {
                        return 1;
                }
        }

out:
        return 0;
}


EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);