[kernel/swap-modules.git] / kprobe/dbi_kprobes_deps.c
/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: initial implementation of
 *              user-space probes; x86/ARM/MIPS support in both user and kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: module redesign to
 *              separate the core and architecture-specific parts.
 */

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

unsigned int *sched_addr;
unsigned int *fork_addr;

#define GUP_FLAGS_WRITE                  0x1
#define GUP_FLAGS_FORCE                  0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
#define GUP_FLAGS_IGNORE_SIGKILL         0x8

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
/* On these kernels init_mm is not available to modules; it is resolved via
 * kallsyms in init_module_dependencies() and copied into this local instance. */
struct mm_struct *init_mm_ptr;
struct mm_struct init_mm;
#endif

DECLARE_MOD_CB_DEP(kallsyms_search, unsigned long, const char *name);
DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32)
DECLARE_MOD_FUNC_DEP(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len);
#endif

DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif
#else
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);

#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, int write);
#endif

#ifdef __HAVE_ARCH_GATE_AREA
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *tsk, unsigned long addr);
#else
DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif
DECLARE_MOD_FUNC_DEP(follow_page, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long address, unsigned int foll_flags);
DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
        void, struct vm_area_struct *vma, struct page *page, \
        unsigned long vmaddr);
DECLARE_MOD_FUNC_DEP(vm_normal_page, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long addr, pte_t pte);

DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
        void, struct vm_area_struct *vma, struct page *page, \
        unsigned long uaddr, void *kaddr, unsigned long len, int write);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
DECLARE_MOD_FUNC_DEP(put_task_struct, \
        void, struct task_struct *tsk);
#else
DECLARE_MOD_FUNC_DEP(put_task_struct, \
        void, struct rcu_head *rhp);
#endif

DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
        int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif
#else
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
        int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif

DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
        struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)

#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif

#ifdef __HAVE_ARCH_GATE_AREA
DECLARE_MOD_DEP_WRAPPER(in_gate_area, int, struct task_struct *tsk, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area, tsk, addr)
#else
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 11))
DECLARE_MOD_DEP_WRAPPER(follow_page, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long address, unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif
DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
        void, struct vm_area_struct *vma, \
        struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
        void, struct vm_area_struct *vma, struct page *page, \
        unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)

#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32)
DECLARE_MOD_DEP_WRAPPER(copy_to_user_page, void, struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len)
IMP_MOD_DEP_WRAPPER(copy_to_user_page, vma, page, uaddr, dst, src, len)
#endif
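
/*
 * Illustrative sketch, not part of the original module: the pattern above is
 * assumed to work as follows for each unexported kernel symbol. For a
 * hypothetical symbol "foo", DECLARE_MOD_FUNC_DEP() declares a function
 * pointer, DECLARE_MOD_DEP_WRAPPER()/IMP_MOD_DEP_WRAPPER() emit a wrapper
 * with the original signature that calls through that pointer, and
 * INIT_MOD_DEP_VAR() (used in init_module_dependencies() below) fills the
 * pointer in via kallsyms_search():
 *
 *      DECLARE_MOD_FUNC_DEP(foo, int, struct mm_struct *mm, unsigned long addr);
 *      DECLARE_MOD_DEP_WRAPPER(foo, int, struct mm_struct *mm, unsigned long addr)
 *      IMP_MOD_DEP_WRAPPER(foo, mm, addr)
 *      ...
 *      INIT_MOD_DEP_VAR(foo, foo);     // in init_module_dependencies()
 *
 * The exact macro semantics live in dbi_kprobes_deps.h; this comment only
 * reflects how the macros are used in this file.
 */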

int init_module_dependencies(void)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
	/* Resolve init_mm via kallsyms and copy it into the local instance
	 * declared above (memcpy, not memcmp: the lookup result has to be
	 * copied for the local init_mm to be usable). */
	init_mm_ptr = (struct mm_struct *)kallsyms_search("init_mm");
	memcpy(&init_mm, init_mm_ptr, sizeof(struct mm_struct));
#endif

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif

	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);
	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);

#ifdef CONFIG_HUGETLB_PAGE
	INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif

#ifdef __HAVE_ARCH_GATE_AREA
	INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#else
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif
	INIT_MOD_DEP_VAR(follow_page, follow_page);

	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif
#else /* 2.6.16 */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif

#if (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32))
	INIT_MOD_DEP_VAR(copy_to_user_page, copy_to_user_page);
#endif

	return 0;
}
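
/*
 * Illustrative usage (assumption, not taken from this module's sources):
 * init_module_dependencies() is expected to be called once from the module
 * init path, before any probe handlers run, e.g.:
 *
 *      static int __init my_probe_module_init(void)   // hypothetical init function
 *      {
 *              if (init_module_dependencies() != 0)
 *                      return -ESRCH;                  // a symbol could not be resolved
 *              return 0;
 *      }
 *
 * As written above the function always returns 0, so a caller that wants to
 * detect failed kallsyms lookups has to add its own checks.
 */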
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}

/*
 * Simplified version of the kernel's __get_user_pages(): walks and, when
 * necessary, faults in the pages backing [start, start + len * PAGE_SIZE)
 * of mm, optionally returning the page and vma pointers to the caller.
 */
int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			    unsigned long start, int len, int flags,
			    struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
	int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);

	if (len <= 0)
		return 0;
	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		/* vma = find_extend_vma(mm, start); */
		vma = find_vma(mm, start);
		if (!vma && in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i);
#else
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i, write);
#endif
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif
#endif

		do {
			struct page *page;

#if 0
			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory, unless
			 * current is handling munlock--e.g., on exit. In
			 * that case, we are not allocating memory.  Rather,
			 * we're only unlocking already resident/mapped pages.
			 */
			if (unlikely(!ignore_sigkill &&
				     fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;
#endif

			if (write)
				foll_flags |= FOLL_WRITE;

			/* cond_resched(); */

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				ret = handle_mm_fault(mm, vma, start,
						      foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
#endif
			}

			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
				flush_anon_page(page, start);
#else
				flush_anon_page(vma, page, start);
#endif
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);
	return i;
}
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18) */

int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
			  unsigned long start, int len, int write, int force,
			  struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;

	return __get_user_pages_uprobe(tsk, mm,
				       start, len, flags,
				       pages, vmas);
#else
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif
}
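
/*
 * Illustrative usage (hypothetical caller, not part of this file): pin the
 * single page backing a user address, mirroring how access_process_vm_atomic()
 * below uses this helper.
 *
 *      struct page *page = NULL;
 *      struct vm_area_struct *vma = NULL;
 *      int ret = get_user_pages_uprobe(task, task->mm, uaddr,
 *                                      1, 0, 1, &page, &vma);  // write=0, force=1
 *      if (ret > 0) {
 *              // ... kmap(page), copy data, kunmap(page) ...
 *              page_cache_release(page);       // drop the reference taken above
 *      }
 */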

int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	down_read(&mm->mmap_sem);
	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
					    write, 1, &page, &vma);

		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE - 1);
			if (bytes > PAGE_SIZE - offset)
				bytes = PAGE_SIZE - offset;

			maddr = kmap(page);
			if (write) {
				copy_to_user_page(vma, page, addr,
						  maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset, bytes);
			}
			kunmap(page);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}
	up_read(&mm->mmap_sem);
	mmput(mm);

	return buf - old_buf;
}
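
/*
 * Illustrative usage (hypothetical task and address, not part of this file):
 * read one instruction word from a traced task, the kind of access performed
 * when planting or restoring a probe:
 *
 *      unsigned long insn;
 *      int n = access_process_vm_atomic(task, probe_addr,
 *                                       &insn, sizeof(insn), 0);      // write = 0
 *      if (n != sizeof(insn))
 *              return -EFAULT;         // address not fully readable in the target mm
 *
 * Writing back works the same way with write = 1, in which case the routine
 * goes through copy_to_user_page() and marks the page dirty.
 */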

int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn))
			return 1;
	}

out:
	return 0;
}
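
/*
 * Illustrative usage (hypothetical caller): page_present() walks the page
 * tables of mm without faulting anything in, so it can be used to decide
 * whether a user address can be touched immediately:
 *
 *      if (page_present(task->mm, uaddr))
 *              ;       // resident: safe to access via access_process_vm_atomic()
 *      else
 *              ;       // not resident: postpone the access or fault it in first
 */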

EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);