/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: initial implementation
 *              of user-space probes; support for x86/ARM/MIPS in both user and
 *              kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign of the
 *              module to separate the core and architecture-dependent parts.
 */

#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

#include <linux/slab.h>

unsigned int *sched_addr;
unsigned int *fork_addr;

#define GUP_FLAGS_WRITE                  0x1
#define GUP_FLAGS_FORCE                  0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
#define GUP_FLAGS_IGNORE_SIGKILL         0x8

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
struct mm_struct *init_mm_ptr;
struct mm_struct init_mm;
#endif

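/*
 * The DECLARE_MOD_* and IMP_MOD_DEP_WRAPPER macros below build indirect
 * wrappers for kernel functions that are not exported to modules; their
 * addresses are resolved by name at run time via kallsyms_search() in
 * init_module_dependencies() (see INIT_MOD_DEP_VAR).
 */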
DECLARE_MOD_CB_DEP(kallsyms_search, unsigned long, const char *name);

DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif
#else
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);

#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_FUNC_DEP(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, int write);
#endif

#ifdef __HAVE_ARCH_GATE_AREA
DECLARE_MOD_FUNC_DEP(in_gate_area, int, struct task_struct *tsk, unsigned long addr);
#else
DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
#endif

DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);

DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif

DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif
#else
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif

DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)

#ifdef CONFIG_HUGETLB_PAGE
DECLARE_MOD_DEP_WRAPPER(follow_hugetlb_page, int, struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, int write)
IMP_MOD_DEP_WRAPPER(follow_hugetlb_page, mm, vma, pages, vmas, position, length, i, write)
#endif

#ifdef __HAVE_ARCH_GATE_AREA
DECLARE_MOD_DEP_WRAPPER(in_gate_area, int, struct task_struct *tsk, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area, tsk, addr)
#else
DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)
#endif

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 11))
DECLARE_MOD_DEP_WRAPPER(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)
#endif

DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)

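/*
 * Resolve the addresses of the kernel symbols declared above via kallsyms.
 * Must be called before any of the dependency wrappers is used; currently
 * always returns 0.
 */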
int init_module_dependencies(void)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
        /* keep a private copy of the kernel's init_mm, resolved via kallsyms */
        init_mm_ptr = (struct mm_struct *)kallsyms_search("init_mm");
        memcpy(&init_mm, init_mm_ptr, sizeof(struct mm_struct));
#endif

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
        INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif

        INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);
        INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
        INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);

#ifdef CONFIG_HUGETLB_PAGE
        INIT_MOD_DEP_VAR(follow_hugetlb_page, follow_hugetlb_page);
#endif

#ifdef __HAVE_ARCH_GATE_AREA
        INIT_MOD_DEP_VAR(in_gate_area, in_gate_area);
#else
        INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
#endif
        INIT_MOD_DEP_VAR(follow_page, follow_page);

        INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
        INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
        INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
        INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else
        INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif
#else /* 2.6.16 */
        INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif

        return 0;
}


#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
        /*
         * We don't want to optimize FOLL_ANON for make_pages_present()
         * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
         * we want to get the page from the page tables to make sure
         * that we serialize and update with any other user of that
         * mapping.
         */
        if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
                return 0;
        /*
         * And if we have a fault routine, it's not an anonymous region.
         */
        return !vma->vm_ops || !vma->vm_ops->fault;
}

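/*
 * Local variant of the kernel's __get_user_pages(): the cond_resched()
 * calls and the pending-SIGKILL check of the original are disabled below
 * (see the commented-out and "#if 0" code), presumably to avoid voluntary
 * rescheduling on the probe path.
 */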
int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
                            unsigned long start, int len, int flags,
                            struct page **pages, struct vm_area_struct **vmas)
{
        int i;
        unsigned int vm_flags = 0;
        int write = !!(flags & GUP_FLAGS_WRITE);
        int force = !!(flags & GUP_FLAGS_FORCE);
        int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
        int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);

        if (len <= 0)
                return 0;
        /*
         * Require read or write permissions.
         * If 'force' is set, we only require the "MAY" flags.
         */
        vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
        vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
        i = 0;

        do {
                struct vm_area_struct *vma;
                unsigned int foll_flags;

                /* vma = find_extend_vma(mm, start); */
                vma = find_vma(mm, start);
                if (!vma && in_gate_area(tsk, start)) {
                        unsigned long pg = start & PAGE_MASK;
                        struct vm_area_struct *gate_vma = get_gate_vma(tsk);
                        pgd_t *pgd;
                        pud_t *pud;
                        pmd_t *pmd;
                        pte_t *pte;

                        /* user gate pages are read-only */
                        if (!ignore && write)
                                return i ? : -EFAULT;
                        if (pg > TASK_SIZE)
                                pgd = pgd_offset_k(pg);
                        else
                                pgd = pgd_offset_gate(mm, pg);
                        BUG_ON(pgd_none(*pgd));
                        pud = pud_offset(pgd, pg);
                        BUG_ON(pud_none(*pud));
                        pmd = pmd_offset(pud, pg);
                        if (pmd_none(*pmd))
                                return i ? : -EFAULT;
                        pte = pte_offset_map(pmd, pg);
                        if (pte_none(*pte)) {
                                pte_unmap(pte);
                                return i ? : -EFAULT;
                        }
                        if (pages) {
                                struct page *page = vm_normal_page(gate_vma, start, *pte);
                                pages[i] = page;
                                if (page)
                                        get_page(page);
                        }
                        pte_unmap(pte);
                        if (vmas)
                                vmas[i] = gate_vma;
                        i++;
                        start += PAGE_SIZE;
                        len--;
                        continue;
                }

                if (!vma ||
                    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
                    (!ignore && !(vm_flags & vma->vm_flags)))
                        return i ? : -EFAULT;

                if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
                        i = follow_hugetlb_page(mm, vma, pages, vmas,
                                                &start, &len, i);
#else
                        i = follow_hugetlb_page(mm, vma, pages, vmas,
                                                &start, &len, i, write);
#endif
                        continue;
                }

                foll_flags = FOLL_TOUCH;
                if (pages)
                        foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
                if (!write && use_zero_page(vma))
                        foll_flags |= FOLL_ANON;
#endif
#endif

                do {
                        struct page *page;

#if 0
                        /*
                         * If we have a pending SIGKILL, don't keep faulting
                         * pages and potentially allocating memory, unless
                         * current is handling munlock--e.g., on exit. In
                         * that case, we are not allocating memory.  Rather,
                         * we're only unlocking already resident/mapped pages.
                         */
                        if (unlikely(!ignore_sigkill &&
                                     fatal_signal_pending(current)))
                                return i ? i : -ERESTARTSYS;
#endif

                        if (write)
                                foll_flags |= FOLL_WRITE;

                        /* cond_resched(); */

                        DBPRINTF("pages = %p vma = %p\n", pages, vma);
                        while (!(page = follow_page(vma, start, foll_flags))) {
                                int ret;
                                ret = handle_mm_fault(mm, vma, start,
                                                      foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
                                if (ret & VM_FAULT_WRITE)
                                        foll_flags &= ~FOLL_WRITE;

                                switch (ret & ~VM_FAULT_WRITE) {
                                case VM_FAULT_MINOR:
                                        tsk->min_flt++;
                                        break;
                                case VM_FAULT_MAJOR:
                                        tsk->maj_flt++;
                                        break;
                                case VM_FAULT_SIGBUS:
                                        return i ? i : -EFAULT;
                                case VM_FAULT_OOM:
                                        return i ? i : -ENOMEM;
                                default:
                                        BUG();
                                }
#else
                                if (ret & VM_FAULT_ERROR) {
                                        if (ret & VM_FAULT_OOM)
                                                return i ? i : -ENOMEM;
                                        else if (ret & VM_FAULT_SIGBUS)
                                                return i ? i : -EFAULT;
                                        BUG();
                                }
                                if (ret & VM_FAULT_MAJOR)
                                        tsk->maj_flt++;
                                else
                                        tsk->min_flt++;

                                /*
                                 * The VM_FAULT_WRITE bit tells us that
                                 * do_wp_page has broken COW when necessary,
                                 * even if maybe_mkwrite decided not to set
                                 * pte_write. We can thus safely do subsequent
                                 * page lookups as if they were reads. But only
                                 * do so when looping for pte_write is futile:
                                 * in some cases userspace may also be wanting
                                 * to write to the gotten user page, which a
                                 * read fault here might prevent (a readonly
                                 * page might get reCOWed by userspace write).
                                 */
                                if ((ret & VM_FAULT_WRITE) &&
                                    !(vma->vm_flags & VM_WRITE))
                                        foll_flags &= ~FOLL_WRITE;

                                /* cond_resched(); */
#endif
                        }

                        if (IS_ERR(page))
                                return i ? i : PTR_ERR(page);
                        if (pages) {
                                pages[i] = page;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
                                flush_anon_page(page, start);
#else
                                flush_anon_page(vma, page, start);
#endif
                                flush_dcache_page(page);
                        }
                        if (vmas)
                                vmas[i] = vma;
                        i++;
                        start += PAGE_SIZE;
                        len--;
                } while (len && start < vma->vm_end);
        } while (len);
        return i;
}
#endif

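/*
 * Wrapper with the classic get_user_pages() calling convention.  On kernels
 * >= 2.6.29 it converts the write/force arguments into GUP_FLAGS_* and calls
 * the local __get_user_pages_uprobe(); older kernels fall back to the stock
 * get_user_pages().
 */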
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
                          unsigned long start, int len, int write, int force,
                          struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
        int flags = 0;

        if (write)
                flags |= GUP_FLAGS_WRITE;
        if (force)
                flags |= GUP_FLAGS_FORCE;

        return __get_user_pages_uprobe(tsk, mm,
                                       start, len, flags,
                                       pages, vmas);
#else
        return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif
}

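/*
 * Read from or write to another process' address space, mirroring the
 * kernel's access_process_vm() but pinning pages through
 * get_user_pages_uprobe().  Returns the number of bytes actually
 * transferred.
 */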
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        void *old_buf = buf;
        unsigned long addr1 = addr;
        unsigned int *inst_buf = (unsigned int *)buf;

        mm = get_task_mm(tsk);
        if (!mm)
                return 0;

        down_read(&mm->mmap_sem);
        /* ignore errors, just check how much was successfully transferred */
        while (len) {
                int bytes, ret, offset;
                void *maddr;
                struct page *page = NULL;

                ret = get_user_pages_uprobe(tsk, mm, addr, 1,
                                            write, 1, &page, &vma);

                if (ret <= 0) {
                        /*
                         * Check if this is a VM_IO | VM_PFNMAP VMA, which
                         * we can access using slightly different code.
                         */
#ifdef CONFIG_HAVE_IOREMAP_PROT
                        vma = find_vma(mm, addr);
                        if (!vma)
                                break;
                        if (vma->vm_ops && vma->vm_ops->access)
                                ret = vma->vm_ops->access(vma, addr, buf,
                                                          len, write);
                        if (ret <= 0)
#endif
                                break;
                        bytes = ret;
                } else {
                        bytes = len;
                        offset = addr & (PAGE_SIZE-1);
                        if (bytes > PAGE_SIZE-offset)
                                bytes = PAGE_SIZE-offset;

                        maddr = kmap(page);
                        if (write) {
                                copy_to_user_page(vma, page, addr,
                                                  maddr + offset, buf, bytes);
                                set_page_dirty_lock(page);
                        } else {
                                copy_from_user_page(vma, page, addr,
                                                    buf, maddr + offset, bytes);
                        }
                        kunmap(page);
                        page_cache_release(page);
                }
                len -= bytes;
                buf += bytes;
                addr += bytes;
        }
        up_read(&mm->mmap_sem);
        mmput(mm);

        /* leftover debug output, keyed to one hard-coded probe address */
        if (addr1 == 0xad327238) {
                printk(">>>>> %s\n", tsk->comm);
                printk(">>>>> %x\n", inst_buf[0]);
        }

        return buf - old_buf;
}

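/*
 * Walk the page tables of @mm and report whether @address is currently
 * backed by a present page with a valid pfn: returns 1 if so, 0 otherwise.
 */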
int page_present(struct mm_struct *mm, unsigned long address)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *ptep, pte;
        unsigned long pfn;

        pgd = pgd_offset(mm, address);
        if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
                goto out;

        pud = pud_offset(pgd, address);
        if (pud_none(*pud) || unlikely(pud_bad(*pud)))
                goto out;

        pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
                goto out;

        ptep = pte_offset_map(pmd, address);
        if (!ptep)
                goto out;

        pte = *ptep;
        pte_unmap(ptep);
        if (pte_present(pte)) {
                pfn = pte_pfn(pte);
                if (pfn_valid(pfn))
                        return 1;
        }

out:
        return 0;
}

EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);