/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: initial implementation
 *              of user-space probes; support for x86/ARM/MIPS in both user and
 *              kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign of the
 *              module to separate the core and architecture-specific parts
 */

#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"


unsigned int *sched_addr;
unsigned int *fork_addr;


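/*
 * These values mirror the kernel-internal GUP_FLAGS_* constants used by
 * __get_user_pages() (mm/internal.h) on the 2.6.29-2.6.31 kernels this code
 * was written against; they are not exported to modules, hence the local
 * copies.
 */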
#define GUP_FLAGS_WRITE                  0x1
#define GUP_FLAGS_FORCE                  0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
#define GUP_FLAGS_IGNORE_SIGKILL         0x8

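/*
 * init_mm is not reliably accessible to modules on kernels after 2.6.29, so
 * its address is looked up via kallsyms and its contents are copied into this
 * local instance by init_module_dependencies() below.
 */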
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
struct mm_struct *init_mm_ptr;
struct mm_struct init_mm;
#endif


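/*
 * DECLARE_MOD_CB_DEP() and DECLARE_MOD_FUNC_DEP() (see dbi_kprobes_deps.h)
 * declare function-pointer variables for kernel symbols that are not exported
 * to modules; the pointers are filled in at runtime by
 * init_module_dependencies() below.
 */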
DECLARE_MOD_CB_DEP(kallsyms_search, unsigned long, const char *name);

DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif
#else
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
DECLARE_MOD_FUNC_DEP(follow_page, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long address, unsigned int foll_flags);
DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
        void, struct vm_area_struct *vma, struct page *page, \
        unsigned long vmaddr);
DECLARE_MOD_FUNC_DEP(vm_normal_page, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long addr, pte_t pte);

DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
        void, struct vm_area_struct *vma, struct page *page, \
        unsigned long uaddr, void *kaddr, unsigned long len, int write);


#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
DECLARE_MOD_FUNC_DEP(put_task_struct, \
        void, struct task_struct *tsk);
#else
DECLARE_MOD_FUNC_DEP(put_task_struct, \
        void, struct rcu_head *rhp);
#endif

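/*
 * Each DECLARE_MOD_DEP_WRAPPER()/IMP_MOD_DEP_WRAPPER() pair below (again, see
 * dbi_kprobes_deps.h) emits a wrapper with the original kernel signature that
 * simply forwards its arguments through the corresponding resolved pointer.
 */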
DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
        int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif
#else
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
        int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif

DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
        struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)

DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)

DECLARE_MOD_DEP_WRAPPER(follow_page, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long address, unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)

DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
        void, struct vm_area_struct *vma, \
        struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
        struct page *, struct vm_area_struct *vma, \
        unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
        void, struct vm_area_struct *vma, struct page *page, \
        unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)

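/*
 * init_module_dependencies() resolves the unexported kernel symbols declared
 * above through kallsyms and fills in the corresponding function pointers.
 * It is meant to be called once, early in the module's init path, before any
 * probe code uses the wrappers. Illustrative sketch only (the surrounding
 * init function is hypothetical):
 *
 *      static int __init dbi_module_init(void)
 *      {
 *              int ret = init_module_dependencies();
 *              if (ret)
 *                      return ret;
 *              ...
 *              return 0;
 *      }
 */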
int init_module_dependencies(void)
{

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 29)
        init_mm_ptr = (struct mm_struct *)kallsyms_search("init_mm");
        /* keep a local copy of the kernel's init_mm */
        memcpy(&init_mm, init_mm_ptr, sizeof(struct mm_struct));
#endif

//#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
        INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif
//#endif

        INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);
        INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
        INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
        INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
        INIT_MOD_DEP_VAR(follow_page, follow_page);
        INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
        INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
        INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
        INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else
        INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif
#else /* 2.6.16 */
        INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif

        return 0;
}

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
        /*
         * We don't want to optimize FOLL_ANON for make_pages_present()
         * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
         * we want to get the page from the page tables to make sure
         * that we serialize and update with any other user of that
         * mapping.
         */
        if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
                return 0;
        /*
         * And if we have a fault routine, it's not an anonymous region.
         */
        return !vma->vm_ops || !vma->vm_ops->fault;
}

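/*
 * __get_user_pages_uprobe() closely follows the kernel's __get_user_pages()
 * of the same era; the cond_resched() calls and the pending-SIGKILL check are
 * compiled out below, presumably so the function can be used on the
 * access_process_vm_atomic() path.
 */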
int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
                            unsigned long start, int len, int flags,
                            struct page **pages, struct vm_area_struct **vmas)
{
        int i;
        unsigned int vm_flags = 0;
        int write = !!(flags & GUP_FLAGS_WRITE);
        int force = !!(flags & GUP_FLAGS_FORCE);
        int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
        int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);

        if (len <= 0)
                return 0;
        /*
         * Require read or write permissions.
         * If 'force' is set, we only require the "MAY" flags.
         */
        vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
        vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
        i = 0;

        do {
                struct vm_area_struct *vma;
                unsigned int foll_flags;

                //vma = find_extend_vma(mm, start);
                vma = find_vma(mm, start);
                if (!vma && in_gate_area(tsk, start)) {
                        unsigned long pg = start & PAGE_MASK;
                        struct vm_area_struct *gate_vma = get_gate_vma(tsk);
                        pgd_t *pgd;
                        pud_t *pud;
                        pmd_t *pmd;
                        pte_t *pte;

                        /* user gate pages are read-only */
                        if (!ignore && write)
                                return i ? : -EFAULT;
                        if (pg > TASK_SIZE)
                                pgd = pgd_offset_k(pg);
                        else
                                pgd = pgd_offset_gate(mm, pg);
                        BUG_ON(pgd_none(*pgd));
                        pud = pud_offset(pgd, pg);
                        BUG_ON(pud_none(*pud));
                        pmd = pmd_offset(pud, pg);
                        if (pmd_none(*pmd))
                                return i ? : -EFAULT;
                        pte = pte_offset_map(pmd, pg);
                        if (pte_none(*pte)) {
                                pte_unmap(pte);
                                return i ? : -EFAULT;
                        }
                        if (pages) {
                                struct page *page = vm_normal_page(gate_vma, start, *pte);
                                pages[i] = page;
                                if (page)
                                        get_page(page);
                        }
                        pte_unmap(pte);
                        if (vmas)
                                vmas[i] = gate_vma;
                        i++;
                        start += PAGE_SIZE;
                        len--;
                        continue;
                }

                if (!vma ||
                    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
                    (!ignore && !(vm_flags & vma->vm_flags)))
                        return i ? : -EFAULT;

                if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
                        i = follow_hugetlb_page(mm, vma, pages, vmas,
                                                &start, &len, i);
#else
                        i = follow_hugetlb_page(mm, vma, pages, vmas,
                                                &start, &len, i, write);
#endif
                        continue;
                }

                foll_flags = FOLL_TOUCH;
                if (pages)
                        foll_flags |= FOLL_GET;

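                /*
                 * On 2.6.19 - 2.6.30 kernels FOLL_ANON lets follow_page()
                 * return the shared zero page for anonymous mappings instead
                 * of faulting real pages in, so it is only used for read-only
                 * lookups (see use_zero_page() above).
                 */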
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,30)
                if (!write && use_zero_page(vma))
                        foll_flags |= FOLL_ANON;
#endif
#endif

                do {
                        struct page *page;

#if 0
                        /*
                         * If we have a pending SIGKILL, don't keep faulting
                         * pages and potentially allocating memory, unless
                         * current is handling munlock--e.g., on exit. In
                         * that case, we are not allocating memory.  Rather,
                         * we're only unlocking already resident/mapped pages.
                         */
                        if (unlikely(!ignore_sigkill &&
                                        fatal_signal_pending(current)))
                                return i ? i : -ERESTARTSYS;
#endif

                        if (write)
                                foll_flags |= FOLL_WRITE;

                        //cond_resched();

                        DBPRINTF("pages = %p vma = %p\n", pages, vma);
                        while (!(page = follow_page(vma, start, foll_flags))) {
                                int ret;
                                ret = handle_mm_fault(mm, vma, start,
                                                foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
                                if (ret & VM_FAULT_WRITE)
                                        foll_flags &= ~FOLL_WRITE;

                                switch (ret & ~VM_FAULT_WRITE) {
                                case VM_FAULT_MINOR:
                                        tsk->min_flt++;
                                        break;
                                case VM_FAULT_MAJOR:
                                        tsk->maj_flt++;
                                        break;
                                case VM_FAULT_SIGBUS:
                                        return i ? i : -EFAULT;
                                case VM_FAULT_OOM:
                                        return i ? i : -ENOMEM;
                                default:
                                        BUG();
                                }

#else
                                if (ret & VM_FAULT_ERROR) {
                                        if (ret & VM_FAULT_OOM)
                                                return i ? i : -ENOMEM;
                                        else if (ret & VM_FAULT_SIGBUS)
                                                return i ? i : -EFAULT;
                                        BUG();
                                }
                                if (ret & VM_FAULT_MAJOR)
                                        tsk->maj_flt++;
                                else
                                        tsk->min_flt++;

                                /*
                                 * The VM_FAULT_WRITE bit tells us that
                                 * do_wp_page has broken COW when necessary,
                                 * even if maybe_mkwrite decided not to set
                                 * pte_write. We can thus safely do subsequent
                                 * page lookups as if they were reads. But only
                                 * do so when looping for pte_write is futile:
                                 * in some cases userspace may also be wanting
                                 * to write to the gotten user page, which a
                                 * read fault here might prevent (a readonly
                                 * page might get reCOWed by userspace write).
                                 */
                                if ((ret & VM_FAULT_WRITE) &&
                                    !(vma->vm_flags & VM_WRITE))
                                        foll_flags &= ~FOLL_WRITE;

                                //cond_resched();
#endif

                        }

                        if (IS_ERR(page))
                                return i ? i : PTR_ERR(page);
                        if (pages) {
                                pages[i] = page;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
                                flush_anon_page(page, start);
#else
                                flush_anon_page(vma, page, start);
#endif
                                flush_dcache_page(page);
                        }
                        if (vmas)
                                vmas[i] = vma;
                        i++;
                        start += PAGE_SIZE;
                        len--;
                } while (len && start < vma->vm_end);
        } while (len);
        return i;
}
#endif


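/*
 * get_user_pages_uprobe() selects between the local __get_user_pages_uprobe()
 * on >= 2.6.29 kernels and the exported get_user_pages() on older ones.
 */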
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
                unsigned long start, int len, int write, int force,
                struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
        int flags = 0;

        if (write)
                flags |= GUP_FLAGS_WRITE;
        if (force)
                flags |= GUP_FLAGS_FORCE;

        return __get_user_pages_uprobe(tsk, mm,
                                start, len, flags,
                                pages, vmas);
#else
        return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif
}

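/*
 * access_process_vm_atomic() - read from or write to another process' address
 * space; a variant of the kernel's access_process_vm() that goes through
 * get_user_pages_uprobe() above. @write selects the direction (non-zero =
 * copy @buf into the target, zero = copy from it). Returns the number of
 * bytes actually transferred.
 *
 * Illustrative use only (task and probe_addr are hypothetical):
 *
 *      char insn[4];
 *      if (access_process_vm_atomic(task, probe_addr, insn, sizeof(insn), 0)
 *                      != sizeof(insn))
 *              return -EFAULT;
 */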
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        void *old_buf = buf;

        mm = get_task_mm(tsk);
        if (!mm)
                return 0;

        down_read(&mm->mmap_sem);
        /* ignore errors, just check how much was successfully transferred */
        while (len) {
                int bytes, ret, offset;
                void *maddr;
                struct page *page = NULL;

                ret = get_user_pages_uprobe(tsk, mm, addr, 1,
                                write, 1, &page, &vma);
                if (ret <= 0) {
                        /*
                         * Check if this is a VM_IO | VM_PFNMAP VMA, which
                         * we can access using slightly different code.
                         */
#ifdef CONFIG_HAVE_IOREMAP_PROT
                        vma = find_vma(mm, addr);
                        if (!vma)
                                break;
                        if (vma->vm_ops && vma->vm_ops->access)
                                ret = vma->vm_ops->access(vma, addr, buf,
                                                          len, write);
                        if (ret <= 0)
#endif
                                break;
                        bytes = ret;
                } else {
                        bytes = len;
                        offset = addr & (PAGE_SIZE-1);
                        if (bytes > PAGE_SIZE-offset)
                                bytes = PAGE_SIZE-offset;

                        maddr = kmap(page);
                        if (write) {
                                copy_to_user_page(vma, page, addr,
                                                  maddr + offset, buf, bytes);
                                set_page_dirty_lock(page);
                        } else {
                                copy_from_user_page(vma, page, addr,
                                                    buf, maddr + offset, bytes);
                        }
                        kunmap(page);
                        page_cache_release(page);
                }
                len -= bytes;
                buf += bytes;
                addr += bytes;
        }
        up_read(&mm->mmap_sem);
        mmput(mm);

        return buf - old_buf;
}

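/*
 * page_present() - walk the page tables of @mm and report whether a valid
 * page frame is currently mapped at @address (1 if present, 0 otherwise).
 * Nothing is faulted in; the check is purely on the existing mappings.
 */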
int page_present(struct mm_struct *mm, unsigned long address)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *ptep, pte;
        unsigned long pfn;

        pgd = pgd_offset(mm, address);
        if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
                goto out;

        pud = pud_offset(pgd, address);
        if (pud_none(*pud) || unlikely(pud_bad(*pud)))
                goto out;

        pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
                goto out;

        ptep = pte_offset_map(pmd, address);
        if (!ptep)
                goto out;

        pte = *ptep;
        pte_unmap(ptep);
        if (pte_present(pte)) {
                pfn = pte_pfn(pte);
                if (pfn_valid(pfn)) {
                        return 1;
                }
        }

out:
        return 0;
}


EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);