Fix a KProbe insertion problem for DTV
/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/kprobe/dbi_kprobes_deps.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: initial implementation
 *              of user-space probes; x86/ARM/MIPS support for both user and kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign of the module,
 *              separating the core and architecture-specific parts.
 */

#include <linux/module.h>
#include <linux/sched.h>

#include <asm/pgtable.h>

#include "dbi_kprobes_deps.h"
#include "dbi_kdebug.h"

unsigned int *sched_addr;
unsigned int *fork_addr;

#define GUP_FLAGS_WRITE                  0x1
#define GUP_FLAGS_FORCE                  0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
#define GUP_FLAGS_IGNORE_SIGKILL         0x8

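/*
 * The kernel functions referenced below are not exported to modules; the
 * DECLARE_/IMP_MOD_DEP_WRAPPER machinery (see dbi_kprobes_deps.h) declares
 * pointer variables for them, which init_module_dependencies() fills in
 * before any of the wrappers is used.
 */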
DECLARE_MOD_CB_DEP(kallsyms_search, unsigned long, const char *name);

DECLARE_MOD_FUNC_DEP(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);

DECLARE_MOD_FUNC_DEP(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access);
#endif
#else
DECLARE_MOD_FUNC_DEP(handle_mm_fault, int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags);
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30) */

DECLARE_MOD_FUNC_DEP(get_gate_vma, struct vm_area_struct *, struct task_struct *tsk);
DECLARE_MOD_FUNC_DEP(in_gate_area_no_task, int, unsigned long addr);
DECLARE_MOD_FUNC_DEP(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags);
DECLARE_MOD_FUNC_DEP(__flush_anon_page, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long vmaddr);
DECLARE_MOD_FUNC_DEP(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte);

DECLARE_MOD_FUNC_DEP(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct task_struct *tsk);
#else
DECLARE_MOD_FUNC_DEP(put_task_struct, \
		void, struct rcu_head *rhp);
#endif

DECLARE_MOD_DEP_WRAPPER(access_process_vm, int, struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
IMP_MOD_DEP_WRAPPER(access_process_vm, tsk, addr, buf, len, write)

DECLARE_MOD_DEP_WRAPPER(find_extend_vma, struct vm_area_struct *, struct mm_struct *mm, unsigned long addr)
IMP_MOD_DEP_WRAPPER(find_extend_vma, mm, addr)

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, write_access)
#endif
#else
DECLARE_MOD_DEP_WRAPPER(handle_mm_fault, \
		int, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags)
IMP_MOD_DEP_WRAPPER(handle_mm_fault, mm, vma, address, flags)
#endif

DECLARE_MOD_DEP_WRAPPER(get_gate_vma, \
		struct vm_area_struct *, struct task_struct *tsk)
IMP_MOD_DEP_WRAPPER(get_gate_vma, tsk)

DECLARE_MOD_DEP_WRAPPER(in_gate_area_no_task, int, unsigned long addr)
IMP_MOD_DEP_WRAPPER(in_gate_area_no_task, addr)

DECLARE_MOD_DEP_WRAPPER(follow_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long address, unsigned int foll_flags)
IMP_MOD_DEP_WRAPPER(follow_page, vma, address, foll_flags)

DECLARE_MOD_DEP_WRAPPER(__flush_anon_page, \
		void, struct vm_area_struct *vma, \
		struct page *page, unsigned long vmaddr)
IMP_MOD_DEP_WRAPPER(__flush_anon_page, vma, page, vmaddr)

DECLARE_MOD_DEP_WRAPPER(vm_normal_page, \
		struct page *, struct vm_area_struct *vma, \
		unsigned long addr, pte_t pte)
IMP_MOD_DEP_WRAPPER(vm_normal_page, vma, addr, pte)

DECLARE_MOD_DEP_WRAPPER(flush_ptrace_access, \
		void, struct vm_area_struct *vma, struct page *page, \
		unsigned long uaddr, void *kaddr, unsigned long len, int write)
IMP_MOD_DEP_WRAPPER(flush_ptrace_access, vma, page, uaddr, kaddr, len, write)

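/*
 * Fill in the dependency pointers declared above so the wrappers can call
 * into the corresponding (non-exported) kernel functions. Must run before
 * any of the wrappers is used.
 */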
int init_module_dependencies(void)
{
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
	INIT_MOD_DEP_VAR(handle_mm_fault, handle_mm_fault);
#endif
#endif

	INIT_MOD_DEP_VAR(flush_ptrace_access, flush_ptrace_access);
	INIT_MOD_DEP_VAR(find_extend_vma, find_extend_vma);
	INIT_MOD_DEP_VAR(get_gate_vma, get_gate_vma);
	INIT_MOD_DEP_VAR(in_gate_area_no_task, in_gate_area_no_task);
	INIT_MOD_DEP_VAR(follow_page, follow_page);
	INIT_MOD_DEP_VAR(__flush_anon_page, __flush_anon_page);
	INIT_MOD_DEP_VAR(vm_normal_page, vm_normal_page);
	INIT_MOD_DEP_VAR(access_process_vm, access_process_vm);

#if (LINUX_VERSION_CODE != KERNEL_VERSION(2, 6, 16))
# if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11))
	INIT_MOD_DEP_VAR(put_task_struct, put_task_struct);
# else
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct);
# endif
#else /* 2.6.16 */
	INIT_MOD_DEP_VAR(put_task_struct, __put_task_struct_cb);
#endif

	return 0;
}

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
static inline int use_zero_page(struct vm_area_struct *vma)
{
	/*
	 * We don't want to optimize FOLL_ANON for make_pages_present()
	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
	 * we want to get the page from the page tables to make sure
	 * that we serialize and update with any other user of that
	 * mapping.
	 */
	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		return 0;
	/*
	 * And if we have a fault routine, it's not an anonymous region.
	 */
	return !vma->vm_ops || !vma->vm_ops->fault;
}

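/*
 * Local variant of the kernel's __get_user_pages(): find_vma() is used
 * instead of find_extend_vma(), and the cond_resched()/fatal-signal checks
 * are compiled out (see below), so pages can be faulted in without
 * voluntarily rescheduling.
 */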
int __get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int flags,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int vm_flags = 0;
	int write = !!(flags & GUP_FLAGS_WRITE);
	int force = !!(flags & GUP_FLAGS_FORCE);
	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
	int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);

	if (len <= 0)
		return 0;
	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;
		unsigned int foll_flags;

		/* vma = find_extend_vma(mm, start); */
		vma = find_vma(mm, start);
		if (!vma && in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (!ignore && write)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				struct page *page = vm_normal_page(gate_vma, start, *pte);
				pages[i] = page;
				if (page)
					get_page(page);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    (!ignore && !(vm_flags & vma->vm_flags)))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i);
#else
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i, write);
#endif
			continue;
		}

		foll_flags = FOLL_TOUCH;
		if (pages)
			foll_flags |= FOLL_GET;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 30)
		if (!write && use_zero_page(vma))
			foll_flags |= FOLL_ANON;
#endif
#endif

		do {
			struct page *page;

#if 0
			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory, unless
			 * current is handling munlock--e.g., on exit. In
			 * that case, we are not allocating memory.  Rather,
			 * we're only unlocking already resident/mapped pages.
			 */
			if (unlikely(!ignore_sigkill &&
					fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;
#endif

			if (write)
				foll_flags |= FOLL_WRITE;

			/* cond_resched(); */

			DBPRINTF("pages = %p vma = %p\n", pages, vma);
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				ret = handle_mm_fault(mm, vma, start,
						foll_flags & FOLL_WRITE);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
				if (ret & VM_FAULT_WRITE)
					foll_flags &= ~FOLL_WRITE;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
#else
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					else if (ret & VM_FAULT_SIGBUS)
						return i ? i : -EFAULT;
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				/* cond_resched(); */
#endif
			}

			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
				flush_anon_page(page, start);
#else
				flush_anon_page(vma, page, start);
#endif
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
	} while (len);

	return i;
}
#endif

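/*
 * get_user_pages() replacement: on kernels >= 2.6.29 it goes through
 * __get_user_pages_uprobe() above, otherwise it falls back to the stock
 * get_user_pages().
 */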
int get_user_pages_uprobe(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
	int flags = 0;

	if (write)
		flags |= GUP_FLAGS_WRITE;
	if (force)
		flags |= GUP_FLAGS_FORCE;

	return __get_user_pages_uprobe(tsk, mm,
				start, len, flags,
				pages, vmas);
#else
	return get_user_pages(tsk, mm, start, len, write, force, pages, vmas);
#endif
}

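/*
 * Counterpart of the kernel's access_process_vm() that performs the page
 * walk through get_user_pages_uprobe(). Returns the number of bytes
 * actually transferred.
 *
 * Usage sketch (illustration only, not part of the module):
 *
 *	unsigned long val;
 *
 *	if (access_process_vm_atomic(task, addr, &val, sizeof(val), 0)
 *			!= sizeof(val))
 *		return -EFAULT;
 */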
int access_process_vm_atomic(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	void *old_buf = buf;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	down_read(&mm->mmap_sem);
	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_uprobe(tsk, mm, addr, 1,
				write, 1, &page, &vma);
		if (ret <= 0) {
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
#ifdef CONFIG_HAVE_IOREMAP_PROT
			vma = find_vma(mm, addr);
			if (!vma)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
#endif
				break;
			bytes = ret;
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE - 1);
			if (bytes > PAGE_SIZE - offset)
				bytes = PAGE_SIZE - offset;

			maddr = kmap(page);
			if (write) {
				copy_to_user_page(vma, page, addr,
						  maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset, bytes);
			}
			kunmap(page);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}
	up_read(&mm->mmap_sem);
	mmput(mm);

	return buf - old_buf;
}

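/*
 * Return 1 if the page backing @address in @mm is currently present in
 * memory; walks the page tables without faulting anything in.
 */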
int page_present(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn))
			return 1;
	}

out:
	return 0;
}

EXPORT_SYMBOL_GPL(page_present);
EXPORT_SYMBOL_GPL(get_user_pages_uprobe);
EXPORT_SYMBOL_GPL(access_process_vm_atomic);