[FIX] handling handler_mm_fault retprobe() kernel fail
/*
 *  Dynamic Binary Instrumentation Module based on KProbes
 *  modules/uprobe/arch/asm-x86/swap_uprobes.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * 2008-2009    Alexey Gerenkov <a.gerenkov@samsung.com>: initial implementation
 *              of User-Space Probes; support for x86/ARM/MIPS in both user and kernel space.
 * 2010         Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesigned the module,
 *              separating the core and architecture-specific parts.
 *
 */

#include <linux/version.h>
#include <linux/kdebug.h>
#include <asm/dbi_kprobes.h>
#include <swap_uprobes.h>
#include <asm/swap_uprobes.h>
#include <dbi_insn_slots.h>

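/*
 * Per-CPU control block: keeps the probed thread's saved EFLAGS and the
 * kprobe that hit between the breakpoint (int3) exception and the following
 * debug (single-step) exception, so post_uprobe_handler() can finish the
 * fixup for the probe that was taken on this CPU.
 */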
struct uprobe_ctlblk {
        unsigned long flags;
        struct kprobe *p;
};

static DEFINE_PER_CPU(struct uprobe_ctlblk, ucb) = { 0, NULL };

static void save_current_flags(struct pt_regs *regs)
{
        __get_cpu_var(ucb).flags = regs->EREG(flags);
}

static void restore_current_flags(struct pt_regs *regs)
{
        regs->EREG(flags) &= ~IF_MASK;
        regs->EREG(flags) |= __get_cpu_var(ucb).flags & IF_MASK;
}

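/*
 * Prepare a user-space probe: read the original instruction from the target
 * task, validate it, allocate an instruction slot in the task's address
 * space and fill the slot with a small trampoline:
 *
 *     slot[UPROBES_TRAMP_INSN_IDX]      - copy of the original instruction
 *     slot[UPROBES_TRAMP_RET_BREAK_IDX] - BREAKPOINT_INSTRUCTION (int3)
 *
 * The copy is later single-stepped out of line; the trailing breakpoint is
 * also used as the uretprobe trampoline address (see arch_prepare_uretprobe()
 * and arch_get_trampoline_addr() below).
 */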
int arch_prepare_uprobe(struct uprobe *up, struct hlist_head *page_list)
{
        int ret = 0;
        struct kprobe *p = &up->kp;
        struct task_struct *task = up->task;
        kprobe_opcode_t insns[UPROBES_TRAMP_LEN];

        if (!ret) {
                kprobe_opcode_t insn[MAX_INSN_SIZE];
                struct arch_specific_insn ainsn;

                if (!read_proc_vm_atomic(task, (unsigned long)p->addr, &insn, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
                        panic("failed to read memory %p!\n", p->addr);

                ainsn.insn = insn;
                ret = arch_check_insn(&ainsn);
                if (!ret) {
                        p->opcode = insn[0];
                        p->ainsn.insn = alloc_insn_slot(up->sm);
                        if (!p->ainsn.insn)
                                return -ENOMEM;

                        if (can_boost(insn))
                                p->ainsn.boostable = 0;
                        else
                                p->ainsn.boostable = -1;

                        memcpy(&insns[UPROBES_TRAMP_INSN_IDX], insn, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
                        insns[UPROBES_TRAMP_RET_BREAK_IDX] = BREAKPOINT_INSTRUCTION;

                        if (!write_proc_vm_atomic(task, (unsigned long)p->ainsn.insn, insns, sizeof(insns))) {
                                free_insn_slot(up->sm, p->ainsn.insn);
                                panic("failed to write memory %p!\n", p->ainsn.insn);
                                return -EINVAL;
                        }
                }
        }

        return ret;
}

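/*
 * Pre-handler used by ujprobes: copies the first six words above the user
 * stack pointer (the stack-passed arguments on 32-bit x86, skipping the
 * return address at *sp), lets pre_entry choose the single-step address and
 * then hands the arguments to the user-supplied entry routine, falling back
 * to arch_ujprobe_return() when no entry is set.
 */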
int setjmp_upre_handler(struct kprobe *p, struct pt_regs *regs)
{
        struct uprobe *up = container_of(p, struct uprobe, kp);
        struct ujprobe *jp = container_of(up, struct ujprobe, up);
        kprobe_pre_entry_handler_t pre_entry = (kprobe_pre_entry_handler_t)jp->pre_entry;
        entry_point_t entry = (entry_point_t)jp->entry;
        unsigned long args[6];

        /* FIXME some user space apps crash if we clean interrupt bit */
        //regs->EREG(flags) &= ~IF_MASK;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
        trace_hardirqs_off();
#endif

        /* read first 6 args from stack */
        if (!read_proc_vm_atomic(current, regs->EREG(sp) + 4, args, sizeof(args)))
                panic("failed to read user space func arguments %lx!\n", regs->EREG(sp) + 4);

        if (pre_entry)
                p->ss_addr = pre_entry(jp->priv_arg, regs);

        if (entry)
                entry(args[0], args[1], args[2], args[3], args[4], args[5]);
        else
                arch_ujprobe_return();

        return 0;
}

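/*
 * Install the uretprobe trampoline: save the original return address from
 * the top of the user stack into ri->ret_addr and overwrite it with the
 * address of the breakpoint at the end of the probe's instruction slot, so
 * that returning from the probed function traps back into the uprobe
 * machinery.
 */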
void arch_prepare_uretprobe(struct uretprobe_instance *ri, struct pt_regs *regs)
{
        /* Replace the return addr with trampoline addr */
        unsigned long ra = (unsigned long)(ri->rp->up.kp.ainsn.insn + UPROBES_TRAMP_RET_BREAK_IDX);

        if (!read_proc_vm_atomic(current, regs->EREG(sp), &(ri->ret_addr), sizeof(ri->ret_addr)))
                panic("failed to read user space func ra %lx!\n", regs->EREG(sp));

        if (!write_proc_vm_atomic(current, regs->EREG(sp), &ra, sizeof(ra)))
                panic("failed to write user space func ra %lx!\n", regs->EREG(sp));
}

unsigned long arch_get_trampoline_addr(struct kprobe *p, struct pt_regs *regs)
{
        return (unsigned long)(p->ainsn.insn + UPROBES_TRAMP_RET_BREAK_IDX);
}

void arch_set_orig_ret_addr(unsigned long orig_ret_addr, struct pt_regs *regs)
{
        regs->EREG(ip) = orig_ret_addr;
}

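/*
 * Write a 5-byte relative jump (1-byte opcode + rel32) into the user-space
 * instruction slot so a boosted instruction copy jumps straight back to the
 * original code instead of taking another single-step trap.
 */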
static void set_user_jmp_op(void *from, void *to)
{
        struct __arch_jmp_op
        {
                char op;
                long raddr;
        } __attribute__ ((packed)) jop;

        jop.raddr = (long)(to) - ((long)(from) + 5);
        jop.op = RELATIVEJUMP_INSTRUCTION;

        if (!write_proc_vm_atomic(current, (unsigned long)from, &jop, sizeof(jop)))
                panic("failed to write jump opcode to user space %p!\n", from);
}

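/*
 * Post single-step fixup, following the same logic as the in-kernel kprobes
 * resume_execution() but operating on the probed task's user-space stack via
 * {read,write}_proc_vm_atomic():
 *  - clear TF again;
 *  - for pushf, restore the saved TF/IF bits in the value pushed on the stack;
 *  - for call, rewrite the pushed return address from the copy back to the
 *    original instruction;
 *  - for ret/iret and absolute jumps, EIP is already correct;
 *  - otherwise relocate EIP from the instruction copy back to the original
 *    address, boosting the instruction with a user-space jump when possible.
 */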
static void resume_execution(struct kprobe *p, struct pt_regs *regs, unsigned long flags)
{
        unsigned long *tos, tos_dword = 0;
        unsigned long copy_eip = (unsigned long)p->ainsn.insn;
        unsigned long orig_eip = (unsigned long)p->addr;
        kprobe_opcode_t insns[2];

        regs->EREG(flags) &= ~TF_MASK;

        tos = (unsigned long *)&tos_dword;
        if (!read_proc_vm_atomic(current, regs->EREG(sp), &tos_dword, sizeof(tos_dword)))
                panic("failed to read dword from top of the user space stack %lx!\n", regs->EREG(sp));

        if (!read_proc_vm_atomic(current, (unsigned long)p->ainsn.insn, insns, 2 * sizeof(kprobe_opcode_t)))
                panic("failed to read first 2 opcodes of instruction copy from user space %p!\n", p->ainsn.insn);

        switch (insns[0]) {
                case 0x9c:              /* pushfl */
                        *tos &= ~(TF_MASK | IF_MASK);
                        *tos |= flags & (TF_MASK | IF_MASK);
                        break;
                case 0xc2:              /* iret/ret/lret */
                case 0xc3:
                case 0xca:
                case 0xcb:
                case 0xcf:
                case 0xea:              /* jmp absolute -- eip is correct */
                        /* eip is already adjusted, no more changes required */
                        p->ainsn.boostable = 1;
                        goto no_change;
                case 0xe8:              /* call relative - Fix return addr */
                        *tos = orig_eip + (*tos - copy_eip);
                        break;
                case 0x9a:              /* call absolute -- same as call absolute, indirect */
                        *tos = orig_eip + (*tos - copy_eip);

                        if (!write_proc_vm_atomic(current, regs->EREG(sp), &tos_dword, sizeof(tos_dword)))
                                panic("failed to write dword to top of the user space stack %lx!\n", regs->EREG(sp));

                        goto no_change;
                case 0xff:
                        if ((insns[1] & 0x30) == 0x10) {
                                /*
                                 * call absolute, indirect
                                 * Fix return addr; eip is correct.
                                 * But this is not boostable
                                 */
                                *tos = orig_eip + (*tos - copy_eip);

                                if (!write_proc_vm_atomic(current, regs->EREG(sp), &tos_dword, sizeof(tos_dword)))
                                        panic("failed to write dword to top of the user space stack %lx!\n", regs->EREG(sp));

                                goto no_change;
                        } else if (((insns[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */
                                   ((insns[1] & 0x31) == 0x21)) {
                                /* jmp far, absolute indirect */
                                /* eip is correct. And this is boostable */
                                p->ainsn.boostable = 1;
                                goto no_change;
                        }
                default:
                        break;
        }

        if (!write_proc_vm_atomic(current, regs->EREG(sp), &tos_dword, sizeof(tos_dword)))
                panic("failed to write dword to top of the user space stack %lx!\n", regs->EREG(sp));

        if (p->ainsn.boostable == 0) {
                if ((regs->EREG(ip) > copy_eip) && (regs->EREG(ip) - copy_eip) + 5 < MAX_INSN_SIZE) {
                        /*
                         * This instruction can be executed directly if it
                         * jumps back to the correct address.
                         */
                        set_user_jmp_op((void *)regs->EREG(ip), (void *)orig_eip + (regs->EREG(ip) - copy_eip));
                        p->ainsn.boostable = 1;
                } else {
                        p->ainsn.boostable = -1;
                }
        }

        regs->EREG(ip) = orig_eip + (regs->EREG(ip) - copy_eip);

no_change:
        return;
}

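/*
 * Breakpoint (int3) entry point.  EIP already points past the trap
 * instruction, so step back one kprobe_opcode_t to get the probe address.
 * If the lookup by probe address and tgid fails, the trap came from the
 * breakpoint inside an instruction slot, i.e. a uretprobe trampoline hit,
 * which is handed to trampoline_uprobe_handler().
 */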
static int uprobe_handler(struct pt_regs *regs)
{
        struct kprobe *p;
        kprobe_opcode_t *addr;
        struct task_struct *task = current;
        pid_t tgid = task->tgid;

        save_current_flags(regs);

        addr = (kprobe_opcode_t *)(regs->EREG(ip) - sizeof(kprobe_opcode_t));
        p = get_ukprobe(addr, tgid);

        if (p == NULL) {
                p = get_ukprobe_by_insn_slot(addr, tgid, regs);

                if (p == NULL) {
                        printk("no_uprobe\n");
                        return 0;
                }

                trampoline_uprobe_handler(p, regs);
        } else {
                if (!p->pre_handler || !p->pre_handler(p, regs))
                        prepare_singlestep(p, regs);
        }

        __get_cpu_var(ucb).p = p;

        return 1;
}

static int post_uprobe_handler(struct pt_regs *regs)
{
        struct kprobe *p = __get_cpu_var(ucb).p;
        unsigned long flags = __get_cpu_var(ucb).flags;

        resume_execution(p, regs, flags);
        restore_current_flags(regs);

        return 1;
}

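/*
 * die-notifier glue: route the user-mode breakpoint exception to
 * uprobe_handler() and the subsequent debug (single-step) exception to
 * post_uprobe_handler(); kernel-mode traps are left to other notifiers.
 */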
static int uprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data)
{
        struct die_args *args = (struct die_args *)data;
        int ret = NOTIFY_DONE;

        if (args->regs && !user_mode_vm(args->regs))
                return ret;

        switch (val) {
#ifdef CONFIG_KPROBES
                case DIE_INT3:
#else
                case DIE_TRAP:
#endif
                        if (uprobe_handler(args->regs))
                                ret = NOTIFY_STOP;
                        break;
                case DIE_DEBUG:
                        if (post_uprobe_handler(args->regs))
                                ret = NOTIFY_STOP;
                        break;
                default:
                        break;
        }

        return ret;
}

static struct notifier_block uprobe_exceptions_nb = {
        .notifier_call = uprobe_exceptions_notify,
        .priority = INT_MAX
};

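/*
 * Registration of the die notifier.  A minimal usage sketch follows; the
 * caller names are hypothetical, since the real call site is not part of
 * this file:
 *
 *     static int __init swap_uprobes_module_init(void)
 *     {
 *             return swap_arch_init_uprobes();
 *     }
 *
 *     static void __exit swap_uprobes_module_exit(void)
 *     {
 *             swap_arch_exit_uprobes();
 *     }
 */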
int swap_arch_init_uprobes(void)
{
        return register_die_notifier(&uprobe_exceptions_nb);
}

void swap_arch_exit_uprobes(void)
{
        unregister_die_notifier(&uprobe_exceptions_nb);
}