[FIX] Save current uprobe state in stack (x86)
[kernel/swap-modules.git] / uprobe / arch / x86 / swap-asm / swap_uprobes.c
/**
 * uprobe/arch/asm-x86/swap_uprobes.c
 * @author Alexey Gerenkov <a.gerenkov@samsung.com> User-Space Probes initial
 * implementation; Support x86/ARM/MIPS for both user and kernel spaces.
 * @author Ekaterina Gorelkina <e.gorelkina@samsung.com>: redesign module for
 * separating core and arch parts
 *
 * @section LICENSE
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * @section COPYRIGHT
 *
 * Copyright (C) Samsung Electronics, 2006-2010
 *
 * @section DESCRIPTION
 *
 * Arch-dependent uprobe interface implementation for x86.
 */
32
33
34 #include <linux/kdebug.h>
35
36 #include <kprobe/swap_slots.h>
37 #include <uprobe/swap_uprobes.h>
38
39 #include "swap_uprobes.h"
40
41
/**
 * @struct uprobe_ctlblk
 * @brief Per-task uprobe control block.
 *
 * Carries the state recorded by uprobe_handler() when a breakpoint is hit
 * and consumed by post_uprobe_handler() once the single step has completed.
 * Instances are reached through current_ucb().
 */
struct uprobe_ctlblk {
	/** Copy of regs->flags taken on breakpoint entry */
	unsigned long flags;
	/** kprobe being single-stepped; NULL when no step is in progress */
	struct kprobe *p;
};

51 static unsigned long trampoline_addr(struct uprobe *up)
52 {
53         return (unsigned long)(up->kp.ainsn.insn +
54                                UPROBES_TRAMP_RET_BREAK_IDX);
55 }
56
57 static struct uprobe_ctlblk *current_ucb(void)
58 {
59         /* FIXME hardcoded offset */
60         return (struct uprobe_ctlblk *)(end_of_stack(current) + 20);
61 }
62
63 static struct kprobe *get_current_probe(void)
64 {
65         return current_ucb()->p;
66 }
67
68 static void set_current_probe(struct kprobe *p)
69 {
70         current_ucb()->p = p;
71 }
72
73 static void save_current_flags(struct pt_regs *regs)
74 {
75         current_ucb()->flags = regs->flags;
76 }
77
78 static void restore_current_flags(struct pt_regs *regs, unsigned long flags)
79 {
80         regs->flags &= ~IF_MASK;
81         regs->flags |= flags & IF_MASK;
82 }
83
84 /**
85  * @brief Prepares uprobe for x86.
86  *
87  * @param up Pointer to the uprobe.
88  * @return 0 on success,\n
89  * -1 on error.
90  */
91 int arch_prepare_uprobe(struct uprobe *up)
92 {
93         struct kprobe *p = up2kp(up);
94         struct task_struct *task = up->task;
95         u8 *tramp = up->atramp.tramp;
96         enum { call_relative_opcode = 0xe8 };
97
98         if (!read_proc_vm_atomic(task, (unsigned long)p->addr,
99                                  tramp, MAX_INSN_SIZE)) {
100                 printk(KERN_ERR "failed to read memory %p!\n", p->addr);
101                 return -EINVAL;
102         }
103         /* TODO: this is a workaround */
104         if (tramp[0] == call_relative_opcode) {
105                 printk(KERN_INFO "cannot install probe: 1st instruction is call\n");
106                 return -EINVAL;
107         }
108
109         tramp[UPROBES_TRAMP_RET_BREAK_IDX] = BREAKPOINT_INSTRUCTION;
110
111         /* TODO: remove dual info */
112         p->opcode = tramp[0];
113
114         p->ainsn.boostable = swap_can_boost(tramp) ? 0 : -1;
115
116         p->ainsn.insn = swap_slot_alloc(up->sm);
117         if (p->ainsn.insn == NULL) {
118                 printk(KERN_ERR "trampoline out of memory\n");
119                 return -ENOMEM;
120         }
121
122         if (!write_proc_vm_atomic(task, (unsigned long)p->ainsn.insn,
123                                   tramp, sizeof(up->atramp.tramp))) {
124                 swap_slot_free(up->sm, p->ainsn.insn);
125                 printk(KERN_INFO "failed to write memory %p!\n", tramp);
126                 return -EINVAL;
127         }
128
129         return 0;
130 }
131
132 /**
133  * @brief Jump pre-handler.
134  *
135  * @param p Pointer to the uprobe's kprobe.
136  * @param regs Pointer to CPU register data.
137  * @return 0.
138  */
139 int setjmp_upre_handler(struct kprobe *p, struct pt_regs *regs)
140 {
141         struct uprobe *up = container_of(p, struct uprobe, kp);
142         struct ujprobe *jp = container_of(up, struct ujprobe, up);
143         kprobe_pre_entry_handler_t pre_entry =
144                 (kprobe_pre_entry_handler_t)jp->pre_entry;
145         entry_point_t entry = (entry_point_t)jp->entry;
146         unsigned long args[6];
147
148         /* FIXME some user space apps crash if we clean interrupt bit */
149         /* regs->EREG(flags) &= ~IF_MASK; */
150 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
151         trace_hardirqs_off();
152 #endif
153
154         /* read first 6 args from stack */
155         if (!read_proc_vm_atomic(current, regs->EREG(sp) + 4,
156                                  args, sizeof(args)))
157                 printk(KERN_WARNING
158                        "failed to read user space func arguments %lx!\n",
159                        regs->sp + 4);
160
161         if (pre_entry)
162                 p->ss_addr[smp_processor_id()] = (kprobe_opcode_t *)
163                                                  pre_entry(jp->priv_arg, regs);
164
165         if (entry)
166                 entry(args[0], args[1], args[2], args[3], args[4], args[5]);
167         else
168                 arch_ujprobe_return();
169
170         return 0;
171 }
172
173 /**
174  * @brief Prepares uretprobe for x86.
175  *
176  * @param ri Pointer to the uretprobe instance.
177  * @param regs Pointer to CPU register data.
178  * @return Void.
179  */
180 int arch_prepare_uretprobe(struct uretprobe_instance *ri, struct pt_regs *regs)
181 {
182         /* Replace the return addr with trampoline addr */
183         unsigned long ra = trampoline_addr(&ri->rp->up);
184         ri->sp = (kprobe_opcode_t *)regs->sp;
185
186         if (!read_proc_vm_atomic(current, regs->EREG(sp), &(ri->ret_addr),
187                                  sizeof(ri->ret_addr))) {
188                 printk(KERN_ERR "failed to read user space func ra %lx addr=%p!\n",
189                                 regs->EREG(sp), ri->rp->up.kp.addr);
190                 return -EINVAL;
191         }
192
193         if (!write_proc_vm_atomic(current, regs->EREG(sp), &ra, sizeof(ra))) {
194                 printk(KERN_ERR "failed to write user space func ra %lx!\n",
195                        regs->EREG(sp));
196                 return -EINVAL;
197         }
198
199         add_uprobe_table(&ri->rp->up.kp);
200
201         return 0;
202 }
203
204 /**
205  * @brief Disarms uretprobe on x86 arch.
206  *
207  * @param ri Pointer to the uretprobe instance.
208  * @param task Pointer to the task for which the probe.
209  * @return 0 on success,\n
210  * negative error code on error.
211  */
212 int arch_disarm_urp_inst(struct uretprobe_instance *ri,
213                          struct task_struct *task)
214 {
215         int len;
216         unsigned long ret_addr;
217         unsigned long sp = (unsigned long)ri->sp;
218         unsigned long tramp_addr = trampoline_addr(&ri->rp->up);
219         len = read_proc_vm_atomic(task, sp, &ret_addr, sizeof(ret_addr));
220         if (len != sizeof(ret_addr)) {
221                 printk(KERN_INFO "---> %s (%d/%d): failed to read stack from %08lx\n",
222                        task->comm, task->tgid, task->pid, sp);
223                 return -EFAULT;
224         }
225
226         if (tramp_addr == ret_addr) {
227                 len = write_proc_vm_atomic(task, sp, &ri->ret_addr,
228                                            sizeof(ri->ret_addr));
229                 if (len != sizeof(ri->ret_addr)) {
230                         printk(KERN_INFO "---> %s (%d/%d): failed to write "
231                                "orig_ret_addr to %08lx",
232                                task->comm, task->tgid, task->pid, sp);
233                         return -EFAULT;
234                 }
235         } else {
236                 printk(KERN_INFO "---> %s (%d/%d): trampoline NOT found at sp = %08lx\n",
237                        task->comm, task->tgid, task->pid, sp);
238                 return -ENOENT;
239         }
240
241         return 0;
242 }
243
/**
 * @brief Gets trampoline address.
 *
 * @param p Pointer to the uprobe's kprobe.
 * @param regs Pointer to CPU register data (not used here).
 * @return Address of the return-trampoline breakpoint of the uprobe that
 * owns @p.
 */
unsigned long arch_get_trampoline_addr(struct kprobe *p, struct pt_regs *regs)
{
	struct uprobe *up = kp2up(p);

	return trampoline_addr(up);
}

256 /**
257  * @brief Restores return address.
258  *
259  * @param orig_ret_addr Original return address.
260  * @param regs Pointer to CPU register data.
261  * @return Void.
262  */
263 void arch_set_orig_ret_addr(unsigned long orig_ret_addr, struct pt_regs *regs)
264 {
265         regs->EREG(ip) = orig_ret_addr;
266 }
267
268 /**
269  * @brief Removes uprobe.
270  *
271  * @param up Pointer to the target uprobe.
272  * @return Void.
273  */
274 void arch_remove_uprobe(struct uprobe *up)
275 {
276         struct kprobe *p = up2kp(up);
277
278         swap_slot_free(up->sm, p->ainsn.insn);
279 }
280
281 static void set_user_jmp_op(void *from, void *to)
282 {
283         struct __arch_jmp_op {
284                 char op;
285                 long raddr;
286         } __packed jop;
287
288         jop.raddr = (long)(to) - ((long)(from) + 5);
289         jop.op = RELATIVEJUMP_INSTRUCTION;
290
291         if (!write_proc_vm_atomic(current, (unsigned long)from, &jop,
292                                   sizeof(jop)))
293                 printk(KERN_WARNING
294                        "failed to write jump opcode to user space %p\n", from);
295 }
296
297 static void resume_execution(struct kprobe *p,
298                              struct pt_regs *regs,
299                              unsigned long flags)
300 {
301         unsigned long *tos, tos_dword = 0;
302         unsigned long copy_eip = (unsigned long)p->ainsn.insn;
303         unsigned long orig_eip = (unsigned long)p->addr;
304         kprobe_opcode_t insns[2];
305
306         regs->EREG(flags) &= ~TF_MASK;
307
308         tos = (unsigned long *)&tos_dword;
309         if (!read_proc_vm_atomic(current, regs->EREG(sp), &tos_dword,
310                                  sizeof(tos_dword))) {
311                 printk(KERN_WARNING
312                        "failed to read dword from top of the user space stack %lx!\n",
313                        regs->sp);
314                 return;
315         }
316
317         if (!read_proc_vm_atomic(current, (unsigned long)p->ainsn.insn, insns,
318                                  2 * sizeof(kprobe_opcode_t))) {
319                 printk(KERN_WARNING
320                        "failed to read first 2 opcodes of instruction copy from user space %p!\n",
321                        p->ainsn.insn);
322                 return;
323         }
324
325         switch (insns[0]) {
326         case 0x9c: /* pushfl */
327                 *tos &= ~(TF_MASK | IF_MASK);
328                 *tos |= flags & (TF_MASK | IF_MASK);
329                 break;
330         case 0xc2: /* iret/ret/lret */
331         case 0xc3:
332         case 0xca:
333         case 0xcb:
334         case 0xcf:
335         case 0xea: /* jmp absolute -- eip is correct */
336                 /* eip is already adjusted, no more changes required */
337                 p->ainsn.boostable = 1;
338                 goto no_change;
339         case 0xe8: /* call relative - Fix return addr */
340                 *tos = orig_eip + (*tos - copy_eip);
341                 break;
342         case 0x9a: /* call absolute -- same as call absolute, indirect */
343                 *tos = orig_eip + (*tos - copy_eip);
344
345                 if (!write_proc_vm_atomic(current,
346                                           regs->EREG(sp),
347                                           &tos_dword,
348                                           sizeof(tos_dword))) {
349                         printk(KERN_WARNING
350                                "failed to write dword to top of the user space stack %lx!\n",
351                                regs->sp);
352                         return;
353                 }
354
355                 goto no_change;
356         case 0xff:
357                 if ((insns[1] & 0x30) == 0x10) {
358                         /*
359                          * call absolute, indirect
360                          * Fix return addr; eip is correct.
361                          * But this is not boostable
362                          */
363                         *tos = orig_eip + (*tos - copy_eip);
364
365                         if (!write_proc_vm_atomic(current, regs->EREG(sp),
366                                                   &tos_dword,
367                                                   sizeof(tos_dword))) {
368                                 printk(KERN_WARNING
369                                        "failed to write dword to top of the user space stack %lx!\n",
370                                        regs->EREG(sp));
371                                 return;
372                         }
373
374                         goto no_change;
375                 } else if (((insns[1] & 0x31) == 0x20) || /* jmp near, absolute
376                                                            * indirect */
377                            ((insns[1] & 0x31) == 0x21)) {
378                         /* jmp far, absolute indirect */
379                         /* eip is correct. And this is boostable */
380                         p->ainsn.boostable = 1;
381                         goto no_change;
382                 }
383         case 0xf3:
384                 if (insns[1] == 0xc3)
385                         /* repz ret special handling: no more changes */
386                         goto no_change;
387                 break;
388         default:
389                 break;
390         }
391
392         if (!write_proc_vm_atomic(current, regs->EREG(sp), &tos_dword,
393                                   sizeof(tos_dword))) {
394                 printk(KERN_WARNING
395                        "failed to write dword to top of the user space stack %lx!\n",
396                        regs->EREG(sp));
397                 return;
398         }
399
400         if (p->ainsn.boostable == 0) {
401                 if ((regs->EREG(ip) > copy_eip) && (regs->EREG(ip) - copy_eip) +
402                     5 < MAX_INSN_SIZE) {
403                         /*
404                          * These instructions can be executed directly if it
405                          * jumps back to correct address.
406                          */
407                         set_user_jmp_op((void *) regs->EREG(ip),
408                                         (void *)orig_eip +
409                                         (regs->EREG(ip) - copy_eip));
410                         p->ainsn.boostable = 1;
411                 } else {
412                         p->ainsn.boostable = -1;
413                 }
414         }
415
416         regs->EREG(ip) = orig_eip + (regs->EREG(ip) - copy_eip);
417
418 no_change:
419         return;
420 }
421
422 static bool prepare_ss_addr(struct kprobe *p, struct pt_regs *regs)
423 {
424         unsigned long *ss_addr = (long *)&p->ss_addr[smp_processor_id()];
425
426         if (*ss_addr) {
427                 regs->ip = *ss_addr;
428                 *ss_addr = 0;
429                 return true;
430         } else {
431                 regs->ip = (unsigned long)p->ainsn.insn;
432                 return false;
433         }
434 }
435
436 static void prepare_ss(struct pt_regs *regs)
437 {
438         /* set single step mode */
439         regs->flags |= TF_MASK;
440         regs->flags &= ~IF_MASK;
441 }
442
443 static int uprobe_handler(struct pt_regs *regs)
444 {
445         struct kprobe *p;
446         kprobe_opcode_t *addr;
447         struct task_struct *task = current;
448         pid_t tgid = task->tgid;
449
450         save_current_flags(regs);
451
452         addr = (kprobe_opcode_t *)(regs->EREG(ip) - sizeof(kprobe_opcode_t));
453         p = get_ukprobe(addr, tgid);
454
455         if (p == NULL) {
456                 void *tramp_addr = (void *)addr - UPROBES_TRAMP_RET_BREAK_IDX;
457
458                 p = get_ukprobe_by_insn_slot(tramp_addr, tgid, regs);
459                 if (p == NULL) {
460                         printk(KERN_INFO "no_uprobe\n");
461                         return 0;
462                 }
463
464                 trampoline_uprobe_handler(p, regs);
465                 return 1;
466         } else {
467                 if (!p->pre_handler || !p->pre_handler(p, regs)) {
468                         if (p->ainsn.boostable == 1 && !p->post_handler) {
469                                 prepare_ss_addr(p, regs);
470                                 return 1;
471                         }
472
473                         if (prepare_ss_addr(p, regs) == false) {
474                                 set_current_probe(p);
475                                 prepare_ss(regs);
476                         }
477                 }
478         }
479
480         return 1;
481 }
482
483 static int post_uprobe_handler(struct pt_regs *regs)
484 {
485         struct kprobe *p = get_current_probe();
486         unsigned long flags = current_ucb()->flags;
487
488         if (p == NULL) {
489                 printk("task[%u %u %s] current uprobe is not found\n",
490                        current->tgid, current->pid, current->comm);
491                 return 0;
492         }
493
494         resume_execution(p, regs, flags);
495         restore_current_flags(regs, flags);
496
497         /* clean stack */
498         current_ucb()->p = 0;
499         current_ucb()->flags = 0;
500
501         return 1;
502 }
503
504 static int uprobe_exceptions_notify(struct notifier_block *self,
505                                     unsigned long val, void *data)
506 {
507         struct die_args *args = (struct die_args *)data;
508         int ret = NOTIFY_DONE;
509
510         if (args->regs == NULL || !user_mode_vm(args->regs))
511                 return ret;
512
513         switch (val) {
514 #ifdef CONFIG_KPROBES
515         case DIE_INT3:
516 #else
517         case DIE_TRAP:
518 #endif
519                 if (uprobe_handler(args->regs))
520                         ret = NOTIFY_STOP;
521                 break;
522         case DIE_DEBUG:
523                 if (post_uprobe_handler(args->regs))
524                         ret = NOTIFY_STOP;
525                 break;
526         default:
527                 break;
528         }
529
530         return ret;
531 }
532
533 static struct notifier_block uprobe_exceptions_nb = {
534         .notifier_call = uprobe_exceptions_notify,
535         .priority = INT_MAX
536 };
537
538 /**
539  * @brief Registers notify.
540  *
541  * @return register_die_notifier result.
542  */
543 int swap_arch_init_uprobes(void)
544 {
545         return register_die_notifier(&uprobe_exceptions_nb);
546 }
547
548 /**
549  * @brief Unregisters notify.
550  *
551  * @return Void.
552  */
553 void swap_arch_exit_uprobes(void)
554 {
555         unregister_die_notifier(&uprobe_exceptions_nb);
556 }
557