ipipe: add generic pipeline core
author    Philippe Gerum <rpm@xenomai.org>
Sun, 3 Dec 2017 11:11:44 +0000 (12:11 +0100)
committer Marek Szyprowski <m.szyprowski@samsung.com>
Fri, 27 Apr 2018 09:21:34 +0000 (11:21 +0200)
26 files changed:
include/asm-generic/ipipe.h [new file with mode: 0644]
include/asm-generic/percpu.h
include/ipipe/setup.h [new file with mode: 0644]
include/ipipe/thread_info.h [new file with mode: 0644]
include/linux/ipipe.h [new file with mode: 0644]
include/linux/ipipe_base.h [new file with mode: 0644]
include/linux/ipipe_debug.h [new file with mode: 0644]
include/linux/ipipe_domain.h [new file with mode: 0644]
include/linux/ipipe_lock.h [new file with mode: 0644]
include/linux/ipipe_tickdev.h [new file with mode: 0644]
include/linux/ipipe_trace.h [new file with mode: 0644]
include/linux/preempt.h
init/Kconfig
init/main.c
kernel/Makefile
kernel/ipipe/Kconfig [new file with mode: 0644]
kernel/ipipe/Kconfig.debug [new file with mode: 0644]
kernel/ipipe/Makefile [new file with mode: 0644]
kernel/ipipe/core.c [new file with mode: 0644]
kernel/ipipe/timer.c [new file with mode: 0644]
kernel/ipipe/tracer.c [new file with mode: 0644]
kernel/panic.c
kernel/rcu/Kconfig.debug
kernel/sched/core.c
kernel/sched/idle.c
lib/Kconfig.debug

diff --git a/include/asm-generic/ipipe.h b/include/asm-generic/ipipe.h
new file mode 100644 (file)
index 0000000..6c19c76
--- /dev/null
@@ -0,0 +1,72 @@
+/* -*- linux-c -*-
+ * include/asm-generic/ipipe.h
+ *
+ * Copyright (C) 2002-2017 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ */
+#ifndef __ASM_GENERIC_IPIPE_H
+#define __ASM_GENERIC_IPIPE_H
+
+#ifdef CONFIG_IPIPE
+
+#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || defined(CONFIG_PROVE_LOCKING) || \
+       defined(CONFIG_PREEMPT_VOLUNTARY) || defined(CONFIG_IPIPE_DEBUG_CONTEXT)
+void __ipipe_uaccess_might_fault(void);
+#else
+#define __ipipe_uaccess_might_fault() might_fault()
+#endif
+
+#define hard_cond_local_irq_enable()           hard_local_irq_enable()
+#define hard_cond_local_irq_disable()          hard_local_irq_disable()
+#define hard_cond_local_irq_save()             hard_local_irq_save()
+#define hard_cond_local_irq_restore(flags)     hard_local_irq_restore(flags)
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+void ipipe_root_only(void);
+#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+static inline void ipipe_root_only(void) { }
+#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+
+void ipipe_stall_root(void);
+
+void ipipe_unstall_root(void);
+
+unsigned long ipipe_test_and_stall_root(void);
+
+unsigned long ipipe_test_root(void);
+
+void ipipe_restore_root(unsigned long x);
+
+#else  /* !CONFIG_IPIPE */
+
+#define hard_local_irq_save()          arch_local_irq_save()
+#define hard_local_irq_restore(x)      arch_local_irq_restore(x)
+#define hard_local_irq_enable()                arch_local_irq_enable()
+#define hard_local_irq_disable()       arch_local_irq_disable()
+#define hard_irqs_disabled()           irqs_disabled()
+
+#define hard_cond_local_irq_enable()           do { } while(0)
+#define hard_cond_local_irq_disable()          do { } while(0)
+#define hard_cond_local_irq_save()             0
+#define hard_cond_local_irq_restore(flags)     do { (void)(flags); } while(0)
+
+#define __ipipe_uaccess_might_fault()          might_fault()
+
+static inline void ipipe_root_only(void) { }
+
+#endif /* !CONFIG_IPIPE */
+
+#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE)
+#define hard_smp_local_irq_save()              hard_local_irq_save()
+#define hard_smp_local_irq_restore(flags)      hard_local_irq_restore(flags)
+#else /* !CONFIG_SMP */
+#define hard_smp_local_irq_save()              0
+#define hard_smp_local_irq_restore(flags)      do { (void)(flags); } while(0)
+#endif /* CONFIG_SMP */
+
+#endif
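
For illustration only (not part of the patch), a minimal sketch of how the hard_*() helpers above are meant to be used: under CONFIG_IPIPE they are supplied by the architecture and really mask interrupts in the CPU, whereas local_irq_*() only virtually stall the root domain; without CONFIG_IPIPE they fall back to arch_local_irq_*(). Function and variable names below are hypothetical.

#include <linux/ipipe.h>

/* Publish a word shared with a head-domain interrupt handler. */
static void example_publish(unsigned long *shared, unsigned long val)
{
	unsigned long flags;

	flags = hard_local_irq_save();	/* even head handlers are kept out */
	*shared = val;
	hard_local_irq_restore(flags);
}

/* Same, but masking only when CONFIG_IPIPE is enabled. */
static void example_publish_cond(unsigned long *shared, unsigned long val)
{
	unsigned long flags;

	flags = hard_cond_local_irq_save();
	*shared = val;
	hard_cond_local_irq_restore(flags);
}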
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 1817a8415a5e82a7bc526a38361cea88566e62bb..848c7b44278304802b7011e2066bfc0053ac38f9 100644 (file)
@@ -44,11 +44,33 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
 #define arch_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
 #endif
 
+#ifdef CONFIG_IPIPE
+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP)
+extern int __ipipe_check_percpu_access(void);
+#define __ipipe_cpu_offset                                     \
+       ({                                                      \
+               WARN_ON_ONCE(__ipipe_check_percpu_access());    \
+               __my_cpu_offset;                                \
+       })
+#else
+#define __ipipe_cpu_offset  __my_cpu_offset
+#endif
+#ifndef __ipipe_raw_cpu_ptr
+#define __ipipe_raw_cpu_ptr(ptr)  SHIFT_PERCPU_PTR(ptr, __ipipe_cpu_offset)
+#endif
+#define __ipipe_raw_cpu_read(var) (*__ipipe_raw_cpu_ptr(&(var)))
+#endif /* CONFIG_IPIPE */
+
 #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void setup_per_cpu_areas(void);
 #endif
 
-#endif /* SMP */
+#else /* !SMP */
+
+#define __ipipe_raw_cpu_ptr(ptr)  VERIFY_PERCPU_PTR(ptr)
+#define __ipipe_raw_cpu_read(var) (*__ipipe_raw_cpu_ptr(&(var)))
+
+#endif /* !SMP */
 
 #ifndef PER_CPU_BASE_SECTION
 #ifdef CONFIG_SMP
@@ -148,9 +170,9 @@ do {                                                                        \
 #define this_cpu_generic_to_op(pcp, val, op)                           \
 do {                                                                   \
        unsigned long __flags;                                          \
-       raw_local_irq_save(__flags);                                    \
+       __flags = hard_local_irq_save();                                \
        raw_cpu_generic_to_op(pcp, val, op);                            \
-       raw_local_irq_restore(__flags);                                 \
+       hard_local_irq_restore(__flags);                                \
 } while (0)
 
 
@@ -158,9 +180,9 @@ do {                                                                        \
 ({                                                                     \
        typeof(pcp) __ret;                                              \
        unsigned long __flags;                                          \
-       raw_local_irq_save(__flags);                                    \
+       __flags = hard_local_irq_save();                                \
        __ret = raw_cpu_generic_add_return(pcp, val);                   \
-       raw_local_irq_restore(__flags);                                 \
+       hard_local_irq_restore(__flags);                                \
        __ret;                                                          \
 })
 
@@ -168,9 +190,9 @@ do {                                                                        \
 ({                                                                     \
        typeof(pcp) __ret;                                              \
        unsigned long __flags;                                          \
-       raw_local_irq_save(__flags);                                    \
+       __flags = hard_local_irq_save();                                \
        __ret = raw_cpu_generic_xchg(pcp, nval);                        \
-       raw_local_irq_restore(__flags);                                 \
+       hard_local_irq_restore(__flags);                                \
        __ret;                                                          \
 })
 
@@ -178,9 +200,9 @@ do {                                                                        \
 ({                                                                     \
        typeof(pcp) __ret;                                              \
        unsigned long __flags;                                          \
-       raw_local_irq_save(__flags);                                    \
+       __flags = hard_local_irq_save();                                \
        __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval);               \
-       raw_local_irq_restore(__flags);                                 \
+       hard_local_irq_restore(__flags);                                \
        __ret;                                                          \
 })
 
@@ -188,10 +210,10 @@ do {                                                                      \
 ({                                                                     \
        int __ret;                                                      \
        unsigned long __flags;                                          \
-       raw_local_irq_save(__flags);                                    \
+       __flags = hard_local_irq_save();                                \
        __ret = raw_cpu_generic_cmpxchg_double(pcp1, pcp2,              \
                        oval1, oval2, nval1, nval2);                    \
-       raw_local_irq_restore(__flags);                                 \
+       hard_local_irq_restore(__flags);                                \
        __ret;                                                          \
 })
 
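The substitution of hard_local_irq_save() for raw_local_irq_save() in the generic this_cpu fallbacks above matters because, under CONFIG_IPIPE, local_irq_*() only stall the root domain virtually: a head-domain interrupt could otherwise preempt the read-modify-write sequence and corrupt the per-CPU datum. A short sketch, not part of the patch, of the kind of update this now protects (counter name hypothetical):

#include <linux/percpu.h>

/* Hypothetical per-CPU counter touched from both root and head context. */
static DEFINE_PER_CPU(unsigned long, example_events);

static void example_count_event(void)
{
	/*
	 * On architectures relying on the generic fallback, this expands
	 * to this_cpu_generic_to_op(), which now brackets the
	 * read-modify-write with hard_local_irq_save()/restore() so that
	 * not even a head-domain interrupt can slip in between.
	 */
	this_cpu_add(example_events, 1);
}
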
diff --git a/include/ipipe/setup.h b/include/ipipe/setup.h
new file mode 100644 (file)
index 0000000..c2bc521
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef _IPIPE_SETUP_H
+#define _IPIPE_SETUP_H
+
+/*
+ * Placeholders for setup hooks defined by client domains.
+ */
+
+static inline void __ipipe_early_client_setup(void) { }
+
+#endif /* !_IPIPE_SETUP_H */
diff --git a/include/ipipe/thread_info.h b/include/ipipe/thread_info.h
new file mode 100644 (file)
index 0000000..7038c12
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef _IPIPE_THREAD_INFO_H
+#define _IPIPE_THREAD_INFO_H
+
+/*
+ * Placeholder for private thread information defined by client
+ * domains.
+ */
+
+struct ipipe_threadinfo {
+};
+
+#define __ipipe_init_threadinfo(__p) do { } while (0)
+
+#endif /* !_IPIPE_THREAD_INFO_H */
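
Both include/ipipe/setup.h and include/ipipe/thread_info.h above are empty placeholders that a client domain (a co-kernel such as Xenomai) is expected to shadow with its own versions earlier in the include path. Purely to illustrate that contract, and not taken from any real client, a hypothetical client-side ipipe/thread_info.h might look like this:

#ifndef _IPIPE_THREAD_INFO_H
#define _IPIPE_THREAD_INFO_H

/* Hypothetical per-thread state attached by a co-kernel. */
struct ipipe_threadinfo {
	void *thread_cookie;	/* e.g. back-pointer to a co-kernel thread */
};

#define __ipipe_init_threadinfo(__p)		\
	do {					\
		(__p)->thread_cookie = NULL;	\
	} while (0)

#endif /* !_IPIPE_THREAD_INFO_H */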
diff --git a/include/linux/ipipe.h b/include/linux/ipipe.h
new file mode 100644 (file)
index 0000000..6621c4a
--- /dev/null
@@ -0,0 +1,447 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe.h
+ *
+ * Copyright (C) 2002-2014 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_H
+#define __LINUX_IPIPE_H
+
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/percpu.h>
+#include <linux/irq.h>
+#include <linux/thread_info.h>
+#include <linux/ipipe_base.h>
+#include <linux/ipipe_debug.h>
+#include <asm/ptrace.h>
+#include <asm/ipipe.h>
+
+#ifdef CONFIG_IPIPE
+
+#include <linux/ipipe_domain.h>
+
+/* ipipe_set_hooks(..., enables) */
+#define IPIPE_SYSCALL  __IPIPE_SYSCALL_E
+#define IPIPE_TRAP     __IPIPE_TRAP_E
+#define IPIPE_KEVENT   __IPIPE_KEVENT_E
+
+struct ipipe_sysinfo {
+       int sys_nr_cpus;        /* Number of CPUs on board */
+       int sys_hrtimer_irq;    /* hrtimer device IRQ */
+       u64 sys_hrtimer_freq;   /* hrtimer device frequency */
+       u64 sys_hrclock_freq;   /* hrclock device frequency */
+       u64 sys_cpu_freq;       /* CPU frequency (Hz) */
+       struct ipipe_arch_sysinfo arch;
+};
+
+struct ipipe_work_header {
+       size_t size;
+       void (*handler)(struct ipipe_work_header *work);
+};
+
+extern unsigned int __ipipe_printk_virq;
+
+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq);
+
+void __ipipe_share_current(int flags);
+
+void __ipipe_arch_share_current(int flags);
+
+int __ipipe_disable_ondemand_mappings(struct task_struct *p);
+
+int __ipipe_pin_vma(struct mm_struct *mm, struct vm_area_struct *vma);
+
+#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH
+
+#define prepare_arch_switch(next)                      \
+       do {                                            \
+               hard_local_irq_enable();                \
+               __ipipe_report_schedule(current, next); \
+       } while(0)
+
+#ifndef ipipe_get_active_mm
+static inline struct mm_struct *ipipe_get_active_mm(void)
+{
+       return __this_cpu_read(ipipe_percpu.active_mm);
+}
+#define ipipe_get_active_mm ipipe_get_active_mm
+#endif
+
+#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */
+
+#define prepare_arch_switch(next)                      \
+       do {                                            \
+               __ipipe_report_schedule(current, next); \
+               hard_local_irq_disable();               \
+       } while(0)
+
+#ifndef ipipe_get_active_mm
+#define ipipe_get_active_mm()  (current->active_mm)
+#endif
+
+#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */
+
+#ifdef CONFIG_IPIPE_WANT_CLOCKSOURCE
+
+extern unsigned long long __ipipe_cs_freq;
+
+extern struct clocksource *__ipipe_cs;
+
+#endif /* CONFIG_IPIPE_WANT_CLOCKSOURCE */
+
+static inline bool __ipipe_hrclock_ok(void)
+{
+       return __ipipe_hrclock_freq != 0;
+}
+
+static inline void __ipipe_nmi_enter(void)
+{
+       __this_cpu_write(ipipe_percpu.nmi_state, __ipipe_root_status);
+       __set_bit(IPIPE_STALL_FLAG, &__ipipe_root_status);
+       ipipe_save_context_nmi();
+}
+
+static inline void __ipipe_nmi_exit(void)
+{
+       ipipe_restore_context_nmi();
+       if (!test_bit(IPIPE_STALL_FLAG, raw_cpu_ptr(&ipipe_percpu.nmi_state)))
+               __clear_bit(IPIPE_STALL_FLAG, &__ipipe_root_status);
+}
+
+/* KVM-side calls, hw IRQs off. */
+static inline void __ipipe_enter_vm(struct ipipe_vm_notifier *vmf)
+{
+       struct ipipe_percpu_data *p;
+
+       p = raw_cpu_ptr(&ipipe_percpu);
+       p->vm_notifier = vmf;
+       barrier();
+}
+
+static inline void __ipipe_exit_vm(void)
+{
+       struct ipipe_percpu_data *p;
+
+       p = raw_cpu_ptr(&ipipe_percpu);
+       p->vm_notifier = NULL;
+       barrier();
+}
+
+/* Client-side call, hw IRQs off. */
+void __ipipe_notify_vm_preemption(void);
+
+static inline void __ipipe_sync_pipeline(struct ipipe_domain *top)
+{
+       if (__ipipe_current_domain != top) {
+               __ipipe_do_sync_pipeline(top);
+               return;
+       }
+       if (!test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpu_context(top)->status))
+               __ipipe_sync_stage();
+}
+
+void ipipe_register_head(struct ipipe_domain *ipd,
+                        const char *name);
+
+void ipipe_unregister_head(struct ipipe_domain *ipd);
+
+int ipipe_request_irq(struct ipipe_domain *ipd,
+                     unsigned int irq,
+                     ipipe_irq_handler_t handler,
+                     void *cookie,
+                     ipipe_irq_ackfn_t ackfn);
+
+void ipipe_free_irq(struct ipipe_domain *ipd,
+                   unsigned int irq);
+
+void ipipe_raise_irq(unsigned int irq);
+
+int ipipe_handle_syscall(struct thread_info *ti,
+                        unsigned long nr, struct pt_regs *regs);
+
+void ipipe_set_hooks(struct ipipe_domain *ipd,
+                    int enables);
+
+unsigned int ipipe_alloc_virq(void);
+
+void ipipe_free_virq(unsigned int virq);
+
+static inline void ipipe_post_irq_head(unsigned int irq)
+{
+       __ipipe_set_irq_pending(ipipe_head_domain, irq);
+}
+
+static inline void ipipe_post_irq_root(unsigned int irq)
+{
+       __ipipe_set_irq_pending(&ipipe_root, irq);
+}
+
+static inline void ipipe_stall_head(void)
+{
+       hard_local_irq_disable();
+       __set_bit(IPIPE_STALL_FLAG, &__ipipe_head_status);
+}
+
+static inline unsigned long ipipe_test_and_stall_head(void)
+{
+       hard_local_irq_disable();
+       return __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_head_status);
+}
+
+static inline unsigned long ipipe_test_head(void)
+{
+       unsigned long flags, ret;
+
+       flags = hard_smp_local_irq_save();
+       ret = test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status);
+       hard_smp_local_irq_restore(flags);
+
+       return ret;
+}
+
+void ipipe_unstall_head(void);
+
+void __ipipe_restore_head(unsigned long x);
+
+static inline void ipipe_restore_head(unsigned long x)
+{
+       ipipe_check_irqoff();
+       if ((x ^ test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status)) & 1)
+               __ipipe_restore_head(x);
+}
+
+void __ipipe_post_work_root(struct ipipe_work_header *work);
+
+#define ipipe_post_work_root(p, header)                        \
+       do {                                            \
+               void header_not_at_start(void);         \
+               if (offsetof(typeof(*(p)), header)) {   \
+                       header_not_at_start();          \
+               }                                       \
+               __ipipe_post_work_root(&(p)->header);   \
+       } while (0)
+
+int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo);
+
+unsigned long ipipe_critical_enter(void (*syncfn)(void));
+
+void ipipe_critical_exit(unsigned long flags);
+
+void ipipe_prepare_panic(void);
+
+#ifdef CONFIG_SMP
+#ifndef ipipe_smp_p
+#define ipipe_smp_p (1)
+#endif
+void ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask);
+void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask);
+#else  /* !CONFIG_SMP */
+#define ipipe_smp_p (0)
+static inline
+void ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask) { }
+static inline void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask) { }
+static inline void ipipe_disable_smp(void) { }
+#endif /* CONFIG_SMP */
+
+static inline void ipipe_restore_root_nosync(unsigned long x)
+{
+       unsigned long flags;
+
+       flags = hard_smp_local_irq_save();
+       __ipipe_restore_root_nosync(x);
+       hard_smp_local_irq_restore(flags);
+}
+
+/* Must be called hw IRQs off. */
+static inline void ipipe_lock_irq(unsigned int irq)
+{
+       struct ipipe_domain *ipd = __ipipe_current_domain;
+       if (ipd == ipipe_root_domain)
+               __ipipe_lock_irq(irq);
+}
+
+/* Must be called hw IRQs off. */
+static inline void ipipe_unlock_irq(unsigned int irq)
+{
+       struct ipipe_domain *ipd = __ipipe_current_domain;
+       if (ipd == ipipe_root_domain)
+               __ipipe_unlock_irq(irq);
+}
+
+static inline struct ipipe_threadinfo *ipipe_current_threadinfo(void)
+{
+       return &current_thread_info()->ipipe_data;
+}
+
+#define ipipe_task_threadinfo(p) (&task_thread_info(p)->ipipe_data)
+
+void ipipe_enable_irq(unsigned int irq);
+
+static inline void ipipe_disable_irq(unsigned int irq)
+{
+       struct irq_desc *desc;
+       struct irq_chip *chip;
+
+       desc = irq_to_desc(irq);
+       if (desc == NULL)
+               return;
+
+       chip = irq_desc_get_chip(desc);
+
+       if (WARN_ON_ONCE(chip->irq_disable == NULL && chip->irq_mask == NULL))
+               return;
+
+       if (chip->irq_disable)
+               chip->irq_disable(&desc->irq_data);
+       else
+               chip->irq_mask(&desc->irq_data);
+}
+
+static inline void ipipe_end_irq(unsigned int irq)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       if (desc)
+               desc->ipipe_end(desc);
+}
+
+static inline int ipipe_chained_irq_p(struct irq_desc *desc)
+{
+       void __ipipe_chained_irq(struct irq_desc *desc);
+
+       return desc->handle_irq == __ipipe_chained_irq;
+}
+
+static inline void ipipe_handle_demuxed_irq(unsigned int cascade_irq)
+{
+       ipipe_trace_irq_entry(cascade_irq);
+       __ipipe_dispatch_irq(cascade_irq, IPIPE_IRQF_NOSYNC);
+       ipipe_trace_irq_exit(cascade_irq);
+}
+
+static inline void __ipipe_init_threadflags(struct thread_info *ti)
+{
+       ti->ipipe_flags = 0;
+}
+
+static inline
+void ipipe_set_ti_thread_flag(struct thread_info *ti, int flag)
+{
+       set_bit(flag, &ti->ipipe_flags);
+}
+
+static inline
+void ipipe_clear_ti_thread_flag(struct thread_info *ti, int flag)
+{
+       clear_bit(flag, &ti->ipipe_flags);
+}
+
+static inline
+int ipipe_test_and_clear_ti_thread_flag(struct thread_info *ti, int flag)
+{
+       return test_and_clear_bit(flag, &ti->ipipe_flags);
+}
+
+static inline
+int ipipe_test_ti_thread_flag(struct thread_info *ti, int flag)
+{
+       return test_bit(flag, &ti->ipipe_flags);
+}
+
+#define ipipe_set_thread_flag(flag) \
+       ipipe_set_ti_thread_flag(current_thread_info(), flag)
+
+#define ipipe_clear_thread_flag(flag) \
+       ipipe_clear_ti_thread_flag(current_thread_info(), flag)
+
+#define ipipe_test_and_clear_thread_flag(flag) \
+       ipipe_test_and_clear_ti_thread_flag(current_thread_info(), flag)
+
+#define ipipe_test_thread_flag(flag) \
+       ipipe_test_ti_thread_flag(current_thread_info(), flag)
+
+#define ipipe_enable_notifier(p)                                       \
+       ipipe_set_ti_thread_flag(task_thread_info(p), TIP_NOTIFY)
+
+#define ipipe_disable_notifier(p)                                      \
+       do {                                                            \
+               struct thread_info *ti = task_thread_info(p);           \
+               ipipe_clear_ti_thread_flag(ti, TIP_NOTIFY);             \
+               ipipe_clear_ti_thread_flag(ti, TIP_MAYDAY);             \
+       } while (0)
+
+#define ipipe_notifier_enabled_p(p)                                    \
+       ipipe_test_ti_thread_flag(task_thread_info(p), TIP_NOTIFY)
+
+#define ipipe_raise_mayday(p)                                          \
+       do {                                                            \
+               struct thread_info *ti = task_thread_info(p);           \
+               ipipe_check_irqoff();                                   \
+               if (ipipe_test_ti_thread_flag(ti, TIP_NOTIFY))          \
+                       ipipe_set_ti_thread_flag(ti, TIP_MAYDAY);       \
+       } while (0)
+
+#ifdef CONFIG_IPIPE_TRACE
+void __ipipe_tracer_hrclock_initialized(void);
+#else /* !CONFIG_IPIPE_TRACE */
+#define __ipipe_tracer_hrclock_initialized()   do { } while(0)
+#endif /* !CONFIG_IPIPE_TRACE */
+
+int ipipe_get_domain_slope_hook(struct task_struct *prev,
+                               struct task_struct *next);
+
+#else  /* !CONFIG_IPIPE */
+
+#define __ipipe_root_p         1
+#define ipipe_root_p           1
+
+static inline void __ipipe_init_threadflags(struct thread_info *ti) { }
+
+static inline void __ipipe_nmi_enter(void) { }
+
+static inline void __ipipe_nmi_exit(void) { }
+
+#define ipipe_safe_current()   current
+#define ipipe_processor_id()   smp_processor_id()
+
+static inline int ipipe_test_foreign_stack(void)
+{
+       return 0;
+}
+
+static inline void ipipe_lock_irq(unsigned int irq) { }
+
+static inline void ipipe_unlock_irq(unsigned int irq) { }
+
+static inline int ipipe_handle_syscall(struct thread_info *ti,
+                                      unsigned long nr, struct pt_regs *regs)
+{
+       return 0;
+}
+
+static inline
+int ipipe_get_domain_slope_hook(struct task_struct *prev,
+                               struct task_struct *next)
+{
+       return 0;
+}
+
+#endif /* !CONFIG_IPIPE */
+
+#endif /* !__LINUX_IPIPE_H */
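
For illustration only (not part of the patch), a condensed sketch of the call sequence a co-kernel client would issue against the API declared above: register a head domain, attach a handler to a device IRQ on it, and relay deferrable work to Linux through a virtual IRQ. All names (my_head, my_isr, the IRQ number 42, ...) are hypothetical and error handling is minimal.

#include <linux/init.h>
#include <linux/ipipe.h>

static struct ipipe_domain my_head;	/* hypothetical head domain */
static unsigned int my_virq;

/* Runs on the head stage, ahead of Linux, hw IRQs off. */
static void my_isr(unsigned int irq, void *cookie)
{
	/* ... time-critical handling ... */
	ipipe_post_irq_root(my_virq);	/* defer the non-critical part */
}

/* Runs on the root stage once the pipeline syncs. */
static void my_root_handler(unsigned int virq, void *cookie)
{
	/* ... regular Linux-context follow-up ... */
}

static int __init my_client_init(void)
{
	int ret;

	ipipe_register_head(&my_head, "example");

	my_virq = ipipe_alloc_virq();
	ret = ipipe_request_irq(ipipe_root_domain, my_virq,
				my_root_handler, NULL, NULL);
	if (ret)
		return ret;

	/* 42 stands in for some real device IRQ. */
	return ipipe_request_irq(&my_head, 42, my_isr, NULL, NULL);
}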
diff --git a/include/linux/ipipe_base.h b/include/linux/ipipe_base.h
new file mode 100644 (file)
index 0000000..f476c5a
--- /dev/null
@@ -0,0 +1,243 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_base.h
+ *
+ * Copyright (C) 2002-2014 Philippe Gerum.
+ *               2007 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_BASE_H
+#define __LINUX_IPIPE_BASE_H
+
+struct kvm_vcpu;
+struct ipipe_vm_notifier;
+struct irq_desc;
+
+#ifdef CONFIG_IPIPE
+
+#define IPIPE_CORE_APIREV  CONFIG_IPIPE_CORE_APIREV
+
+#include <linux/ipipe_domain.h>
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <asm/ipipe_base.h>
+
+struct pt_regs;
+struct ipipe_domain;
+
+struct ipipe_trap_data {
+       int exception;
+       struct pt_regs *regs;
+};
+
+struct ipipe_vm_notifier {
+       void (*handler)(struct ipipe_vm_notifier *nfy);
+};
+
+static inline int ipipe_virtual_irq_p(unsigned int irq)
+{
+       return irq >= IPIPE_VIRQ_BASE && irq < IPIPE_NR_IRQS;
+}
+
+void __ipipe_init_early(void);
+
+void __ipipe_init(void);
+
+#ifdef CONFIG_PROC_FS
+void __ipipe_init_proc(void);
+#ifdef CONFIG_IPIPE_TRACE
+void __ipipe_init_tracer(void);
+#else /* !CONFIG_IPIPE_TRACE */
+static inline void __ipipe_init_tracer(void) { }
+#endif /* CONFIG_IPIPE_TRACE */
+#else  /* !CONFIG_PROC_FS */
+static inline void __ipipe_init_proc(void) { }
+#endif /* CONFIG_PROC_FS */
+
+void __ipipe_restore_root_nosync(unsigned long x);
+
+#define IPIPE_IRQF_NOACK    0x1
+#define IPIPE_IRQF_NOSYNC   0x2
+
+void __ipipe_dispatch_irq(unsigned int irq, int flags);
+
+void __ipipe_do_sync_stage(void);
+
+void __ipipe_do_sync_pipeline(struct ipipe_domain *top);
+
+void __ipipe_lock_irq(unsigned int irq);
+
+void __ipipe_unlock_irq(unsigned int irq);
+
+void __ipipe_do_critical_sync(unsigned int irq, void *cookie);
+
+void __ipipe_ack_edge_irq(struct irq_desc *desc);
+
+void __ipipe_nop_irq(struct irq_desc *desc);
+
+static inline void __ipipe_idle(void)
+{
+       ipipe_unstall_root();
+}
+
+#ifndef __ipipe_sync_check
+#define __ipipe_sync_check     1
+#endif
+
+static inline void __ipipe_sync_stage(void)
+{
+       if (likely(__ipipe_sync_check))
+               __ipipe_do_sync_stage();
+}
+
+#ifndef __ipipe_run_irqtail
+#define __ipipe_run_irqtail(irq) do { } while(0)
+#endif
+
+int __ipipe_log_printk(const char *fmt, va_list args);
+void __ipipe_flush_printk(unsigned int irq, void *cookie);
+
+#define __ipipe_get_cpu(flags) ({ (flags) = hard_preempt_disable(); ipipe_processor_id(); })
+#define __ipipe_put_cpu(flags) hard_preempt_enable(flags)
+
+int __ipipe_notify_syscall(struct pt_regs *regs);
+
+int __ipipe_notify_trap(int exception, struct pt_regs *regs);
+
+int __ipipe_notify_kevent(int event, void *data);
+
+#define __ipipe_report_trap(exception, regs)                           \
+       __ipipe_notify_trap(exception, regs)
+
+#define __ipipe_report_sigwake(p)                                      \
+       do {                                                            \
+               if (ipipe_notifier_enabled_p(p))                        \
+                       __ipipe_notify_kevent(IPIPE_KEVT_SIGWAKE, p);   \
+       } while (0)
+
+struct ipipe_cpu_migration_data {
+       struct task_struct *task;
+       int dest_cpu;
+};
+
+#define __ipipe_report_setaffinity(__p, __dest_cpu)                    \
+       do {                                                            \
+               struct ipipe_cpu_migration_data d = {                   \
+                       .task = (__p),                                  \
+                       .dest_cpu = (__dest_cpu),                       \
+               };                                                      \
+               if (ipipe_notifier_enabled_p(__p))                      \
+                       __ipipe_notify_kevent(IPIPE_KEVT_SETAFFINITY, &d); \
+       } while (0)
+
+#define __ipipe_report_exit(p)                                         \
+       do {                                                            \
+               if (ipipe_notifier_enabled_p(p))                        \
+                       __ipipe_notify_kevent(IPIPE_KEVT_EXIT, p);      \
+       } while (0)
+
+#define __ipipe_report_setsched(p)                                     \
+       do {                                                            \
+               if (ipipe_notifier_enabled_p(p))                        \
+                       __ipipe_notify_kevent(IPIPE_KEVT_SETSCHED, p); \
+       } while (0)
+
+#define __ipipe_report_schedule(prev, next)                            \
+do {                                                                   \
+       if (ipipe_notifier_enabled_p(next) ||                           \
+           ipipe_notifier_enabled_p(prev)) {                           \
+               __this_cpu_write(ipipe_percpu.rqlock_owner, prev);      \
+               __ipipe_notify_kevent(IPIPE_KEVT_SCHEDULE, next);       \
+       }                                                               \
+} while (0)
+
+#define __ipipe_report_cleanup(mm)                                     \
+       __ipipe_notify_kevent(IPIPE_KEVT_CLEANUP, mm)
+
+#define __ipipe_report_clockfreq_update(freq)                          \
+       __ipipe_notify_kevent(IPIPE_KEVT_CLOCKFREQ, &(freq))
+
+void __ipipe_notify_vm_preemption(void);
+
+void __ipipe_call_mayday(struct pt_regs *regs);
+
+static inline void __ipipe_init_taskinfo(struct task_struct *p) { }
+
+#define __ipipe_serial_debug(__fmt, __args...) raw_printk(__fmt, ##__args)
+
+#else /* !CONFIG_IPIPE */
+
+struct task_struct;
+struct mm_struct;
+
+static inline void __ipipe_init_early(void) { }
+
+static inline void __ipipe_init(void) { }
+
+static inline void __ipipe_init_proc(void) { }
+
+static inline void __ipipe_idle(void) { }
+
+static inline void __ipipe_report_sigwake(struct task_struct *p) { }
+
+static inline void __ipipe_report_setaffinity(struct task_struct *p,
+                                             int dest_cpu) { }
+
+static inline void __ipipe_report_setsched(struct task_struct *p) { }
+
+static inline void __ipipe_report_exit(struct task_struct *p) { }
+
+static inline void __ipipe_report_cleanup(struct mm_struct *mm) { }
+
+#define __ipipe_report_trap(exception, regs)  0
+
+static inline void __ipipe_init_taskinfo(struct task_struct *p) { }
+
+#define hard_preempt_disable()         ({ preempt_disable(); 0; })
+#define hard_preempt_enable(flags)     ({ preempt_enable(); (void)(flags); })
+
+#define __ipipe_get_cpu(flags)         ({ (void)(flags); get_cpu(); })
+#define __ipipe_put_cpu(flags)         \
+       do {                            \
+               (void)(flags);          \
+               put_cpu();              \
+       } while (0)
+
+#define __ipipe_root_tick_p(regs)      1
+
+#define ipipe_handle_demuxed_irq(irq)          generic_handle_irq(irq)
+
+#define __ipipe_enter_vm(vmf)  do { } while (0)
+
+static inline void __ipipe_exit_vm(void) { }
+
+static inline void __ipipe_notify_vm_preemption(void) { }
+
+#define __ipipe_serial_debug(__fmt, __args...) do { } while (0)
+
+#endif /* !CONFIG_IPIPE */
+
+#ifdef CONFIG_IPIPE_WANT_PTE_PINNING
+void __ipipe_pin_mapping_globally(unsigned long start,
+                                 unsigned long end);
+#else
+static inline void __ipipe_pin_mapping_globally(unsigned long start,
+                                               unsigned long end)
+{ }
+#endif
+
+#endif /* !__LINUX_IPIPE_BASE_H */
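
__ipipe_get_cpu()/__ipipe_put_cpu() above are the pipeline-aware counterparts of get_cpu()/put_cpu(): with CONFIG_IPIPE they rely on hard_preempt_disable(), so the section is also protected from head-domain preemption; without CONFIG_IPIPE they degrade to plain get_cpu()/put_cpu(). A hypothetical use, not part of the patch:

#include <linux/ipipe_base.h>

static void example_touch_this_cpu(void)
{
	unsigned long flags;
	int cpu;

	cpu = __ipipe_get_cpu(flags);	/* no migration, no head preemption */
	/* ... strictly per-CPU work for 'cpu' ... */
	(void)cpu;
	__ipipe_put_cpu(flags);
}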
diff --git a/include/linux/ipipe_debug.h b/include/linux/ipipe_debug.h
new file mode 100644 (file)
index 0000000..5d7efef
--- /dev/null
@@ -0,0 +1,100 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_debug.h
+ *
+ * Copyright (C) 2012 Philippe Gerum <rpm@xenomai.org>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_DEBUG_H
+#define __LINUX_IPIPE_DEBUG_H
+
+#include <linux/ipipe_domain.h>
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+
+#include <asm/bug.h>
+
+static inline int ipipe_disable_context_check(void)
+{
+       return xchg(raw_cpu_ptr(&ipipe_percpu.context_check), 0);
+}
+
+static inline void ipipe_restore_context_check(int old_state)
+{
+       __this_cpu_write(ipipe_percpu.context_check, old_state);
+}
+
+static inline void ipipe_context_check_off(void)
+{
+       int cpu;
+       for_each_online_cpu(cpu)
+               per_cpu(ipipe_percpu, cpu).context_check = 0;
+}
+
+static inline void ipipe_save_context_nmi(void)
+{
+       int state = ipipe_disable_context_check();
+       __this_cpu_write(ipipe_percpu.context_check_saved, state);
+}
+
+static inline void ipipe_restore_context_nmi(void)
+{
+       ipipe_restore_context_check(__this_cpu_read(ipipe_percpu.context_check_saved));
+}
+
+#else  /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+
+static inline int ipipe_disable_context_check(void)
+{
+       return 0;
+}
+
+static inline void ipipe_restore_context_check(int old_state) { }
+
+static inline void ipipe_context_check_off(void) { }
+
+static inline void ipipe_save_context_nmi(void) { }
+
+static inline void ipipe_restore_context_nmi(void) { }
+
+#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+
+#ifdef CONFIG_IPIPE_DEBUG
+
+#define ipipe_check_irqoff()                                   \
+       do {                                                    \
+               if (WARN_ON_ONCE(!hard_irqs_disabled()))        \
+                       hard_local_irq_disable();               \
+       } while (0)
+
+#else /* !CONFIG_IPIPE_DEBUG */
+
+static inline void ipipe_check_irqoff(void) { }
+
+#endif /* !CONFIG_IPIPE_DEBUG */
+
+#ifdef CONFIG_IPIPE_DEBUG_INTERNAL
+#define IPIPE_WARN(c)          WARN_ON(c)
+#define IPIPE_WARN_ONCE(c)     WARN_ON_ONCE(c)
+#define IPIPE_BUG_ON(c)                BUG_ON(c)
+#else
+#define IPIPE_WARN(c)          do { (void)(c); } while (0)
+#define IPIPE_WARN_ONCE(c)     do { (void)(c); } while (0)
+#define IPIPE_BUG_ON(c)                do { (void)(c); } while (0)
+#endif
+
+#endif /* !__LINUX_IPIPE_DEBUG_H */
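
A quick sketch (hypothetical function, assuming CONFIG_IPIPE) of how the helpers above are meant to be used: ipipe_check_irqoff() warns, and under CONFIG_IPIPE_DEBUG enforces, that hardware IRQs are really off, while the IPIPE_WARN*/IPIPE_BUG_ON macros compile to nothing unless CONFIG_IPIPE_DEBUG_INTERNAL is set.

#include <linux/ipipe_debug.h>

/* Hypothetical internal helper which must run with hw IRQs off. */
static void example_touch_irq_slot(struct ipipe_domain *ipd, unsigned int irq)
{
	ipipe_check_irqoff();			/* warn + fix up if IRQs are on */
	IPIPE_BUG_ON(irq >= IPIPE_NR_IRQS);	/* free when debug is off */

	/* ... manipulate ipd->irqs[irq] or the pending bitmaps here ... */
	(void)ipd;
}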
diff --git a/include/linux/ipipe_domain.h b/include/linux/ipipe_domain.h
new file mode 100644 (file)
index 0000000..e03e97b
--- /dev/null
@@ -0,0 +1,357 @@
+/*   -*- linux-c -*-
+ *   include/linux/ipipe_domain.h
+ *
+ *   Copyright (C) 2007-2012 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_DOMAIN_H
+#define __LINUX_IPIPE_DOMAIN_H
+
+#ifdef CONFIG_IPIPE
+
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <asm/ptrace.h>
+#include <asm/hw_irq.h>
+
+struct task_struct;
+struct mm_struct;
+struct irq_desc;
+struct ipipe_vm_notifier;
+
+#define __bpl_up(x)            (((x)+(BITS_PER_LONG-1)) & ~(BITS_PER_LONG-1))
+/* Number of virtual IRQs (must be a multiple of BITS_PER_LONG) */
+#define IPIPE_NR_VIRQS         BITS_PER_LONG
+/* First virtual IRQ # (must be aligned on BITS_PER_LONG) */
+#define IPIPE_VIRQ_BASE                __bpl_up(IPIPE_NR_XIRQS)
+/* Total number of IRQ slots */
+#define IPIPE_NR_IRQS          (IPIPE_VIRQ_BASE+IPIPE_NR_VIRQS)
+
+#define IPIPE_IRQ_LOMAPSZ      (IPIPE_NR_IRQS / BITS_PER_LONG)
+#if IPIPE_IRQ_LOMAPSZ > BITS_PER_LONG
+/*
+ * We need a 3-level mapping. This allows us to handle up to 32k IRQ
+ * vectors on 32bit machines, 256k on 64bit ones.
+ */
+#define __IPIPE_3LEVEL_IRQMAP  1
+#define IPIPE_IRQ_MDMAPSZ      (__bpl_up(IPIPE_IRQ_LOMAPSZ) / BITS_PER_LONG)
+#else
+/*
+ * 2-level mapping is enough. This allows us to handle up to 1024 IRQ
+ * vectors on 32bit machines, 4096 on 64bit ones.
+ */
+#define __IPIPE_2LEVEL_IRQMAP  1
+#endif
+
+/* Per-cpu pipeline status */
+#define IPIPE_STALL_FLAG       0 /* interrupts (virtually) disabled. */
+#define IPIPE_STALL_MASK       (1L << IPIPE_STALL_FLAG)
+
+/* Interrupt control bits */
+#define IPIPE_HANDLE_FLAG      0
+#define IPIPE_STICKY_FLAG      1
+#define IPIPE_LOCK_FLAG                2
+#define IPIPE_HANDLE_MASK      (1 << IPIPE_HANDLE_FLAG)
+#define IPIPE_STICKY_MASK      (1 << IPIPE_STICKY_FLAG)
+#define IPIPE_LOCK_MASK                (1 << IPIPE_LOCK_FLAG)
+
+#define __IPIPE_SYSCALL_P  0
+#define __IPIPE_TRAP_P     1
+#define __IPIPE_KEVENT_P   2
+#define __IPIPE_SYSCALL_E (1 << __IPIPE_SYSCALL_P)
+#define __IPIPE_TRAP_E   (1 << __IPIPE_TRAP_P)
+#define __IPIPE_KEVENT_E  (1 << __IPIPE_KEVENT_P)
+#define __IPIPE_ALL_E     0x7
+#define __IPIPE_SYSCALL_R (8 << __IPIPE_SYSCALL_P)
+#define __IPIPE_TRAP_R   (8 << __IPIPE_TRAP_P)
+#define __IPIPE_KEVENT_R  (8 << __IPIPE_KEVENT_P)
+#define __IPIPE_SHIFT_R           3
+#define __IPIPE_ALL_R    (__IPIPE_ALL_E << __IPIPE_SHIFT_R)
+
+#define IPIPE_KEVT_SCHEDULE    0
+#define IPIPE_KEVT_SIGWAKE     1
+#define IPIPE_KEVT_SETSCHED    2
+#define IPIPE_KEVT_SETAFFINITY 3
+#define IPIPE_KEVT_EXIT                4
+#define IPIPE_KEVT_CLEANUP     5
+#define IPIPE_KEVT_HOSTRT      6
+#define IPIPE_KEVT_CLOCKFREQ   7
+
+typedef void (*ipipe_irq_ackfn_t)(struct irq_desc *desc);
+
+typedef void (*ipipe_irq_handler_t)(unsigned int irq,
+                                   void *cookie);
+
+struct ipipe_domain {
+       int context_offset;
+       struct ipipe_irqdesc {
+               unsigned long control;
+               ipipe_irq_ackfn_t ackfn;
+               ipipe_irq_handler_t handler;
+               void *cookie;
+       } ____cacheline_aligned irqs[IPIPE_NR_IRQS];
+       const char *name;
+       struct mutex mutex;
+};
+
+static inline void *
+__ipipe_irq_cookie(struct ipipe_domain *ipd, unsigned int irq)
+{
+       return ipd->irqs[irq].cookie;
+}
+
+static inline ipipe_irq_handler_t
+__ipipe_irq_handler(struct ipipe_domain *ipd, unsigned int irq)
+{
+       return ipd->irqs[irq].handler;
+}
+
+extern struct ipipe_domain ipipe_root;
+
+#define ipipe_root_domain (&ipipe_root)
+
+extern struct ipipe_domain *ipipe_head_domain;
+
+struct ipipe_percpu_domain_data {
+       unsigned long status;   /* <= Must be first in struct. */
+       unsigned long irqpend_himap;
+#ifdef __IPIPE_3LEVEL_IRQMAP
+       unsigned long irqpend_mdmap[IPIPE_IRQ_MDMAPSZ];
+#endif
+       unsigned long irqpend_lomap[IPIPE_IRQ_LOMAPSZ];
+       unsigned long irqheld_map[IPIPE_IRQ_LOMAPSZ];
+       unsigned long irqall[IPIPE_NR_IRQS];
+       struct ipipe_domain *domain;
+       int coflags;
+};
+
+struct ipipe_percpu_data {
+       struct ipipe_percpu_domain_data root;
+       struct ipipe_percpu_domain_data head;
+       struct ipipe_percpu_domain_data *curr;
+       struct pt_regs tick_regs;
+       int hrtimer_irq;
+       struct task_struct *task_hijacked;
+       struct task_struct *rqlock_owner;
+       struct ipipe_vm_notifier *vm_notifier;
+       unsigned long nmi_state;
+       struct mm_struct *active_mm;
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+       int context_check;
+       int context_check_saved;
+#endif
+};
+
+/*
+ * CAREFUL: all accessors based on __ipipe_raw_cpu_ptr() you may find
+ * in this file should be used only while hw interrupts are off, to
+ * guard against CPU migration regardless of the running domain.
+ */
+DECLARE_PER_CPU(struct ipipe_percpu_data, ipipe_percpu);
+
+static inline struct ipipe_percpu_domain_data *
+__context_of(struct ipipe_percpu_data *p, struct ipipe_domain *ipd)
+{
+       return (void *)p + ipd->context_offset;
+}
+
+/**
+ * ipipe_percpu_context - return the address of the pipeline context
+ * data for a domain on a given CPU.
+ *
+ * NOTE: this is the slowest accessor, use it carefully. Prefer
+ * ipipe_this_cpu_context() for requests targeted at the current
+ * CPU. Additionally, if the target domain is known at build time,
+ * consider ipipe_this_cpu_{root, head}_context().
+ */
+static inline struct ipipe_percpu_domain_data *
+ipipe_percpu_context(struct ipipe_domain *ipd, int cpu)
+{
+       return __context_of(&per_cpu(ipipe_percpu, cpu), ipd);
+}
+
+/**
+ * ipipe_this_cpu_context - return the address of the pipeline context
+ * data for a domain on the current CPU. hw IRQs must be off.
+ *
+ * NOTE: this accessor is a bit faster, but since we don't know which
+ * one of "root" or "head" ipd refers to, we still need to compute the
+ * context address from its offset.
+ */
+static inline struct ipipe_percpu_domain_data *
+ipipe_this_cpu_context(struct ipipe_domain *ipd)
+{
+       return __context_of(__ipipe_raw_cpu_ptr(&ipipe_percpu), ipd);
+}
+
+/**
+ * ipipe_this_cpu_root_context - return the address of the pipeline
+ * context data for the root domain on the current CPU. hw IRQs must
+ * be off.
+ *
+ * NOTE: this accessor is recommended when the domain we refer to is
+ * known at build time to be the root one.
+ */
+static inline struct ipipe_percpu_domain_data *
+ipipe_this_cpu_root_context(void)
+{
+       return __ipipe_raw_cpu_ptr(&ipipe_percpu.root);
+}
+
+/**
+ * ipipe_this_cpu_head_context - return the address of the pipeline
+ * context data for the registered head domain on the current CPU. hw
+ * IRQs must be off.
+ *
+ * NOTE: this accessor is recommended when the domain we refer to is
+ * known at build time to be the registered head domain. This address
+ * is always different from the context data of the root domain, even
+ * in the absence of a registered head domain. To get the address of
+ * the context data for the domain leading the pipeline at the time of
+ * the call (which may be root in the absence of a registered head
+ * domain), use ipipe_this_cpu_leading_context() instead.
+ */
+static inline struct ipipe_percpu_domain_data *
+ipipe_this_cpu_head_context(void)
+{
+       return __ipipe_raw_cpu_ptr(&ipipe_percpu.head);
+}
+
+/**
+ * ipipe_this_cpu_leading_context - return the address of the pipeline
+ * context data for the domain leading the pipeline on the current
+ * CPU. hw IRQs must be off.
+ *
+ * NOTE: this accessor is required when either root or a registered
+ * head domain may be the final target of this call, depending on
+ * whether the high priority domain was installed via
+ * ipipe_register_head().
+ */
+static inline struct ipipe_percpu_domain_data *
+ipipe_this_cpu_leading_context(void)
+{
+       return ipipe_this_cpu_context(ipipe_head_domain);
+}
+
+/**
+ * __ipipe_get_current_context() - return the address of the pipeline
+ * context data of the domain running on the current CPU. hw IRQs must
+ * be off.
+ */
+static inline struct ipipe_percpu_domain_data *__ipipe_get_current_context(void)
+{
+       return __ipipe_raw_cpu_read(ipipe_percpu.curr);
+}
+
+#define __ipipe_current_context __ipipe_get_current_context()
+
+/**
+ * __ipipe_set_current_context() - switch the current CPU to the
+ * specified domain context.  hw IRQs must be off.
+ *
+ * NOTE: this is the only way to change the current domain for the
+ * current CPU. Don't bypass.
+ */
+static inline
+void __ipipe_set_current_context(struct ipipe_percpu_domain_data *pd)
+{
+       struct ipipe_percpu_data *p;
+       p = __ipipe_raw_cpu_ptr(&ipipe_percpu);
+       p->curr = pd;
+}
+
+/**
+ * __ipipe_set_current_domain() - switch the current CPU to the
+ * specified domain. This is equivalent to calling
+ * __ipipe_set_current_context() with the context data of that
+ * domain. hw IRQs must be off.
+ */
+static inline void __ipipe_set_current_domain(struct ipipe_domain *ipd)
+{
+       struct ipipe_percpu_data *p;
+       p = __ipipe_raw_cpu_ptr(&ipipe_percpu);
+       p->curr = __context_of(p, ipd);
+}
+
+static inline struct ipipe_percpu_domain_data *ipipe_current_context(void)
+{
+       struct ipipe_percpu_domain_data *pd;
+       unsigned long flags;
+
+       flags = hard_smp_local_irq_save();
+       pd = __ipipe_get_current_context();
+       hard_smp_local_irq_restore(flags);
+
+       return pd;
+}
+
+static inline struct ipipe_domain *__ipipe_get_current_domain(void)
+{
+       return __ipipe_get_current_context()->domain;
+}
+
+#define __ipipe_current_domain __ipipe_get_current_domain()
+
+/**
+ * ipipe_get_current_domain() - return the address of the pipeline
+ * domain running on the current CPU. This variant masks hw IRQs
+ * itself, so it may be called from any context.
+ */
+static inline struct ipipe_domain *ipipe_get_current_domain(void)
+{
+       struct ipipe_domain *ipd;
+       unsigned long flags;
+
+       flags = hard_smp_local_irq_save();
+       ipd = __ipipe_get_current_domain();
+       hard_smp_local_irq_restore(flags);
+
+       return ipd;
+}
+
+#define ipipe_current_domain   ipipe_get_current_domain()
+
+#define __ipipe_root_p (__ipipe_current_domain == ipipe_root_domain)
+#define ipipe_root_p   (ipipe_current_domain == ipipe_root_domain)
+
+#ifdef CONFIG_SMP
+#define __ipipe_root_status    (ipipe_this_cpu_root_context()->status)
+#else
+extern unsigned long __ipipe_root_status;
+#endif
+
+#define __ipipe_head_status    (ipipe_this_cpu_head_context()->status)
+
+/**
+ * __ipipe_ipending_p() - Whether we have interrupts pending
+ * (i.e. logged) for the given domain context on the current CPU. hw
+ * IRQs must be off.
+ */
+static inline int __ipipe_ipending_p(struct ipipe_percpu_domain_data *pd)
+{
+       return pd->irqpend_himap != 0;
+}
+
+static inline unsigned long
+__ipipe_cpudata_irq_hits(struct ipipe_domain *ipd, int cpu, unsigned int irq)
+{
+       return ipipe_percpu_context(ipd, cpu)->irqall[irq];
+}
+
+#endif /* CONFIG_IPIPE */
+
+#endif /* !__LINUX_IPIPE_DOMAIN_H */
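
Tying the accessors above together, a short sketch (hypothetical function, not part of the patch) that samples the root stage's state on the current CPU: hard-disable IRQs, grab the root context with ipipe_this_cpu_root_context(), then test the stall bit and the pending-IRQ map.

#include <linux/ipipe.h>

/* Is the root stage stalled, or holding logged-but-unplayed IRQs? */
static bool example_root_stage_busy(void)
{
	struct ipipe_percpu_domain_data *p;
	unsigned long flags;
	bool busy;

	flags = hard_smp_local_irq_save();	/* accessors need hw IRQs off */
	p = ipipe_this_cpu_root_context();
	busy = test_bit(IPIPE_STALL_FLAG, &p->status) || __ipipe_ipending_p(p);
	hard_smp_local_irq_restore(flags);

	return busy;
}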
diff --git a/include/linux/ipipe_lock.h b/include/linux/ipipe_lock.h
new file mode 100644 (file)
index 0000000..a108278
--- /dev/null
@@ -0,0 +1,327 @@
+/*   -*- linux-c -*-
+ *   include/linux/ipipe_lock.h
+ *
+ *   Copyright (C) 2009 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_LOCK_H
+#define __LINUX_IPIPE_LOCK_H
+
+typedef struct {
+       arch_spinlock_t arch_lock;
+} __ipipe_spinlock_t;
+
+#define ipipe_spinlock(lock)   ((__ipipe_spinlock_t *)(lock))
+#define ipipe_spinlock_p(lock)                                                 \
+       __builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t *) ||     \
+       __builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t [])
+
+#define std_spinlock_raw(lock) ((raw_spinlock_t *)(lock))
+#define std_spinlock_raw_p(lock)                                       \
+       __builtin_types_compatible_p(typeof(lock), raw_spinlock_t *) || \
+       __builtin_types_compatible_p(typeof(lock), raw_spinlock_t [])
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+
+#define PICK_SPINLOCK_IRQSAVE(lock, flags)                             \
+       do {                                                            \
+               if (ipipe_spinlock_p(lock))                             \
+                       (flags) = __ipipe_spin_lock_irqsave(ipipe_spinlock(lock)); \
+               else if (std_spinlock_raw_p(lock))                              \
+                       __real_raw_spin_lock_irqsave(std_spinlock_raw(lock), flags); \
+               else __bad_lock_type();                                 \
+       } while (0)
+
+#define PICK_SPINTRYLOCK_IRQSAVE(lock, flags)                          \
+       ({                                                              \
+               int __ret__;                                            \
+               if (ipipe_spinlock_p(lock))                             \
+                       __ret__ = __ipipe_spin_trylock_irqsave(ipipe_spinlock(lock), &(flags)); \
+               else if (std_spinlock_raw_p(lock))                              \
+                       __ret__ = __real_raw_spin_trylock_irqsave(std_spinlock_raw(lock), flags); \
+               else __bad_lock_type();                                 \
+               __ret__;                                                \
+        })
+
+#define PICK_SPINTRYLOCK_IRQ(lock)                                     \
+       ({                                                              \
+               int __ret__;                                            \
+               if (ipipe_spinlock_p(lock))                             \
+                       __ret__ = __ipipe_spin_trylock_irq(ipipe_spinlock(lock)); \
+               else if (std_spinlock_raw_p(lock))                              \
+                       __ret__ = __real_raw_spin_trylock_irq(std_spinlock_raw(lock)); \
+               else __bad_lock_type();                                 \
+               __ret__;                                                \
+        })
+
+#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags)                                \
+       do {                                                            \
+               if (ipipe_spinlock_p(lock))                             \
+                       __ipipe_spin_unlock_irqrestore(ipipe_spinlock(lock), flags); \
+               else if (std_spinlock_raw_p(lock)) {                    \
+                       __ipipe_spin_unlock_debug(flags);               \
+                       __real_raw_spin_unlock_irqrestore(std_spinlock_raw(lock), flags); \
+               } else __bad_lock_type();                               \
+       } while (0)
+
+#define PICK_SPINOP(op, lock)                                          \
+       ({                                                              \
+               if (ipipe_spinlock_p(lock))                             \
+                       arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \
+               else if (std_spinlock_raw_p(lock))                      \
+                       __real_raw_spin##op(std_spinlock_raw(lock));    \
+               else __bad_lock_type();                                 \
+               (void)0;                                                \
+       })
+
+#define PICK_SPINOP_RET(op, lock, type)                                        \
+       ({                                                              \
+               type __ret__;                                           \
+               if (ipipe_spinlock_p(lock))                             \
+                       __ret__ = arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \
+               else if (std_spinlock_raw_p(lock))                      \
+                       __ret__ = __real_raw_spin##op(std_spinlock_raw(lock)); \
+               else { __ret__ = -1; __bad_lock_type(); }               \
+               __ret__;                                                \
+       })
+
+#else /* !CONFIG_PREEMPT_RT_FULL */
+
+#define std_spinlock(lock)     ((spinlock_t *)(lock))
+#define std_spinlock_p(lock)                                           \
+       __builtin_types_compatible_p(typeof(lock), spinlock_t *) ||     \
+       __builtin_types_compatible_p(typeof(lock), spinlock_t [])
+
+#define PICK_SPINLOCK_IRQSAVE(lock, flags)                             \
+       do {                                                            \
+               if (ipipe_spinlock_p(lock))                             \
+                       (flags) = __ipipe_spin_lock_irqsave(ipipe_spinlock(lock)); \
+               else if (std_spinlock_raw_p(lock))                              \
+                       __real_raw_spin_lock_irqsave(std_spinlock_raw(lock), flags); \
+               else if (std_spinlock_p(lock))                          \
+                       __real_raw_spin_lock_irqsave(&std_spinlock(lock)->rlock, flags); \
+               else __bad_lock_type();                                 \
+       } while (0)
+
+#define PICK_SPINTRYLOCK_IRQSAVE(lock, flags)                          \
+       ({                                                              \
+               int __ret__;                                            \
+               if (ipipe_spinlock_p(lock))                             \
+                       __ret__ = __ipipe_spin_trylock_irqsave(ipipe_spinlock(lock), &(flags)); \
+               else if (std_spinlock_raw_p(lock))                              \
+                       __ret__ = __real_raw_spin_trylock_irqsave(std_spinlock_raw(lock), flags); \
+               else if (std_spinlock_p(lock))                          \
+                       __ret__ = __real_raw_spin_trylock_irqsave(&std_spinlock(lock)->rlock, flags); \
+               else __bad_lock_type();                                 \
+               __ret__;                                                \
+        })
+
+#define PICK_SPINTRYLOCK_IRQ(lock)                                     \
+       ({                                                              \
+               int __ret__;                                            \
+               if (ipipe_spinlock_p(lock))                             \
+                       __ret__ = __ipipe_spin_trylock_irq(ipipe_spinlock(lock)); \
+               else if (std_spinlock_raw_p(lock))                              \
+                       __ret__ = __real_raw_spin_trylock_irq(std_spinlock_raw(lock)); \
+               else if (std_spinlock_p(lock))                          \
+                       __ret__ = __real_raw_spin_trylock_irq(&std_spinlock(lock)->rlock); \
+               else __bad_lock_type();                                 \
+               __ret__;                                                \
+        })
+
+#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags)                                \
+       do {                                                            \
+               if (ipipe_spinlock_p(lock))                             \
+                       __ipipe_spin_unlock_irqrestore(ipipe_spinlock(lock), flags); \
+               else {                                                  \
+                       __ipipe_spin_unlock_debug(flags);               \
+                       if (std_spinlock_raw_p(lock))                   \
+                               __real_raw_spin_unlock_irqrestore(std_spinlock_raw(lock), flags); \
+                       else if (std_spinlock_p(lock))                  \
+                               __real_raw_spin_unlock_irqrestore(&std_spinlock(lock)->rlock, flags); \
+               }                                                       \
+       } while (0)
+
+#define PICK_SPINOP(op, lock)                                          \
+       ({                                                              \
+               if (ipipe_spinlock_p(lock))                             \
+                       arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \
+               else if (std_spinlock_raw_p(lock))                      \
+                       __real_raw_spin##op(std_spinlock_raw(lock));    \
+               else if (std_spinlock_p(lock))                          \
+                       __real_raw_spin##op(&std_spinlock(lock)->rlock); \
+               else __bad_lock_type();                                 \
+               (void)0;                                                \
+       })
+
+#define PICK_SPINOP_RET(op, lock, type)                                        \
+       ({                                                              \
+               type __ret__;                                           \
+               if (ipipe_spinlock_p(lock))                             \
+                       __ret__ = arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \
+               else if (std_spinlock_raw_p(lock))                      \
+                       __ret__ = __real_raw_spin##op(std_spinlock_raw(lock)); \
+               else if (std_spinlock_p(lock))                          \
+                       __ret__ = __real_raw_spin##op(&std_spinlock(lock)->rlock); \
+               else { __ret__ = -1; __bad_lock_type(); }               \
+               __ret__;                                                \
+       })
+
+#endif /* !CONFIG_PREEMPT_RT_FULL */
+
+#define arch_spin_lock_init(lock)                                      \
+       do {                                                            \
+               IPIPE_DEFINE_SPINLOCK(__lock__);                        \
+               *((ipipe_spinlock_t *)lock) = __lock__;                 \
+       } while (0)
+
+#define arch_spin_lock_irq(lock)                                       \
+       do {                                                            \
+               hard_local_irq_disable();                               \
+               arch_spin_lock(lock);                                   \
+       } while (0)
+
+#define arch_spin_unlock_irq(lock)                                     \
+       do {                                                            \
+               arch_spin_unlock(lock);                                 \
+               hard_local_irq_enable();                                \
+       } while (0)
+
+typedef struct {
+       arch_rwlock_t arch_lock;
+} __ipipe_rwlock_t;
+
+#define ipipe_rwlock_p(lock)                                           \
+       __builtin_types_compatible_p(typeof(lock), __ipipe_rwlock_t *)
+
+#define std_rwlock_p(lock)                                             \
+       __builtin_types_compatible_p(typeof(lock), rwlock_t *)
+
+#define ipipe_rwlock(lock)     ((__ipipe_rwlock_t *)(lock))
+#define std_rwlock(lock)       ((rwlock_t *)(lock))
+
+#define PICK_RWOP(op, lock)                                            \
+       do {                                                            \
+               if (ipipe_rwlock_p(lock))                               \
+                       arch##op(&ipipe_rwlock(lock)->arch_lock);       \
+               else if (std_rwlock_p(lock))                            \
+                       _raw##op(std_rwlock(lock));                     \
+               else __bad_lock_type();                                 \
+       } while (0)
+
+extern int __bad_lock_type(void);
+
+#ifdef CONFIG_IPIPE
+
+#define ipipe_spinlock_t               __ipipe_spinlock_t
+#define IPIPE_DEFINE_RAW_SPINLOCK(x)   ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED
+#define IPIPE_DECLARE_RAW_SPINLOCK(x)  extern ipipe_spinlock_t x
+#define IPIPE_DEFINE_SPINLOCK(x)       IPIPE_DEFINE_RAW_SPINLOCK(x)
+#define IPIPE_DECLARE_SPINLOCK(x)      IPIPE_DECLARE_RAW_SPINLOCK(x)
+
+#define IPIPE_SPIN_LOCK_UNLOCKED                                       \
+       (__ipipe_spinlock_t) {  .arch_lock = __ARCH_SPIN_LOCK_UNLOCKED }
+
+#define spin_lock_irqsave_cond(lock, flags) \
+       spin_lock_irqsave(lock, flags)
+
+#define spin_unlock_irqrestore_cond(lock, flags) \
+       spin_unlock_irqrestore(lock, flags)
+
+#define raw_spin_lock_irqsave_cond(lock, flags) \
+       raw_spin_lock_irqsave(lock, flags)
+
+#define raw_spin_unlock_irqrestore_cond(lock, flags) \
+       raw_spin_unlock_irqrestore(lock, flags)
+
+void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock);
+
+int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock);
+
+void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock);
+
+unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock);
+
+int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock,
+                                unsigned long *x);
+
+void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock,
+                                   unsigned long x);
+
+void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock);
+
+void __ipipe_spin_unlock_irqcomplete(unsigned long x);
+
+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP)
+void __ipipe_spin_unlock_debug(unsigned long flags);
+#else
+#define __ipipe_spin_unlock_debug(flags)  do { } while (0)
+#endif
+
+#define ipipe_rwlock_t                 __ipipe_rwlock_t
+#define IPIPE_DEFINE_RWLOCK(x)         ipipe_rwlock_t x = IPIPE_RW_LOCK_UNLOCKED
+#define IPIPE_DECLARE_RWLOCK(x)                extern ipipe_rwlock_t x
+
+#define IPIPE_RW_LOCK_UNLOCKED \
+       (__ipipe_rwlock_t) { .arch_lock = __ARCH_RW_LOCK_UNLOCKED }
+
+#else /* !CONFIG_IPIPE */
+
+#define ipipe_spinlock_t               spinlock_t
+#define IPIPE_DEFINE_SPINLOCK(x)       DEFINE_SPINLOCK(x)
+#define IPIPE_DECLARE_SPINLOCK(x)      extern spinlock_t x
+#define IPIPE_SPIN_LOCK_UNLOCKED       __SPIN_LOCK_UNLOCKED(unknown)
+#define IPIPE_DEFINE_RAW_SPINLOCK(x)   DEFINE_RAW_SPINLOCK(x)
+#define IPIPE_DECLARE_RAW_SPINLOCK(x)  extern raw_spinlock_t x
+
+#define spin_lock_irqsave_cond(lock, flags)            \
+       do {                                            \
+               (void)(flags);                          \
+               spin_lock(lock);                        \
+       } while(0)
+
+#define spin_unlock_irqrestore_cond(lock, flags)       \
+       spin_unlock(lock)
+
+#define raw_spin_lock_irqsave_cond(lock, flags) \
+       do {                                    \
+               (void)(flags);                  \
+               raw_spin_lock(lock);            \
+       } while(0)
+
+#define raw_spin_unlock_irqrestore_cond(lock, flags) \
+       raw_spin_unlock(lock)
+
+#define __ipipe_spin_lock_irq(lock)            do { } while (0)
+#define __ipipe_spin_unlock_irq(lock)          do { } while (0)
+#define __ipipe_spin_lock_irqsave(lock)                0
+#define __ipipe_spin_trylock_irq(lock)         1
+#define __ipipe_spin_trylock_irqsave(lock, x)  ({ (void)(x); 1; })
+#define __ipipe_spin_unlock_irqrestore(lock, x)        do { (void)(x); } while (0)
+#define __ipipe_spin_unlock_irqbegin(lock)     spin_unlock(lock)
+#define __ipipe_spin_unlock_irqcomplete(x)     do { (void)(x); } while (0)
+#define __ipipe_spin_unlock_debug(flags)       do { } while (0)
+
+#define ipipe_rwlock_t                 rwlock_t
+#define IPIPE_DEFINE_RWLOCK(x)         DEFINE_RWLOCK(x)
+#define IPIPE_DECLARE_RWLOCK(x)                extern rwlock_t x
+#define IPIPE_RW_LOCK_UNLOCKED         RW_LOCK_UNLOCKED
+
+#endif /* !CONFIG_IPIPE */
+
+#endif /* !__LINUX_IPIPE_LOCK_H */
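A minimal usage sketch for the locking helpers above, assuming CONFIG_IPIPE is enabled and that the declarations are reachable through <linux/ipipe.h>; the lock and function names are hypothetical. A lock defined with IPIPE_DEFINE_SPINLOCK() may be taken from any domain through the __ipipe_spin_*() helpers, which hard-disable IRQs, take the raw arch lock on SMP, and return the saved state; the PICK_* macros above are the dispatch points the generic spinlock wrappers are expected to use for such locks.

    #include <linux/ipipe.h>

    static IPIPE_DEFINE_SPINLOCK(demo_lock);    /* hypothetical lock */
    static unsigned long demo_counter;          /* hypothetical shared data */

    static void demo_update(void)
    {
            unsigned long flags;

            /* Hard IRQs off, arch lock on SMP; returns the saved state. */
            flags = __ipipe_spin_lock_irqsave(&demo_lock);
            demo_counter++;
            __ipipe_spin_unlock_irqrestore(&demo_lock, flags);
    }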
diff --git a/include/linux/ipipe_tickdev.h b/include/linux/ipipe_tickdev.h
new file mode 100644 (file)
index 0000000..120fb03
--- /dev/null
@@ -0,0 +1,159 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_tickdev.h
+ *
+ * Copyright (C) 2007 Philippe Gerum.
+ * Copyright (C) 2012 Gilles Chanteperdrix
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_TICKDEV_H
+#define __LINUX_IPIPE_TICKDEV_H
+
+#include <linux/list.h>
+#include <linux/cpumask.h>
+#include <linux/clockchips.h>
+#include <linux/ipipe_domain.h>
+#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
+
+#ifdef CONFIG_IPIPE
+
+struct clock_event_device;
+
+struct ipipe_hostrt_data {
+       short live;
+       seqcount_t seqcount;
+       time_t wall_time_sec;
+       u32 wall_time_nsec;
+       struct timespec wall_to_monotonic;
+       u64 cycle_last;
+       u64 mask;
+       u32 mult;
+       u32 shift;
+};
+
+enum clock_event_mode {
+       CLOCK_EVT_MODE_PERIODIC,
+       CLOCK_EVT_MODE_ONESHOT,
+       CLOCK_EVT_MODE_UNUSED,
+       CLOCK_EVT_MODE_SHUTDOWN,
+};
+
+struct ipipe_timer {
+       int irq;
+       void (*request)(struct ipipe_timer *timer, int steal);
+       int (*set)(unsigned long ticks, void *timer);
+       void (*ack)(void);
+       void (*release)(struct ipipe_timer *timer);
+
+       /* Only if registering a timer directly */
+       const char *name;
+       unsigned rating;
+       unsigned long freq;
+       unsigned min_delay_ticks;
+       const struct cpumask *cpumask;
+
+       /* For internal use */
+       void *timer_set;        /* pointer passed to ->set() callback */
+       struct clock_event_device *host_timer;
+       struct list_head link;
+
+       /* Conversions between clock frequency and timer frequency */
+       unsigned c2t_integ;
+       unsigned c2t_frac;
+
+       /* For clockevent interception */
+       u32 real_mult;
+       u32 real_shift;
+       void (*mode_handler)(enum clock_event_mode mode,
+                            struct clock_event_device *);
+       int orig_mode;
+       int (*orig_set_state_periodic)(struct clock_event_device *);
+       int (*orig_set_state_oneshot)(struct clock_event_device *);
+       int (*orig_set_state_oneshot_stopped)(struct clock_event_device *);
+       int (*orig_set_state_shutdown)(struct clock_event_device *);
+       int (*orig_set_next_event)(unsigned long evt,
+                                  struct clock_event_device *cdev);
+       unsigned int (*refresh_freq)(void);
+};
+
+#define __ipipe_hrtimer_irq __ipipe_raw_cpu_read(ipipe_percpu.hrtimer_irq)
+
+extern unsigned long __ipipe_hrtimer_freq;
+
+/*
+ * Called by clockevents_register_device() to register a piggybacked
+ * I-pipe timer, if the clock event device provides one.
+ */
+void ipipe_host_timer_register(struct clock_event_device *clkevt);
+
+/*
+ * Register a standalone ipipe timer
+ */
+void ipipe_timer_register(struct ipipe_timer *timer);
+
+/*
+ * Choose the best timer for each CPU and take over its handling.
+ */
+int ipipe_select_timers(const struct cpumask *mask);
+
+/*
+ * Release the per-cpu timers
+ */
+void ipipe_timers_release(void);
+
+/*
+ * Start handling the per-cpu timer IRQ, and intercept the Linux
+ * clockevent device callbacks.
+ */
+int ipipe_timer_start(void (*tick_handler)(void),
+                     void (*emumode)(enum clock_event_mode mode,
+                                     struct clock_event_device *cdev),
+                     int (*emutick)(unsigned long evt,
+                                    struct clock_event_device *cdev),
+                     unsigned cpu);
+
+/*
+ * Stop handling a per-cpu timer
+ */
+void ipipe_timer_stop(unsigned cpu);
+
+/*
+ * Program the timer
+ */
+void ipipe_timer_set(unsigned long delay);
+
+const char *ipipe_timer_name(void);
+
+unsigned ipipe_timer_ns2ticks(struct ipipe_timer *timer, unsigned ns);
+
+void __ipipe_timer_refresh_freq(unsigned int hrclock_freq);
+
+#else /* !CONFIG_IPIPE */
+
+#define ipipe_host_timer_register(clkevt) do { } while (0)
+
+#endif /* !CONFIG_IPIPE */
+
+#ifdef CONFIG_IPIPE_HAVE_HOSTRT
+void ipipe_update_hostrt(struct timekeeper *tk);
+#else
+static inline void
+ipipe_update_hostrt(struct timekeeper *tk) {}
+#endif
+
+#endif /* __LINUX_IPIPE_TICKDEV_H */
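A minimal registration sketch for a standalone I-pipe timer using the hooks declared above; the hardware accessors, IRQ number and frequency are hypothetical, and error handling is omitted.

    #include <linux/ipipe_tickdev.h>

    static int my_hw_set(unsigned long ticks, void *timer)   /* hypothetical */
    {
            /* Program the hardware decrementer to fire in 'ticks'. */
            return 0;
    }

    static void my_hw_ack(void)                               /* hypothetical */
    {
            /* Acknowledge the timer interrupt at chip level. */
    }

    static struct ipipe_timer my_ipipe_timer = {
            .irq             = 42,                  /* hypothetical IRQ */
            .set             = my_hw_set,
            .ack             = my_hw_ack,
            .name            = "my-timer",
            .rating          = 200,
            .freq            = 1000000,             /* 1 MHz, hypothetical */
            .min_delay_ticks = 10,
            .cpumask         = cpu_possible_mask,
    };

    static void my_timer_init(void)
    {
            ipipe_timer_register(&my_ipipe_timer);
    }

A client co-kernel would then typically call ipipe_select_timers() on the CPUs it manages and ipipe_timer_start() to take the per-CPU tick over, symmetrically undone by ipipe_timer_stop() and ipipe_timers_release().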
diff --git a/include/linux/ipipe_trace.h b/include/linux/ipipe_trace.h
new file mode 100644 (file)
index 0000000..379c5e3
--- /dev/null
@@ -0,0 +1,83 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_trace.h
+ *
+ * Copyright (C) 2005 Luotao Fu.
+ *               2005-2007 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef _LINUX_IPIPE_TRACE_H
+#define _LINUX_IPIPE_TRACE_H
+
+#ifdef CONFIG_IPIPE_TRACE
+
+#include <linux/types.h>
+
+#ifndef BROKEN_BUILTIN_RETURN_ADDRESS
+#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0))
+#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_address(1))
+#endif /* !BROKEN_BUILTIN_RETURN_ADDRESS */
+
+struct pt_regs;
+
+void ipipe_trace_begin(unsigned long v);
+void ipipe_trace_end(unsigned long v);
+void ipipe_trace_freeze(unsigned long v);
+void ipipe_trace_special(unsigned char special_id, unsigned long v);
+void ipipe_trace_pid(pid_t pid, short prio);
+void ipipe_trace_event(unsigned char id, unsigned long delay_tsc);
+int ipipe_trace_max_reset(void);
+int ipipe_trace_frozen_reset(void);
+void ipipe_trace_irqbegin(int irq, struct pt_regs *regs);
+void ipipe_trace_irqend(int irq, struct pt_regs *regs);
+
+#else /* !CONFIG_IPIPE_TRACE */
+
+#define ipipe_trace_begin(v)                   do { (void)(v); } while(0)
+#define ipipe_trace_end(v)                     do { (void)(v); } while(0)
+#define ipipe_trace_freeze(v)                  do { (void)(v); } while(0)
+#define ipipe_trace_special(id, v)             do { (void)(id); (void)(v); } while(0)
+#define ipipe_trace_pid(pid, prio)             do { (void)(pid); (void)(prio); } while(0)
+#define ipipe_trace_event(id, delay_tsc)       do { (void)(id); (void)(delay_tsc); } while(0)
+#define ipipe_trace_max_reset()                        ({ 0; })
+#define ipipe_trace_frozen_reset()             ({ 0; })
+#define ipipe_trace_irqbegin(irq, regs)                do { } while(0)
+#define ipipe_trace_irqend(irq, regs)          do { } while(0)
+
+#endif /* !CONFIG_IPIPE_TRACE */
+
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+void ipipe_trace_panic_freeze(void);
+void ipipe_trace_panic_dump(void);
+#else
+static inline void ipipe_trace_panic_freeze(void) { }
+static inline void ipipe_trace_panic_dump(void) { }
+#endif
+
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+#define ipipe_trace_irq_entry(irq)     ipipe_trace_begin(irq)
+#define ipipe_trace_irq_exit(irq)      ipipe_trace_end(irq)
+#define ipipe_trace_irqsoff()          ipipe_trace_begin(0x80000000UL)
+#define ipipe_trace_irqson()           ipipe_trace_end(0x80000000UL)
+#else
+#define ipipe_trace_irq_entry(irq)     do { (void)(irq);} while(0)
+#define ipipe_trace_irq_exit(irq)      do { (void)(irq);} while(0)
+#define ipipe_trace_irqsoff()          do { } while(0)
+#define ipipe_trace_irqson()           do { } while(0)
+#endif
+
+#endif /* !_LINUX_IPIPE_TRACE_H */
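A sketch of manual instrumentation with the tracer entry points above; the traced function and the threshold are hypothetical. The calls collapse to no-ops when CONFIG_IPIPE_TRACE is off.

    #include <linux/ipipe_trace.h>

    static void demo_critical_path(unsigned long arg)         /* hypothetical */
    {
            ipipe_trace_begin(arg);          /* open a trace path */

            /* ... latency-sensitive work ... */

            if (arg > 1000)
                    /* Anomaly: freeze the back trace for post-mortem
                       reading via /proc/ipipe/trace. */
                    ipipe_trace_freeze(arg);

            ipipe_trace_end(arg);            /* close the trace path */
    }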
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 5bd3f151da7857ca488bdd1740833ab6f48bc5b9..548bb8edc38c6fc17a5174d3b6e6d22227c5e64e 100644 (file)
@@ -255,7 +255,27 @@ do { \
 
 #endif /* CONFIG_PREEMPT_COUNT */
 
-#ifdef MODULE
+#ifdef CONFIG_IPIPE
+#define hard_preempt_disable()                         \
+       ({                                              \
+               unsigned long __flags__;                \
+               __flags__ = hard_local_irq_save();      \
+               if (__ipipe_root_p)                     \
+                       preempt_disable();              \
+               __flags__;                              \
+       })
+
+#define hard_preempt_enable(__flags__)                 \
+       do {                                            \
+               if (__ipipe_root_p) {                   \
+                       preempt_enable_no_resched();    \
+                       hard_local_irq_restore(__flags__);      \
+                       preempt_check_resched();        \
+               } else                                  \
+                       hard_local_irq_restore(__flags__);      \
+       } while (0)
+
+#elif defined(MODULE)
 /*
  * Modules have no business playing preemption tricks.
  */
@@ -263,7 +283,7 @@ do { \
 #undef preempt_enable_no_resched
 #undef preempt_enable_no_resched_notrace
 #undef preempt_check_resched
-#endif
+#endif /* !IPIPE && MODULE */
 
 #define preempt_set_need_resched() \
 do { \
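A usage sketch for the hard_preempt_disable()/hard_preempt_enable() pair added above, assuming CONFIG_IPIPE; the function and the protected state are hypothetical. The pair hard-disables IRQs and only touches the preemption count when running over the root domain, so the section cannot be preempted even by a head (co-kernel) domain.

    static void demo_touch_percpu_state(void)                 /* hypothetical */
    {
            unsigned long flags;

            flags = hard_preempt_disable();
            /* ... manipulate state that must not be preempted by any
               domain, including the head domain ... */
            hard_preempt_enable(flags);
    }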
diff --git a/init/Kconfig b/init/Kconfig
index 46075327c165dd798b0283dd8600b21e2396a61b..ea6e9e4c2b2a3906803a9a063aa127af287fda5d 100644 (file)
@@ -80,6 +80,7 @@ config COMPILE_TEST
 
 config LOCALVERSION
        string "Local version - append to kernel release"
+       default "-ipipe"
        help
          Append an extra string to the end of your kernel version.
          This will show up when you type uname, for example.
diff --git a/init/main.c b/init/main.c
index b32ec72cdf3dd8731b53b57975d96d2edd6cbf0a..ff9630e49b00fa6f394aafc3811484460d84c662 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/cpuset.h>
 #include <linux/cgroup.h>
 #include <linux/efi.h>
+#include <linux/ipipe.h>
 #include <linux/tick.h>
 #include <linux/interrupt.h>
 #include <linux/taskstats_kern.h>
@@ -522,7 +523,7 @@ asmlinkage __visible void __init start_kernel(void)
 
        cgroup_init_early();
 
-       local_irq_disable();
+       hard_local_irq_disable();
        early_boot_irqs_disabled = true;
 
        /*
@@ -570,6 +571,7 @@ asmlinkage __visible void __init start_kernel(void)
        pidhash_init();
        vfs_caches_init_early();
        sort_main_extable();
+       __ipipe_init_early();
        trap_init();
        mm_init();
 
@@ -617,6 +619,11 @@ asmlinkage __visible void __init start_kernel(void)
        softirq_init();
        timekeeping_init();
        time_init();
+       /*
+        * We need to wait for the interrupt and time subsystems to be
+        * initialized before enabling the pipeline.
+        */
+       __ipipe_init();
        sched_clock_postinit();
        printk_safe_init();
        perf_event_init();
@@ -914,6 +921,7 @@ static void __init do_basic_setup(void)
        shmem_init();
        driver_init();
        init_irq_proc();
+       __ipipe_init_proc();
        do_ctors();
        usermodehelper_enable();
        do_initcalls();
diff --git a/kernel/Makefile b/kernel/Makefile
index 172d151d429caeb3b49e6c568e4522173eba02ea..06c944bef9a314fb47438cf0791ae57cb18173ac 100644 (file)
@@ -87,6 +87,7 @@ obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
 obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_IPIPE) += ipipe/
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
diff --git a/kernel/ipipe/Kconfig b/kernel/ipipe/Kconfig
new file mode 100644 (file)
index 0000000..c8d38e5
--- /dev/null
@@ -0,0 +1,44 @@
+config IPIPE
+       bool "Interrupt pipeline"
+       default y
+       ---help---
+         Activate this option if you want the interrupt pipeline to be
+         compiled in.
+
+config IPIPE_CORE
+       def_bool y if IPIPE
+
+config IPIPE_WANT_CLOCKSOURCE
+       bool
+
+config IPIPE_WANT_PTE_PINNING
+       bool
+
+config IPIPE_CORE_APIREV
+       int
+       depends on IPIPE
+       default 2
+       ---help---
+         The API revision level we implement.
+
+config IPIPE_WANT_APIREV_2
+       bool
+
+config IPIPE_TARGET_APIREV
+       int
+       depends on IPIPE
+       default IPIPE_CORE_APIREV
+       ---help---
+         The API revision level we want (must be <=
+         IPIPE_CORE_APIREV).
+
+config IPIPE_HAVE_HOSTRT
+       bool
+
+config IPIPE_HAVE_EAGER_FPU
+       bool
+
+if IPIPE && ARM && RAW_PRINTK && !DEBUG_LL
+comment "CAUTION: DEBUG_LL must be selected, and properly configured for"
+comment "RAW_PRINTK to work. Otherwise, you will get no output on raw_printk()"
+endif
diff --git a/kernel/ipipe/Kconfig.debug b/kernel/ipipe/Kconfig.debug
new file mode 100644 (file)
index 0000000..cee7fab
--- /dev/null
@@ -0,0 +1,96 @@
+config IPIPE_DEBUG
+       bool "I-pipe debugging"
+       depends on IPIPE
+       select RAW_PRINTK
+
+config IPIPE_DEBUG_CONTEXT
+       bool "Check for illicit cross-domain calls"
+       depends on IPIPE_DEBUG
+       default y
+       ---help---
+         Enable this feature to arm checkpoints in the kernel that
+         verify the correct invocation context. On entry to critical
+         Linux services, a warning is issued if the caller is not
+         running over the root domain.
+
+config IPIPE_DEBUG_INTERNAL
+       bool "Enable internal debug checks"
+       depends on IPIPE_DEBUG
+       default y
+       ---help---
+         When this feature is enabled, I-pipe will perform internal
+         consistency checks of its subsystems, e.g. on per-cpu variable
+         access.
+
+config IPIPE_TRACE
+       bool "Latency tracing"
+       depends on IPIPE_DEBUG
+       select FTRACE
+       select FUNCTION_TRACER
+       select KALLSYMS
+       select PROC_FS
+       ---help---
+         Activate this option if you want to use per-function tracing of
+         the kernel. The tracer will collect data via instrumentation
+         features like the one below or with the help of explicit calls
+         to ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the
+         in-kernel tracing API. The collected data and the runtime
+         controls are available via /proc/ipipe/trace/*.
+
+if IPIPE_TRACE
+
+config IPIPE_TRACE_ENABLE
+       bool "Enable tracing on boot"
+       default y
+       ---help---
+         Disable this option if you want to arm the tracer manually after
+         booting ("echo 1 > /proc/ipipe/trace/enable"). This can reduce
+         boot time on slow embedded devices due to the tracer overhead.
+
+config IPIPE_TRACE_MCOUNT
+       bool "Instrument function entries"
+       default y
+       select FTRACE
+       select FUNCTION_TRACER
+       ---help---
+         When enabled, records every kernel function entry in the tracer
+         log. While this slows down the system noticeably, it provides
+         the highest level of information about the flow of events.
+         However, it can be switched off in order to record only explicit
+         I-pipe trace points.
+
+config IPIPE_TRACE_IRQSOFF
+       bool "Trace IRQs-off times"
+       default y
+       ---help---
+         Activate this option if the I-pipe should trace the longest
+         path executed with hard IRQs switched off.
+
+config IPIPE_TRACE_SHIFT
+       int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)"
+       range 10 18
+       default 14
+       ---help---
+         The number of trace points to hold tracing data for each
+         trace path, as a power of 2.
+
+config IPIPE_TRACE_VMALLOC
+       bool "Use vmalloc'ed trace buffer"
+       default y if EMBEDDED
+       ---help---
+         Instead of reserving static kernel data, the required buffer
+         is allocated via vmalloc during boot-up when this option is
+         enabled. This can help to start systems that are low on memory,
+         but it slightly degrades overall performance. Try this option
+         when a traced kernel hangs unexpectedly at boot time.
+
+config IPIPE_TRACE_PANIC
+       bool "Enable panic back traces"
+       default y
+       ---help---
+         Provides services to freeze and dump a back trace in panic
+         situations. This is used on IPIPE_DEBUG_CONTEXT exceptions
+         as well as ordinary kernel oopses. You can control the number
+         of printed back trace points via /proc/ipipe/trace.
+
+endif
diff --git a/kernel/ipipe/Makefile b/kernel/ipipe/Makefile
new file mode 100644 (file)
index 0000000..7375515
--- /dev/null
@@ -0,0 +1,2 @@
+obj-$(CONFIG_IPIPE)    += core.o timer.o
+obj-$(CONFIG_IPIPE_TRACE) += tracer.o
diff --git a/kernel/ipipe/core.c b/kernel/ipipe/core.c
new file mode 100644 (file)
index 0000000..91b68ec
--- /dev/null
@@ -0,0 +1,1879 @@
+/* -*- linux-c -*-
+ * linux/kernel/ipipe/core.c
+ *
+ * Copyright (C) 2002-2012 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Architecture-independent I-PIPE core support.
+ */
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/kallsyms.h>
+#include <linux/bitops.h>
+#include <linux/tick.h>
+#include <linux/interrupt.h>
+#include <linux/uaccess.h>
+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#endif /* CONFIG_PROC_FS */
+#include <linux/ipipe_trace.h>
+#include <linux/ipipe.h>
+#include <ipipe/setup.h>
+#include <asm/syscall.h>
+#include <asm/unistd.h>
+
+struct ipipe_domain ipipe_root;
+EXPORT_SYMBOL_GPL(ipipe_root);
+
+struct ipipe_domain *ipipe_head_domain = &ipipe_root;
+EXPORT_SYMBOL_GPL(ipipe_head_domain);
+
+#ifdef CONFIG_SMP
+static __initdata struct ipipe_percpu_domain_data bootup_context = {
+       .status = IPIPE_STALL_MASK,
+       .domain = &ipipe_root,
+};
+#else
+#define bootup_context ipipe_percpu.root
+#endif /* !CONFIG_SMP */
+
+DEFINE_PER_CPU(struct ipipe_percpu_data, ipipe_percpu) = {
+       .root = {
+               .status = IPIPE_STALL_MASK,
+               .domain = &ipipe_root,
+       },
+       .curr = &bootup_context,
+       .hrtimer_irq = -1,
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+       .context_check = 1,
+#endif
+};
+EXPORT_PER_CPU_SYMBOL(ipipe_percpu);
+
+/* Up to 2k of pending work data per CPU. */
+#define WORKBUF_SIZE 2048
+static DEFINE_PER_CPU_ALIGNED(unsigned char[WORKBUF_SIZE], work_buf);
+static DEFINE_PER_CPU(void *, work_tail);
+static unsigned int __ipipe_work_virq;
+
+static void __ipipe_do_work(unsigned int virq, void *cookie);
+
+#ifdef CONFIG_SMP
+
+#define IPIPE_CRITICAL_TIMEOUT 1000000
+static cpumask_t __ipipe_cpu_sync_map;
+static cpumask_t __ipipe_cpu_lock_map;
+static cpumask_t __ipipe_cpu_pass_map;
+static unsigned long __ipipe_critical_lock;
+static IPIPE_DEFINE_SPINLOCK(__ipipe_cpu_barrier);
+static atomic_t __ipipe_critical_count = ATOMIC_INIT(0);
+static void (*__ipipe_cpu_sync) (void);
+
+#else /* !CONFIG_SMP */
+/*
+ * Create an alias to the unique root status, so that arch-dep code
+ * may get fast access to this percpu variable including from
+ * assembly.  A hard-coded assumption is that root.status appears at
+ * offset #0 of the ipipe_percpu struct.
+ */
+extern unsigned long __ipipe_root_status
+__attribute__((alias(__stringify(ipipe_percpu))));
+EXPORT_SYMBOL(__ipipe_root_status);
+
+#endif /* !CONFIG_SMP */
+
+IPIPE_DEFINE_SPINLOCK(__ipipe_lock);
+
+static unsigned long __ipipe_virtual_irq_map;
+
+#ifdef CONFIG_PRINTK
+unsigned int __ipipe_printk_virq;
+int __ipipe_printk_bypass;
+#endif /* CONFIG_PRINTK */
+
+#ifdef CONFIG_PROC_FS
+
+struct proc_dir_entry *ipipe_proc_root;
+
+static int __ipipe_version_info_show(struct seq_file *p, void *data)
+{
+       seq_printf(p, "%d\n", IPIPE_CORE_RELEASE);
+       return 0;
+}
+
+static int __ipipe_version_info_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, __ipipe_version_info_show, NULL);
+}
+
+static const struct file_operations __ipipe_version_proc_ops = {
+       .open           = __ipipe_version_info_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static int __ipipe_common_info_show(struct seq_file *p, void *data)
+{
+       struct ipipe_domain *ipd = (struct ipipe_domain *)p->private;
+       char handling, lockbit, virtuality;
+       unsigned long ctlbits;
+       unsigned int irq;
+
+       seq_printf(p, "        +--- Handled\n");
+       seq_printf(p, "        |+-- Locked\n");
+       seq_printf(p, "        ||+- Virtual\n");
+       seq_printf(p, " [IRQ]  |||  Handler\n");
+
+       mutex_lock(&ipd->mutex);
+
+       for (irq = 0; irq < IPIPE_NR_IRQS; irq++) {
+               ctlbits = ipd->irqs[irq].control;
+               /*
+                * There might be a hole between the last external IRQ
+                * and the first virtual one; skip it.
+                */
+               if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq))
+                       continue;
+
+               if (ipipe_virtual_irq_p(irq)
+                   && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))
+                       /* Non-allocated virtual IRQ; skip it. */
+                       continue;
+
+               if (ctlbits & IPIPE_HANDLE_MASK)
+                       handling = 'H';
+               else
+                       handling = '.';
+
+               if (ctlbits & IPIPE_LOCK_MASK)
+                       lockbit = 'L';
+               else
+                       lockbit = '.';
+
+               if (ipipe_virtual_irq_p(irq))
+                       virtuality = 'V';
+               else
+                       virtuality = '.';
+
+               if (ctlbits & IPIPE_HANDLE_MASK)
+                       seq_printf(p, " %4u:  %c%c%c  %pf\n",
+                                  irq, handling, lockbit, virtuality,
+                                  ipd->irqs[irq].handler);
+               else
+                       seq_printf(p, " %4u:  %c%c%c\n",
+                                  irq, handling, lockbit, virtuality);
+       }
+
+       mutex_unlock(&ipd->mutex);
+
+       return 0;
+}
+
+static int __ipipe_common_info_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, __ipipe_common_info_show, PDE_DATA(inode));
+}
+
+static const struct file_operations __ipipe_info_proc_ops = {
+       .owner          = THIS_MODULE,
+       .open           = __ipipe_common_info_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+void add_domain_proc(struct ipipe_domain *ipd)
+{
+       proc_create_data(ipd->name, 0444, ipipe_proc_root,
+                        &__ipipe_info_proc_ops, ipd);
+}
+
+void remove_domain_proc(struct ipipe_domain *ipd)
+{
+       remove_proc_entry(ipd->name, ipipe_proc_root);
+}
+
+void __init __ipipe_init_proc(void)
+{
+       ipipe_proc_root = proc_mkdir("ipipe", NULL);
+       proc_create("version", 0444, ipipe_proc_root,
+                   &__ipipe_version_proc_ops);
+       add_domain_proc(ipipe_root_domain);
+
+       __ipipe_init_tracer();
+}
+
+#else
+
+static inline void add_domain_proc(struct ipipe_domain *ipd)
+{
+}
+
+static inline void remove_domain_proc(struct ipipe_domain *ipd)
+{
+}
+
+#endif /* CONFIG_PROC_FS */
+
+static void init_stage(struct ipipe_domain *ipd)
+{
+       memset(&ipd->irqs, 0, sizeof(ipd->irqs));
+       mutex_init(&ipd->mutex);
+       __ipipe_hook_critical_ipi(ipd);
+}
+
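+/*
+ * Build-time check: should ipipe_percpu.root ever move away from
+ * offset #0, the call to the (intentionally undefined) function below
+ * is not optimized out and the kernel fails to link.
+ */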
+static inline int root_context_offset(void)
+{
+       void root_context_not_at_start_of_ipipe_percpu(void);
+
+       /* ipipe_percpu.root must be found at offset #0. */
+
+       if (offsetof(struct ipipe_percpu_data, root))
+               root_context_not_at_start_of_ipipe_percpu();
+
+       return 0;
+}
+
+#ifdef CONFIG_SMP
+
+static inline void fixup_percpu_data(void)
+{
+       struct ipipe_percpu_data *p;
+       int cpu;
+
+       /*
+        * ipipe_percpu.curr cannot be assigned statically to
+        * &ipipe_percpu.root, due to the dynamic nature of percpu
+        * data. So we make ipipe_percpu.curr refer to a temporary
+        * boot-up context in static memory, until we can fix up all
+        * context pointers in this routine, once the per-cpu areas
+        * have been set up. The temporary context data is
+        * copied to per_cpu(ipipe_percpu, 0).root in the same move.
+        *
+        * Obviously, this code must run over the boot CPU, before SMP
+        * operations start.
+        */
+       BUG_ON(smp_processor_id() || !irqs_disabled());
+
+       per_cpu(ipipe_percpu, 0).root = bootup_context;
+
+       for_each_possible_cpu(cpu) {
+               p = &per_cpu(ipipe_percpu, cpu);
+               p->curr = &p->root;
+       }
+}
+
+#else /* !CONFIG_SMP */
+
+static inline void fixup_percpu_data(void) { }
+
+#endif /* CONFIG_SMP */
+
+void __init __ipipe_init_early(void)
+{
+       struct ipipe_domain *ipd = &ipipe_root;
+       int cpu;
+
+       fixup_percpu_data();
+
+       /*
+        * A lightweight registration code for the root domain. We are
+        * running on the boot CPU, hw interrupts are off, and
+        * secondary CPUs are still lost in space.
+        */
+       ipd->name = "Linux";
+       ipd->context_offset = root_context_offset();
+       init_stage(ipd);
+
+       /*
+        * Do the early init stuff. First we do the per-arch pipeline
+        * core setup, then we run the per-client setup code. At this
+        * point, the kernel does not provide many services yet: be
+        * careful.
+        */
+       __ipipe_early_core_setup();
+       __ipipe_early_client_setup();
+
+#ifdef CONFIG_PRINTK
+       __ipipe_printk_virq = ipipe_alloc_virq();
+       ipd->irqs[__ipipe_printk_virq].handler = __ipipe_flush_printk;
+       ipd->irqs[__ipipe_printk_virq].cookie = NULL;
+       ipd->irqs[__ipipe_printk_virq].ackfn = NULL;
+       ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK;
+#endif /* CONFIG_PRINTK */
+
+       __ipipe_work_virq = ipipe_alloc_virq();
+       ipd->irqs[__ipipe_work_virq].handler = __ipipe_do_work;
+       ipd->irqs[__ipipe_work_virq].cookie = NULL;
+       ipd->irqs[__ipipe_work_virq].ackfn = NULL;
+       ipd->irqs[__ipipe_work_virq].control = IPIPE_HANDLE_MASK;
+
+       for_each_possible_cpu(cpu)
+               per_cpu(work_tail, cpu) = per_cpu(work_buf, cpu);
+}
+
+void __init __ipipe_init(void)
+{
+       /* Now we may engage the pipeline. */
+       __ipipe_enable_pipeline();
+
+       pr_info("Interrupt pipeline (release #%d)\n", IPIPE_CORE_RELEASE);
+}
+
+static inline void init_head_stage(struct ipipe_domain *ipd)
+{
+       struct ipipe_percpu_domain_data *p;
+       int cpu;
+
+       /* Must be set first, used in ipipe_percpu_context(). */
+       ipd->context_offset = offsetof(struct ipipe_percpu_data, head);
+
+       for_each_online_cpu(cpu) {
+               p = ipipe_percpu_context(ipd, cpu);
+               memset(p, 0, sizeof(*p));
+               p->domain = ipd;
+       }
+
+       init_stage(ipd);
+}
+
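+/*
+ * Illustrative co-kernel bring-up sequence (the domain object, names
+ * and handler below are hypothetical):
+ *
+ *        static struct ipipe_domain my_domain;
+ *
+ *        ipipe_register_head(&my_domain, "MyCoKernel");
+ *        ipipe_request_irq(&my_domain, irq, my_handler, my_cookie, NULL);
+ *        ...
+ *        ipipe_free_irq(&my_domain, irq);
+ *        ipipe_unregister_head(&my_domain);
+ *
+ * Both registration and unregistration must be carried out from the
+ * root domain, see the checks below.
+ */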
+void ipipe_register_head(struct ipipe_domain *ipd, const char *name)
+{
+       BUG_ON(!ipipe_root_p || ipd == &ipipe_root);
+
+       ipd->name = name;
+       init_head_stage(ipd);
+       barrier();
+       ipipe_head_domain = ipd;
+       add_domain_proc(ipd);
+
+       pr_info("I-pipe: head domain %s registered.\n", name);
+}
+EXPORT_SYMBOL_GPL(ipipe_register_head);
+
+void ipipe_unregister_head(struct ipipe_domain *ipd)
+{
+       BUG_ON(!ipipe_root_p || ipd != ipipe_head_domain);
+
+       ipipe_head_domain = &ipipe_root;
+       smp_mb();
+       mutex_lock(&ipd->mutex);
+       remove_domain_proc(ipd);
+       mutex_unlock(&ipd->mutex);
+
+       pr_info("I-pipe: head domain %s unregistered.\n", ipd->name);
+}
+EXPORT_SYMBOL_GPL(ipipe_unregister_head);
+
+void ipipe_unstall_root(void)
+{
+       struct ipipe_percpu_domain_data *p;
+
+       hard_local_irq_disable();
+
+       /* This helps catch bad usage from assembly call sites. */
+       ipipe_root_only();
+
+       p = ipipe_this_cpu_root_context();
+
+       __clear_bit(IPIPE_STALL_FLAG, &p->status);
+
+       if (unlikely(__ipipe_ipending_p(p)))
+               __ipipe_sync_stage();
+
+       hard_local_irq_enable();
+}
+EXPORT_SYMBOL(ipipe_unstall_root);
+
+void ipipe_restore_root(unsigned long x)
+{
+       ipipe_root_only();
+
+       if (x)
+               ipipe_stall_root();
+       else
+               ipipe_unstall_root();
+}
+EXPORT_SYMBOL(ipipe_restore_root);
+
+void __ipipe_restore_root_nosync(unsigned long x)
+{
+       struct ipipe_percpu_domain_data *p = ipipe_this_cpu_root_context();
+
+       if (raw_irqs_disabled_flags(x)) {
+               __set_bit(IPIPE_STALL_FLAG, &p->status);
+               trace_hardirqs_off();
+       } else {
+               trace_hardirqs_on();
+               __clear_bit(IPIPE_STALL_FLAG, &p->status);
+       }
+}
+EXPORT_SYMBOL_GPL(__ipipe_restore_root_nosync);
+
+void ipipe_unstall_head(void)
+{
+       struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context();
+
+       hard_local_irq_disable();
+
+       __clear_bit(IPIPE_STALL_FLAG, &p->status);
+
+       if (unlikely(__ipipe_ipending_p(p)))
+               __ipipe_sync_pipeline(ipipe_head_domain);
+
+       hard_local_irq_enable();
+}
+EXPORT_SYMBOL_GPL(ipipe_unstall_head);
+
+void __ipipe_restore_head(unsigned long x) /* hw interrupt off */
+{
+       struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context();
+
+       if (x) {
+#ifdef CONFIG_DEBUG_KERNEL
+               static int warned;
+               if (!warned &&
+                   __test_and_set_bit(IPIPE_STALL_FLAG, &p->status)) {
+                       /*
+                        * Already stalled, although ipipe_restore_head()
+                        * should have detected it? Warn only once.
+                        */
+                       hard_local_irq_enable();
+                       warned = 1;
+                       pr_warning("I-pipe: ipipe_restore_head() "
+                                  "optimization failed.\n");
+                       dump_stack();
+                       hard_local_irq_disable();
+               }
+#else /* !CONFIG_DEBUG_KERNEL */
+               __set_bit(IPIPE_STALL_FLAG, &p->status);
+#endif /* CONFIG_DEBUG_KERNEL */
+       } else {
+               __clear_bit(IPIPE_STALL_FLAG, &p->status);
+               if (unlikely(__ipipe_ipending_p(p)))
+                       __ipipe_sync_pipeline(ipipe_head_domain);
+               hard_local_irq_enable();
+       }
+}
+EXPORT_SYMBOL_GPL(__ipipe_restore_head);
+
+void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock)
+{
+       hard_local_irq_disable();
+       if (ipipe_smp_p)
+               arch_spin_lock(&lock->arch_lock);
+       __set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status);
+}
+EXPORT_SYMBOL_GPL(__ipipe_spin_lock_irq);
+
+void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock)
+{
+       if (ipipe_smp_p)
+               arch_spin_unlock(&lock->arch_lock);
+       __clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status);
+       hard_local_irq_enable();
+}
+EXPORT_SYMBOL_GPL(__ipipe_spin_unlock_irq);
+
+unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock)
+{
+       unsigned long flags;
+       int s;
+
+       flags = hard_local_irq_save();
+       if (ipipe_smp_p)
+               arch_spin_lock(&lock->arch_lock);
+       s = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status);
+
+       return arch_mangle_irq_bits(s, flags);
+}
+EXPORT_SYMBOL_GPL(__ipipe_spin_lock_irqsave);
+
+int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock,
+                                unsigned long *x)
+{
+       unsigned long flags;
+       int s;
+
+       flags = hard_local_irq_save();
+       if (ipipe_smp_p && !arch_spin_trylock(&lock->arch_lock)) {
+               hard_local_irq_restore(flags);
+               return 0;
+       }
+       s = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status);
+       *x = arch_mangle_irq_bits(s, flags);
+
+       return 1;
+}
+EXPORT_SYMBOL_GPL(__ipipe_spin_trylock_irqsave);
+
+void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock,
+                                   unsigned long x)
+{
+       if (ipipe_smp_p)
+               arch_spin_unlock(&lock->arch_lock);
+       if (!arch_demangle_irq_bits(&x))
+               __clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status);
+       hard_local_irq_restore(x);
+}
+EXPORT_SYMBOL_GPL(__ipipe_spin_unlock_irqrestore);
+
+int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock)
+{
+       unsigned long flags;
+
+       flags = hard_local_irq_save();
+       if (ipipe_smp_p && !arch_spin_trylock(&lock->arch_lock)) {
+               hard_local_irq_restore(flags);
+               return 0;
+       }
+       __set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status);
+
+       return 1;
+}
+EXPORT_SYMBOL_GPL(__ipipe_spin_trylock_irq);
+
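+/*
+ * Split unlock helpers: __ipipe_spin_unlock_irqbegin() only drops the
+ * arch lock (on SMP), keeping hard IRQs off and the stall bit set, so
+ * the caller may still run a short lock-free epilogue; a matching
+ * __ipipe_spin_unlock_irqcomplete() then restores the stall bit and
+ * the hard interrupt state from the saved flags.
+ */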
+void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock)
+{
+       if (ipipe_smp_p)
+               arch_spin_unlock(&lock->arch_lock);
+}
+
+void __ipipe_spin_unlock_irqcomplete(unsigned long x)
+{
+       if (!arch_demangle_irq_bits(&x))
+               __clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status);
+       hard_local_irq_restore(x);
+}
+
+#ifdef __IPIPE_3LEVEL_IRQMAP
+
+/* Must be called hw IRQs off. */
+static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p,
+                                       unsigned int irq)
+{
+       __set_bit(irq, p->irqheld_map);
+       p->irqall[irq]++;
+}
+
+/* Must be called hw IRQs off. */
+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq)
+{
+       struct ipipe_percpu_domain_data *p = ipipe_this_cpu_context(ipd);
+       int l0b, l1b;
+
+       IPIPE_WARN_ONCE(!hard_irqs_disabled());
+
+       l0b = irq / (BITS_PER_LONG * BITS_PER_LONG);
+       l1b = irq / BITS_PER_LONG;
+
+       if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) {
+               __set_bit(irq, p->irqpend_lomap);
+               __set_bit(l1b, p->irqpend_mdmap);
+               __set_bit(l0b, &p->irqpend_himap);
+       } else
+               __set_bit(irq, p->irqheld_map);
+
+       p->irqall[irq]++;
+}
+EXPORT_SYMBOL_GPL(__ipipe_set_irq_pending);
+
+/* Must be called hw IRQs off. */
+void __ipipe_lock_irq(unsigned int irq)
+{
+       struct ipipe_domain *ipd = ipipe_root_domain;
+       struct ipipe_percpu_domain_data *p;
+       int l0b, l1b;
+
+       IPIPE_WARN_ONCE(!hard_irqs_disabled());
+
+       /*
+        * Interrupts requested by a registered head domain cannot be
+        * locked, since this would make no sense: interrupts are
+        * globally masked at CPU level when the head domain is
+        * stalled, so there is no way we could encounter the
+        * situation IRQ locks are handling.
+        */
+       if (test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))
+               return;
+
+       l0b = irq / (BITS_PER_LONG * BITS_PER_LONG);
+       l1b = irq / BITS_PER_LONG;
+
+       p = ipipe_this_cpu_context(ipd);
+       if (__test_and_clear_bit(irq, p->irqpend_lomap)) {
+               __set_bit(irq, p->irqheld_map);
+               if (p->irqpend_lomap[l1b] == 0) {
+                       __clear_bit(l1b, p->irqpend_mdmap);
+                       if (p->irqpend_mdmap[l0b] == 0)
+                               __clear_bit(l0b, &p->irqpend_himap);
+               }
+       }
+}
+EXPORT_SYMBOL_GPL(__ipipe_lock_irq);
+
+/* Must be called hw IRQs off. */
+void __ipipe_unlock_irq(unsigned int irq)
+{
+       struct ipipe_domain *ipd = ipipe_root_domain;
+       struct ipipe_percpu_domain_data *p;
+       int l0b, l1b, cpu;
+
+       IPIPE_WARN_ONCE(!hard_irqs_disabled());
+
+       if (!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))
+               return;
+
+       l0b = irq / (BITS_PER_LONG * BITS_PER_LONG);
+       l1b = irq / BITS_PER_LONG;
+
+       for_each_online_cpu(cpu) {
+               p = ipipe_percpu_context(ipd, cpu);
+               if (test_and_clear_bit(irq, p->irqheld_map)) {
+                       /* We need atomic ops here: */
+                       set_bit(irq, p->irqpend_lomap);
+                       set_bit(l1b, p->irqpend_mdmap);
+                       set_bit(l0b, &p->irqpend_himap);
+               }
+       }
+}
+EXPORT_SYMBOL_GPL(__ipipe_unlock_irq);
+
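+/*
+ * Find and consume the next pending IRQ by walking the three bitmap
+ * levels top-down: himap selects a word of mdmap, mdmap selects a word
+ * of lomap, and lomap holds the actual IRQ bits. The chosen bit is
+ * cleared, and the upper-level summary bits are cleared as soon as the
+ * word below them becomes empty. Returns -1 if nothing is pending.
+ * Must be called hw IRQs off.
+ */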
+static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p)
+{
+       int l0b, l1b, l2b;
+       unsigned long l0m, l1m, l2m;
+       unsigned int irq;
+
+       l0m = p->irqpend_himap;
+       if (unlikely(l0m == 0))
+               return -1;
+
+       l0b = __ipipe_ffnz(l0m);
+       l1m = p->irqpend_mdmap[l0b];
+       if (unlikely(l1m == 0))
+               return -1;
+
+       l1b = __ipipe_ffnz(l1m) + l0b * BITS_PER_LONG;
+       l2m = p->irqpend_lomap[l1b];
+       if (unlikely(l2m == 0))
+               return -1;
+
+       l2b = __ipipe_ffnz(l2m);
+       irq = l1b * BITS_PER_LONG + l2b;
+
+       __clear_bit(irq, p->irqpend_lomap);
+       if (p->irqpend_lomap[l1b] == 0) {
+               __clear_bit(l1b, p->irqpend_mdmap);
+               if (p->irqpend_mdmap[l0b] == 0)
+                       __clear_bit(l0b, &p->irqpend_himap);
+       }
+
+       return irq;
+}
+
+#else /* __IPIPE_2LEVEL_IRQMAP */
+
+/* Must be called hw IRQs off. */
+static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p,
+                                       unsigned int irq)
+{
+       __set_bit(irq, p->irqheld_map);
+       p->irqall[irq]++;
+}
+
+/* Must be called hw IRQs off. */
+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq)
+{
+       struct ipipe_percpu_domain_data *p = ipipe_this_cpu_context(ipd);
+       int l0b = irq / BITS_PER_LONG;
+
+       IPIPE_WARN_ONCE(!hard_irqs_disabled());
+
+       if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) {
+               __set_bit(irq, p->irqpend_lomap);
+               __set_bit(l0b, &p->irqpend_himap);
+       } else
+               __set_bit(irq, p->irqheld_map);
+
+       p->irqall[irq]++;
+}
+EXPORT_SYMBOL_GPL(__ipipe_set_irq_pending);
+
+/* Must be called hw IRQs off. */
+void __ipipe_lock_irq(unsigned int irq)
+{
+       struct ipipe_percpu_domain_data *p;
+       int l0b = irq / BITS_PER_LONG;
+
+       IPIPE_WARN_ONCE(!hard_irqs_disabled());
+
+       if (test_and_set_bit(IPIPE_LOCK_FLAG,
+                            &ipipe_root_domain->irqs[irq].control))
+               return;
+
+       p = ipipe_this_cpu_root_context();
+       if (__test_and_clear_bit(irq, p->irqpend_lomap)) {
+               __set_bit(irq, p->irqheld_map);
+               if (p->irqpend_lomap[l0b] == 0)
+                       __clear_bit(l0b, &p->irqpend_himap);
+       }
+}
+EXPORT_SYMBOL_GPL(__ipipe_lock_irq);
+
+/* Must be called hw IRQs off. */
+void __ipipe_unlock_irq(unsigned int irq)
+{
+       struct ipipe_domain *ipd = ipipe_root_domain;
+       struct ipipe_percpu_domain_data *p;
+       int l0b = irq / BITS_PER_LONG, cpu;
+
+       IPIPE_WARN_ONCE(!hard_irqs_disabled());
+
+       if (!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))
+               return;
+
+       for_each_online_cpu(cpu) {
+               p = ipipe_percpu_context(ipd, cpu);
+               if (test_and_clear_bit(irq, p->irqheld_map)) {
+                       /* We need atomic ops here: */
+                       set_bit(irq, p->irqpend_lomap);
+                       set_bit(l0b, &p->irqpend_himap);
+               }
+       }
+}
+EXPORT_SYMBOL_GPL(__ipipe_unlock_irq);
+
+static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p)
+{
+       unsigned long l0m, l1m;
+       int l0b, l1b;
+
+       l0m = p->irqpend_himap;
+       if (unlikely(l0m == 0))
+               return -1;
+
+       l0b = __ipipe_ffnz(l0m);
+       l1m = p->irqpend_lomap[l0b];
+       if (unlikely(l1m == 0))
+               return -1;
+
+       l1b = __ipipe_ffnz(l1m);
+       __clear_bit(l1b, &p->irqpend_lomap[l0b]);
+       if (p->irqpend_lomap[l0b] == 0)
+               __clear_bit(l0b, &p->irqpend_himap);
+
+       return l0b * BITS_PER_LONG + l1b;
+}
+
+#endif /* __IPIPE_2LEVEL_IRQMAP */
+
+void __ipipe_do_sync_pipeline(struct ipipe_domain *top)
+{
+       struct ipipe_percpu_domain_data *p;
+       struct ipipe_domain *ipd;
+
+       /* We must enter over the root domain. */
+       IPIPE_WARN_ONCE(__ipipe_current_domain != ipipe_root_domain);
+       ipd = top;
+next:
+       p = ipipe_this_cpu_context(ipd);
+       if (test_bit(IPIPE_STALL_FLAG, &p->status))
+               return;
+
+       if (__ipipe_ipending_p(p)) {
+               if (ipd == ipipe_root_domain)
+                       __ipipe_sync_stage();
+               else {
+                       /* Switching to head. */
+                       p->coflags &= ~__IPIPE_ALL_R;
+                       __ipipe_set_current_context(p);
+                       __ipipe_sync_stage();
+                       __ipipe_set_current_domain(ipipe_root_domain);
+               }
+       }
+
+       if (ipd != ipipe_root_domain) {
+               ipd = ipipe_root_domain;
+               goto next;
+       }
+}
+EXPORT_SYMBOL_GPL(__ipipe_do_sync_pipeline);
+
+unsigned int ipipe_alloc_virq(void)
+{
+       unsigned long flags, irq = 0;
+       int ipos;
+
+       raw_spin_lock_irqsave(&__ipipe_lock, flags);
+
+       if (__ipipe_virtual_irq_map != ~0) {
+               ipos = ffz(__ipipe_virtual_irq_map);
+               set_bit(ipos, &__ipipe_virtual_irq_map);
+               irq = ipos + IPIPE_VIRQ_BASE;
+       }
+
+       raw_spin_unlock_irqrestore(&__ipipe_lock, flags);
+
+       return irq;
+}
+EXPORT_SYMBOL_GPL(ipipe_alloc_virq);
+
+void ipipe_free_virq(unsigned int virq)
+{
+       clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map);
+       smp_mb__after_atomic();
+}
+EXPORT_SYMBOL_GPL(ipipe_free_virq);
+
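+/*
+ * Illustrative virtual IRQ usage (the handler name is hypothetical):
+ *
+ *        unsigned int virq = ipipe_alloc_virq();
+ *
+ *        if (virq)
+ *                ipipe_request_irq(ipipe_root_domain, virq,
+ *                                  my_virq_handler, NULL, NULL);
+ *        ...
+ *        ipipe_free_irq(ipipe_root_domain, virq);
+ *        ipipe_free_virq(virq);
+ *
+ * ipipe_alloc_virq() returns 0 when the virtual IRQ space is
+ * exhausted.
+ */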
+int ipipe_request_irq(struct ipipe_domain *ipd,
+                     unsigned int irq,
+                     ipipe_irq_handler_t handler,
+                     void *cookie,
+                     ipipe_irq_ackfn_t ackfn)
+{
+       unsigned long flags;
+       int ret = 0;
+
+       ipipe_root_only();
+
+       if (handler == NULL ||
+           (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq)))
+               return -EINVAL;
+
+       raw_spin_lock_irqsave(&__ipipe_lock, flags);
+
+       if (ipd->irqs[irq].handler) {
+               ret = -EBUSY;
+               goto out;
+       }
+
+       if (ackfn == NULL)
+               ackfn = ipipe_root_domain->irqs[irq].ackfn;
+
+       ipd->irqs[irq].handler = handler;
+       ipd->irqs[irq].cookie = cookie;
+       ipd->irqs[irq].ackfn = ackfn;
+       ipd->irqs[irq].control = IPIPE_HANDLE_MASK;
+
+       if (irq < IPIPE_NR_ROOT_IRQS)
+               __ipipe_enable_irqdesc(ipd, irq);
+out:
+       raw_spin_unlock_irqrestore(&__ipipe_lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(ipipe_request_irq);
+
+void ipipe_free_irq(struct ipipe_domain *ipd,
+                   unsigned int irq)
+{
+       unsigned long flags;
+
+       ipipe_root_only();
+
+       raw_spin_lock_irqsave(&__ipipe_lock, flags);
+
+       if (ipd->irqs[irq].handler == NULL)
+               goto out;
+
+       ipd->irqs[irq].handler = NULL;
+       ipd->irqs[irq].cookie = NULL;
+       ipd->irqs[irq].ackfn = NULL;
+       ipd->irqs[irq].control = 0;
+
+       if (irq < IPIPE_NR_ROOT_IRQS)
+               __ipipe_disable_irqdesc(ipd, irq);
+out:
+       raw_spin_unlock_irqrestore(&__ipipe_lock, flags);
+}
+EXPORT_SYMBOL_GPL(ipipe_free_irq);
+
+void ipipe_set_hooks(struct ipipe_domain *ipd, int enables)
+{
+       struct ipipe_percpu_domain_data *p;
+       unsigned long flags;
+       int cpu, wait;
+
+       if (ipd == ipipe_root_domain) {
+               IPIPE_WARN(enables & __IPIPE_TRAP_E);
+               enables &= ~__IPIPE_TRAP_E;
+       } else {
+               IPIPE_WARN(enables & __IPIPE_KEVENT_E);
+               enables &= ~__IPIPE_KEVENT_E;
+       }
+
+       flags = ipipe_critical_enter(NULL);
+
+       for_each_online_cpu(cpu) {
+               p = ipipe_percpu_context(ipd, cpu);
+               p->coflags &= ~__IPIPE_ALL_E;
+               p->coflags |= enables;
+       }
+
+       wait = (enables ^ __IPIPE_ALL_E) << __IPIPE_SHIFT_R;
+       if (wait == 0 || !__ipipe_root_p) {
+               ipipe_critical_exit(flags);
+               return;
+       }
+
+       ipipe_this_cpu_context(ipd)->coflags &= ~wait;
+
+       ipipe_critical_exit(flags);
+
+       /*
+        * In case we cleared some hooks over the root domain, we have
+        * to wait for any ongoing execution to finish, since our
+        * caller might subsequently unmap the target domain code.
+        *
+        * We synchronize with the relevant __ipipe_notify_*()
+        * helpers, disabling all hooks before we start waiting for
+        * completion on all CPUs.
+        */
+       for_each_online_cpu(cpu) {
+               while (ipipe_percpu_context(ipd, cpu)->coflags & wait)
+                       schedule_timeout_interruptible(HZ / 50);
+       }
+}
+EXPORT_SYMBOL_GPL(ipipe_set_hooks);
+
+int __weak ipipe_fastcall_hook(struct pt_regs *regs)
+{
+       return -1;      /* i.e. fall back to slow path. */
+}
+
+int __weak ipipe_syscall_hook(struct ipipe_domain *ipd, struct pt_regs *regs)
+{
+       return 0;
+}
+
+int __weak ipipe_get_domain_slope_hook(struct task_struct *prev,
+                                      struct task_struct *next)
+{
+       /*
+        * A co-kernel must provide this hook, or bad things may
+        * happen when sections protected by fpu_kernel_begin(),
+        * fpu_kernel_end() pairs are preempted by co-kernel threads
+        * also using the FPU!
+        */
+       return 0;
+}
+
+void __ipipe_root_sync(void)
+{
+       struct ipipe_percpu_domain_data *p;
+       unsigned long flags;
+
+       flags = hard_local_irq_save();
+
+       p = ipipe_this_cpu_root_context();
+       if (__ipipe_ipending_p(p))
+               __ipipe_sync_stage();
+
+       hard_local_irq_restore(flags);
+}
+
+int __ipipe_notify_syscall(struct pt_regs *regs)
+{
+       struct ipipe_domain *caller_domain, *this_domain, *ipd;
+       struct ipipe_percpu_domain_data *p;
+       unsigned long flags;
+       int ret = 0;
+
+       /*
+        * We should definitely not pipeline a syscall with IRQs off.
+        */
+       IPIPE_WARN_ONCE(hard_irqs_disabled());
+
+       flags = hard_local_irq_save();
+       caller_domain = this_domain = __ipipe_current_domain;
+       ipd = ipipe_head_domain;
+next:
+       p = ipipe_this_cpu_context(ipd);
+       if (likely(p->coflags & __IPIPE_SYSCALL_E)) {
+               __ipipe_set_current_context(p);
+               p->coflags |= __IPIPE_SYSCALL_R;
+               hard_local_irq_restore(flags);
+               ret = ipipe_syscall_hook(caller_domain, regs);
+               flags = hard_local_irq_save();
+               p->coflags &= ~__IPIPE_SYSCALL_R;
+               if (__ipipe_current_domain != ipd)
+                       /* Account for domain migration. */
+                       this_domain = __ipipe_current_domain;
+               else
+                       __ipipe_set_current_domain(this_domain);
+       }
+
+       if (this_domain == ipipe_root_domain) {
+               if (ipd != ipipe_root_domain && ret == 0) {
+                       ipd = ipipe_root_domain;
+                       goto next;
+               }
+               /*
+                * Careful: we may have migrated from head->root, so p
+                * would be ipipe_this_cpu_context(head).
+                */
+               p = ipipe_this_cpu_root_context();
+               if (__ipipe_ipending_p(p))
+                       __ipipe_sync_stage();
+       } else if (ipipe_test_thread_flag(TIP_MAYDAY))
+               __ipipe_call_mayday(regs);
+
+       hard_local_irq_restore(flags);
+
+       return ret;
+}
+
+int __weak ipipe_trap_hook(struct ipipe_trap_data *data)
+{
+       return 0;
+}
+
+int __ipipe_notify_trap(int exception, struct pt_regs *regs)
+{
+       struct ipipe_percpu_domain_data *p;
+       struct ipipe_trap_data data;
+       unsigned long flags;
+       int ret = 0;
+
+       flags = hard_local_irq_save();
+
+       /*
+        * We send a notification about all traps raised over a
+        * registered head domain only.
+        */
+       if (__ipipe_root_p)
+               goto out;
+
+       p = ipipe_this_cpu_head_context();
+       if (likely(p->coflags & __IPIPE_TRAP_E)) {
+               p->coflags |= __IPIPE_TRAP_R;
+               hard_local_irq_restore(flags);
+               data.exception = exception;
+               data.regs = regs;
+               ret = ipipe_trap_hook(&data);
+               flags = hard_local_irq_save();
+               p->coflags &= ~__IPIPE_TRAP_R;
+       }
+out:
+       hard_local_irq_restore(flags);
+
+       return ret;
+}
+
+int __weak ipipe_kevent_hook(int kevent, void *data)
+{
+       return 0;
+}
+
+int __ipipe_notify_kevent(int kevent, void *data)
+{
+       struct ipipe_percpu_domain_data *p;
+       unsigned long flags;
+       int ret = 0;
+
+       ipipe_root_only();
+
+       flags = hard_local_irq_save();
+
+       p = ipipe_this_cpu_root_context();
+       if (likely(p->coflags & __IPIPE_KEVENT_E)) {
+               p->coflags |= __IPIPE_KEVENT_R;
+               hard_local_irq_restore(flags);
+               ret = ipipe_kevent_hook(kevent, data);
+               flags = hard_local_irq_save();
+               p->coflags &= ~__IPIPE_KEVENT_R;
+       }
+
+       hard_local_irq_restore(flags);
+
+       return ret;
+}
+
+void __ipipe_notify_vm_preemption(void)
+{
+       struct ipipe_vm_notifier *vmf;
+       struct ipipe_percpu_data *p;
+
+       ipipe_check_irqoff();
+       p = __ipipe_raw_cpu_ptr(&ipipe_percpu);
+       vmf = p->vm_notifier;
+       if (unlikely(vmf))
+               vmf->handler(vmf);
+}
+EXPORT_SYMBOL_GPL(__ipipe_notify_vm_preemption);
+
+static void dispatch_irq_head(unsigned int irq) /* hw interrupts off */
+{
+       struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context(), *old;
+       struct ipipe_domain *head = p->domain;
+
+       if (unlikely(test_bit(IPIPE_STALL_FLAG, &p->status))) {
+               __ipipe_set_irq_pending(head, irq);
+               return;
+       }
+
+       /* Switch to the head domain if not current. */
+       old = __ipipe_current_context;
+       if (old != p)
+               __ipipe_set_current_context(p);
+
+       p->irqall[irq]++;
+       __set_bit(IPIPE_STALL_FLAG, &p->status);
+       barrier();
+       head->irqs[irq].handler(irq, head->irqs[irq].cookie);
+       __ipipe_run_irqtail(irq);
+       hard_local_irq_disable();
+       p = ipipe_this_cpu_head_context();
+       __clear_bit(IPIPE_STALL_FLAG, &p->status);
+
+       /* Are we still running in the head domain? */
+       if (likely(__ipipe_current_context == p)) {
+               /* Did we enter this code over the head domain? */
+               if (old->domain == head) {
+                       /* Yes, do immediate synchronization. */
+                       if (__ipipe_ipending_p(p))
+                               __ipipe_sync_stage();
+                       return;
+               }
+               __ipipe_set_current_context(ipipe_this_cpu_root_context());
+       }
+
+       /*
+        * We must be running over the root domain; synchronize
+        * the pipeline for high priority IRQs (slow path).
+        */
+       __ipipe_do_sync_pipeline(head);
+}
+
+void __ipipe_dispatch_irq(unsigned int irq, int flags) /* hw interrupts off */
+{
+       struct ipipe_domain *ipd;
+       struct irq_desc *desc;
+       unsigned long control;
+       int chained_irq;
+
+       /*
+        * Survival kit when reading this code:
+        *
+        * - we have two main situations, leading to three cases for
+        *   handling interrupts:
+        *
+        *   a) the root domain is alone, no registered head domain
+        *      => all interrupts go through the interrupt log
+        *   b) a head domain is registered
+        *      => head domain IRQs go through the fast dispatcher
+        *      => root domain IRQs go through the interrupt log
+        *
+        * - when no head domain is registered, ipipe_head_domain ==
+        *   ipipe_root_domain == &ipipe_root.
+        *
+        * - the caller tells us whether we should acknowledge this
+        *   IRQ. Even virtual IRQs may require an acknowledge cycle on
+        *   some platforms (e.g. arm/SMP).
+        *
+        * - the caller tells us whether we may try to run the IRQ log
+        *   syncer. Typically, demuxed IRQs won't be synced
+        *   immediately.
+        *
+        * - multiplexed IRQs most likely have a valid acknowledge
+        *   handler and we may not be called with IPIPE_IRQF_NOACK
+        *   for them. The ack handler for the multiplexed IRQ actually
+        *   decodes the demuxed interrupts.
+        */
+
+#ifdef CONFIG_IPIPE_DEBUG
+       if (unlikely(irq >= IPIPE_NR_IRQS) ||
+           (irq < IPIPE_NR_ROOT_IRQS && irq_to_desc(irq) == NULL)) {
+               pr_err("I-pipe: spurious interrupt %u\n", irq);
+               return;
+       }
+#endif
+       /*
+        * CAUTION: on some archs, virtual IRQs may have acknowledge
+        * handlers. Multiplexed IRQs should have one too.
+        */
+       if (unlikely(irq >= IPIPE_NR_ROOT_IRQS)) {
+               desc = NULL;
+               chained_irq = 0;
+       } else {
+               desc = irq_to_desc(irq);
+               chained_irq = desc ? ipipe_chained_irq_p(desc) : 0;
+       }
+       if (flags & IPIPE_IRQF_NOACK)
+               IPIPE_WARN_ONCE(chained_irq);
+       else {
+               ipd = ipipe_head_domain;
+               control = ipd->irqs[irq].control;
+               if ((control & IPIPE_HANDLE_MASK) == 0)
+                       ipd = ipipe_root_domain;
+               if (ipd->irqs[irq].ackfn)
+                       ipd->irqs[irq].ackfn(desc);
+               if (chained_irq) {
+                       if ((flags & IPIPE_IRQF_NOSYNC) == 0)
+                               /* Run demuxed IRQ handlers. */
+                               goto sync;
+                       return;
+               }
+       }
+
+       /*
+        * Sticky interrupts must be handled early and separately, so
+        * that we always process them on the current domain.
+        */
+       ipd = __ipipe_current_domain;
+       control = ipd->irqs[irq].control;
+       if (control & IPIPE_STICKY_MASK)
+               goto log;
+
+       /*
+        * In case we have no registered head domain
+        * (i.e. ipipe_head_domain == &ipipe_root), we always go
+        * through the interrupt log, and leave the dispatching work
+        * ultimately to __ipipe_sync_pipeline().
+        */
+       ipd = ipipe_head_domain;
+       control = ipd->irqs[irq].control;
+       if (ipd == ipipe_root_domain)
+               /*
+                * The root domain must handle all interrupts, so
+                * testing the HANDLE bit would be pointless.
+                */
+               goto log;
+
+       if (control & IPIPE_HANDLE_MASK) {
+               if (unlikely(flags & IPIPE_IRQF_NOSYNC))
+                       __ipipe_set_irq_pending(ipd, irq);
+               else
+                       dispatch_irq_head(irq);
+               return;
+       }
+
+       ipd = ipipe_root_domain;
+log:
+       __ipipe_set_irq_pending(ipd, irq);
+
+       if (flags & IPIPE_IRQF_NOSYNC)
+               return;
+
+       /*
+        * Optimize if we preempted a registered high priority head
+        * domain: we don't need to synchronize the pipeline unless
+        * there is a pending interrupt for it.
+        */
+       if (!__ipipe_root_p &&
+           !__ipipe_ipending_p(ipipe_this_cpu_head_context()))
+               return;
+sync:
+       __ipipe_sync_pipeline(ipipe_head_domain);
+}
+
+void ipipe_raise_irq(unsigned int irq)
+{
+       struct ipipe_domain *ipd = ipipe_head_domain;
+       unsigned long flags, control;
+
+       flags = hard_local_irq_save();
+
+       /*
+        * Fast path: raising a virtual IRQ handled by the head
+        * domain.
+        */
+       if (likely(ipipe_virtual_irq_p(irq) && ipd != ipipe_root_domain)) {
+               control = ipd->irqs[irq].control;
+               if (likely(control & IPIPE_HANDLE_MASK)) {
+                       dispatch_irq_head(irq);
+                       goto out;
+               }
+       }
+
+       /* Emulate regular device IRQ receipt. */
+       __ipipe_dispatch_irq(irq, IPIPE_IRQF_NOACK);
+out:
+       hard_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ipipe_raise_irq);
+
+static void sync_root_irqs(void)
+{
+       struct ipipe_percpu_domain_data *p;
+       unsigned long flags;
+
+       flags = hard_local_irq_save();
+
+       p = ipipe_this_cpu_root_context();
+       if (unlikely(__ipipe_ipending_p(p)))
+               __ipipe_sync_stage();
+
+       hard_local_irq_restore(flags);
+}
+
+int ipipe_handle_syscall(struct thread_info *ti,
+                        unsigned long nr, struct pt_regs *regs)
+{
+       unsigned long local_flags = READ_ONCE(ti->ipipe_flags);
+       int ret;
+
+       /*
+        * NOTE: This is a backport from the DOVETAIL syscall
+        * redirector to the older pipeline implementation.
+        *
+        * ==
+        *
+        * If the syscall # is out of bounds and the current IRQ stage
+        * is not the root one, this has to be a non-native system
+        * call handled by some co-kernel on the head stage. Hand it
+        * over to the head stage via the fast syscall handler.
+        *
+        * Otherwise, if the system call is out of bounds or the
+        * current thread is shared with a co-kernel, hand the syscall
+        * over to the latter through the pipeline stages. This
+        * allows:
+        *
+        * - the co-kernel to receive the initial - foreign - syscall
+        * a thread must issue to enable syscall handling by the
+        * co-kernel.
+        *
+        * - the co-kernel to manipulate the current execution stage
+        * for handling the request, which includes switching the
+        * current thread back to the root stage if the syscall is a
+        * native one, or promoting it to the head stage if handling
+        * the foreign syscall requires this.
+        *
+        * Native syscalls from regular (non-pipeline) threads are
+        * ignored by this routine, and flow down to the regular
+        * system call handler.
+        */
+
+       if (nr >= NR_syscalls && (local_flags & _TIP_HEAD)) {
+               ipipe_fastcall_hook(regs);
+               local_flags = READ_ONCE(ti->ipipe_flags);
+               if (local_flags & _TIP_HEAD) {
+                       if (local_flags & _TIP_MAYDAY)
+                               __ipipe_call_mayday(regs);
+                       return 1; /* don't pass down, no tail work. */
+               } else {
+                       sync_root_irqs();
+                       return -1; /* don't pass down, do tail work. */
+               }
+       }
+
+       if ((local_flags & _TIP_NOTIFY) || nr >= NR_syscalls) {
+               ret = __ipipe_notify_syscall(regs);
+               local_flags = READ_ONCE(ti->ipipe_flags);
+               if (local_flags & _TIP_HEAD)
+                       return 1; /* don't pass down, no tail work. */
+               if (ret)
+                       return -1; /* don't pass down, do tail work. */
+       }
+
+       return 0; /* pass syscall down to the host. */
+}
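+
+/*
+ * Illustrative, arch-neutral sketch of how the return value above is
+ * typically consumed by an architecture's syscall entry path (the
+ * actual callers live in arch code and may differ):
+ *
+ *	ret = ipipe_handle_syscall(current_thread_info(), nr, regs);
+ *	if (ret > 0)
+ *		return;		(fast exit, no root tail work)
+ *	else if (ret < 0)
+ *		goto tail_work;	(skip the Linux handler, run tail work)
+ *	(ret == 0: fall through to the regular Linux syscall dispatch)
+ */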
+
+#ifdef CONFIG_PREEMPT
+
+void preempt_schedule_irq(void);
+
+void __sched __ipipe_preempt_schedule_irq(void)
+{
+       struct ipipe_percpu_domain_data *p;
+       unsigned long flags;
+
+       if (WARN_ON_ONCE(!hard_irqs_disabled()))
+               hard_local_irq_disable();
+
+       local_irq_save(flags);
+       hard_local_irq_enable();
+       preempt_schedule_irq(); /* Ok, may reschedule now. */
+       hard_local_irq_disable();
+
+       /*
+        * Flush any pending interrupt that may have been logged
+        * between the point where preempt_schedule_irq() stalled the
+        * root stage before returning to us, and now.
+        */
+       p = ipipe_this_cpu_root_context();
+       if (unlikely(__ipipe_ipending_p(p))) {
+               trace_hardirqs_on();
+               __clear_bit(IPIPE_STALL_FLAG, &p->status);
+               __ipipe_sync_stage();
+       }
+
+       __ipipe_restore_root_nosync(flags);
+}
+
+#else /* !CONFIG_PREEMPT */
+
+#define __ipipe_preempt_schedule_irq() do { } while (0)
+
+#endif /* !CONFIG_PREEMPT */
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+#define root_stall_after_handler()     local_irq_disable()
+#else
+#define root_stall_after_handler()     do { } while (0)
+#endif
+
+/*
+ * __ipipe_do_sync_stage() -- Flush the pending IRQs for the current
+ * domain (and processor). This routine flushes the interrupt log (see
+ * "Optimistic interrupt protection" from D. Stodolsky et al. for more
+ * on the deferred interrupt scheme). Every interrupt that occurred
+ * while the pipeline was stalled gets played.
+ *
+ * WARNING: CPU migration may occur over this routine.
+ */
+void __ipipe_do_sync_stage(void)
+{
+       struct ipipe_percpu_domain_data *p;
+       struct ipipe_domain *ipd;
+       int irq;
+
+       p = __ipipe_current_context;
+respin:
+       ipd = p->domain;
+
+       __set_bit(IPIPE_STALL_FLAG, &p->status);
+       smp_wmb();
+
+       if (ipd == ipipe_root_domain)
+               trace_hardirqs_off();
+
+       for (;;) {
+               irq = __ipipe_next_irq(p);
+               if (irq < 0)
+                       break;
+               /*
+                * Make sure the compiler does not reorder wrongly, so
+                * that all updates to maps are done before the
+                * handler gets called.
+                */
+               barrier();
+
+               if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))
+                       continue;
+
+               if (ipd != ipipe_head_domain)
+                       hard_local_irq_enable();
+
+               if (likely(ipd != ipipe_root_domain)) {
+                       ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie);
+                       __ipipe_run_irqtail(irq);
+                       hard_local_irq_disable();
+               } else if (ipipe_virtual_irq_p(irq)) {
+                       irq_enter();
+                       ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie);
+                       irq_exit();
+                       root_stall_after_handler();
+                       hard_local_irq_disable();
+               } else {
+                       ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie);
+                       root_stall_after_handler();
+                       hard_local_irq_disable();
+               }
+
+               /*
+                * We may have migrated to a different CPU (1) upon
+                * return from the handler, or downgraded from the
+                * head domain to the root one (2); the opposite way
+                * is NOT allowed though.
+                *
+                * (1) reload the current per-cpu context pointer, so
+                * that we further pull pending interrupts from the
+                * proper per-cpu log.
+                *
+                * (2) check the stall bit to know whether we may
+                * dispatch any interrupt pending for the root domain,
+                * and respin the entire dispatch loop if
+                * so. Otherwise, immediately return to the caller,
+                * _without_ affecting the stall state for the root
+                * domain, since we do not own it at this stage.  This
+                * case is basically reflecting what may happen in
+                * dispatch_irq_head() for the fast path.
+                */
+               p = __ipipe_current_context;
+               if (p->domain != ipd) {
+                       IPIPE_BUG_ON(ipd == ipipe_root_domain);
+                       if (test_bit(IPIPE_STALL_FLAG, &p->status))
+                               return;
+                       goto respin;
+               }
+       }
+
+       if (ipd == ipipe_root_domain)
+               trace_hardirqs_on();
+
+       __clear_bit(IPIPE_STALL_FLAG, &p->status);
+}
+
+void __ipipe_call_mayday(struct pt_regs *regs)
+{
+       unsigned long flags;
+
+       ipipe_clear_thread_flag(TIP_MAYDAY);
+       flags = hard_local_irq_save();
+       __ipipe_notify_trap(IPIPE_TRAP_MAYDAY, regs);
+       hard_local_irq_restore(flags);
+}
+
+#ifdef CONFIG_SMP
+
+/* Always called with hw interrupts off. */
+void __ipipe_do_critical_sync(unsigned int irq, void *cookie)
+{
+       int cpu = ipipe_processor_id();
+
+       cpumask_set_cpu(cpu, &__ipipe_cpu_sync_map);
+
+       /*
+        * Now we are in sync with the lock requestor running on
+        * another CPU. Enter a spinning wait until it releases the
+        * global lock.
+        */
+       raw_spin_lock(&__ipipe_cpu_barrier);
+
+       /* Got it. Now get out. */
+
+       /* Call the sync routine if any. */
+       if (__ipipe_cpu_sync)
+               __ipipe_cpu_sync();
+
+       cpumask_set_cpu(cpu, &__ipipe_cpu_pass_map);
+
+       raw_spin_unlock(&__ipipe_cpu_barrier);
+
+       cpumask_clear_cpu(cpu, &__ipipe_cpu_sync_map);
+}
+#endif /* CONFIG_SMP */
+
+unsigned long ipipe_critical_enter(void (*syncfn)(void))
+{
+       cpumask_t allbutself __maybe_unused, online __maybe_unused;
+       int cpu __maybe_unused, n __maybe_unused;
+       unsigned long flags, loops __maybe_unused;
+
+       flags = hard_local_irq_save();
+
+       if (num_online_cpus() == 1)
+               return flags;
+
+#ifdef CONFIG_SMP
+
+       cpu = ipipe_processor_id();
+       if (!cpumask_test_and_set_cpu(cpu, &__ipipe_cpu_lock_map)) {
+               while (test_and_set_bit(0, &__ipipe_critical_lock)) {
+                       n = 0;
+                       hard_local_irq_enable();
+
+                       do
+                               cpu_relax();
+                       while (++n < cpu);
+
+                       hard_local_irq_disable();
+               }
+restart:
+               online = *cpu_online_mask;
+               raw_spin_lock(&__ipipe_cpu_barrier);
+
+               __ipipe_cpu_sync = syncfn;
+
+               cpumask_clear(&__ipipe_cpu_pass_map);
+               cpumask_set_cpu(cpu, &__ipipe_cpu_pass_map);
+
+               /*
+                * Send the sync IPI to all processors but the current
+                * one.
+                */
+               cpumask_andnot(&allbutself, &online, &__ipipe_cpu_pass_map);
+               ipipe_send_ipi(IPIPE_CRITICAL_IPI, allbutself);
+               loops = IPIPE_CRITICAL_TIMEOUT;
+
+               while (!cpumask_equal(&__ipipe_cpu_sync_map, &allbutself)) {
+                       if (--loops > 0) {
+                               cpu_relax();
+                               continue;
+                       }
+                       /*
+                        * We ran into a deadlock due to a contended
+                        * rwlock. Cancel this round and retry.
+                        */
+                       __ipipe_cpu_sync = NULL;
+
+                       raw_spin_unlock(&__ipipe_cpu_barrier);
+                       /*
+                        * Ensure all CPUs consumed the IPI to avoid
+                        * running __ipipe_cpu_sync prematurely. This
+                        * usually clears the cause of the deadlock too.
+                        */
+                       while (!cpumask_equal(&online, &__ipipe_cpu_pass_map))
+                               cpu_relax();
+
+                       goto restart;
+               }
+       }
+
+       atomic_inc(&__ipipe_critical_count);
+
+#endif /* CONFIG_SMP */
+
+       return flags;
+}
+EXPORT_SYMBOL_GPL(ipipe_critical_enter);
+
+void ipipe_critical_exit(unsigned long flags)
+{
+       if (num_online_cpus() == 1) {
+               hard_local_irq_restore(flags);
+               return;
+       }
+
+#ifdef CONFIG_SMP
+       if (atomic_dec_and_test(&__ipipe_critical_count)) {
+               raw_spin_unlock(&__ipipe_cpu_barrier);
+               while (!cpumask_empty(&__ipipe_cpu_sync_map))
+                       cpu_relax();
+               cpumask_clear_cpu(ipipe_processor_id(), &__ipipe_cpu_lock_map);
+               clear_bit(0, &__ipipe_critical_lock);
+               smp_mb__after_atomic();
+       }
+#endif /* CONFIG_SMP */
+
+       hard_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ipipe_critical_exit);
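+
+/*
+ * Typical usage pattern for the critical section API above, as used
+ * by kernel/ipipe/timer.c for instance (sketch only):
+ *
+ *	flags = ipipe_critical_enter(syncfn);	(syncfn may be NULL)
+ *	... update state shared across CPUs and domains, hw IRQs off ...
+ *	syncfn();				(run it locally if needed)
+ *	ipipe_critical_exit(flags);
+ *
+ * When syncfn is non-NULL, each other online CPU eventually runs it
+ * from the critical IPI handler, serialized on the same barrier.
+ */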
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+
+void ipipe_root_only(void)
+{
+       struct ipipe_domain *this_domain;
+       unsigned long flags;
+
+       flags = hard_smp_local_irq_save();
+
+       this_domain = __ipipe_current_domain;
+       if (likely(this_domain == ipipe_root_domain &&
+                  !test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status))) {
+               hard_smp_local_irq_restore(flags);
+               return;
+       }
+
+       if (!__this_cpu_read(ipipe_percpu.context_check)) {
+               hard_smp_local_irq_restore(flags);
+               return;
+       }
+
+       hard_smp_local_irq_restore(flags);
+
+       ipipe_prepare_panic();
+       ipipe_trace_panic_freeze();
+
+       if (this_domain != ipipe_root_domain)
+               pr_err("I-pipe: Detected illicit call from head domain '%s'\n"
+                      "        into a regular Linux service\n",
+                      this_domain->name);
+       else
+               pr_err("I-pipe: Detected stalled head domain, "
+                       "probably caused by a bug.\n"
+                       "        A critical section may have been "
+                       "left unterminated.\n");
+       dump_stack();
+       ipipe_trace_panic_dump();
+}
+EXPORT_SYMBOL(ipipe_root_only);
+
+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */
+
+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP)
+
+int notrace __ipipe_check_percpu_access(void)
+{
+       struct ipipe_percpu_domain_data *p;
+       struct ipipe_domain *this_domain;
+       unsigned long flags;
+       int ret = 0;
+
+       flags = hard_local_irq_save_notrace();
+
+       /*
+        * Don't use __ipipe_current_domain here, this would recurse
+        * indefinitely.
+        */
+       this_domain = raw_cpu_read(ipipe_percpu.curr)->domain;
+
+       /*
+        * Only the root domain may implement preemptive CPU migration
+        * of tasks, so anything above in the pipeline should be fine.
+        */
+       if (this_domain != ipipe_root_domain)
+               goto out;
+
+       if (raw_irqs_disabled_flags(flags))
+               goto out;
+
+       /*
+        * Last chance: hw interrupts were enabled on entry while
+        * running over the root domain, but the root stage might be
+        * currently stalled, in which case preemption would be
+        * disabled, and no migration could occur.
+        */
+
+       p = raw_cpu_ptr(&ipipe_percpu.root);
+       if (!preemptible())
+               goto out;
+       /*
+        * Our caller may end up accessing the wrong per-cpu variable
+        * instance due to CPU migration; tell it to complain about
+        * this.
+        */
+       ret = 1;
+out:
+       hard_local_irq_restore_notrace(flags);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(__ipipe_check_percpu_access);
+
+void __ipipe_spin_unlock_debug(unsigned long flags)
+{
+       /*
+        * We catch a nasty issue where spin_unlock_irqrestore() on a
+        * regular kernel spinlock is about to re-enable hw interrupts
+        * in a section entered with hw irqs off. This is clearly the
+        * sign of a massive breakage coming. The usual suspect is an
+        * overlooked regular spinlock used within a section which
+        * must run with hw irqs disabled.
+        */
+       IPIPE_WARN_ONCE(!raw_irqs_disabled_flags(flags) && hard_irqs_disabled());
+}
+EXPORT_SYMBOL(__ipipe_spin_unlock_debug);
+
+#endif /* CONFIG_IPIPE_DEBUG_INTERNAL && CONFIG_SMP */
+
+void ipipe_prepare_panic(void)
+{
+#ifdef CONFIG_PRINTK
+       __ipipe_printk_bypass = 1;
+#endif
+       ipipe_context_check_off();
+}
+EXPORT_SYMBOL_GPL(ipipe_prepare_panic);
+
+static void __ipipe_do_work(unsigned int virq, void *cookie)
+{
+       struct ipipe_work_header *work;
+       unsigned long flags;
+       void *curr, *tail;
+       int cpu;
+
+       /*
+        * Work is dispatched in enqueuing order. This interrupt
+        * context can't migrate to another CPU.
+        */
+       cpu = smp_processor_id();
+       curr = per_cpu(work_buf, cpu);
+
+       for (;;) {
+               flags = hard_local_irq_save();
+               tail = per_cpu(work_tail, cpu);
+               if (curr == tail) {
+                       per_cpu(work_tail, cpu) = per_cpu(work_buf, cpu);
+                       hard_local_irq_restore(flags);
+                       return;
+               }
+               work = curr;
+               curr += work->size;
+               hard_local_irq_restore(flags);
+               work->handler(work);
+       }
+}
+
+void __ipipe_post_work_root(struct ipipe_work_header *work)
+{
+       unsigned long flags;
+       void *tail;
+       int cpu;
+
+       /*
+        * Subtle: we want to use the head stall/unstall operators,
+        * not the hard_* routines to protect against races. This way,
+        * we ensure that a root-based caller will trigger the virq
+        * handling immediately when unstalling the head stage, as a
+        * result of calling __ipipe_sync_pipeline() under the hood.
+        */
+       flags = ipipe_test_and_stall_head();
+       cpu = ipipe_processor_id();
+       tail = per_cpu(work_tail, cpu);
+
+       if (WARN_ON_ONCE((unsigned char *)tail + work->size >=
+                        per_cpu(work_buf, cpu) + WORKBUF_SIZE))
+               goto out;
+
+       /* Work handling is deferred, so data has to be copied. */
+       memcpy(tail, work, work->size);
+       per_cpu(work_tail, cpu) = tail + work->size;
+       ipipe_post_irq_root(__ipipe_work_virq);
+out:
+       ipipe_restore_head(flags);
+}
+EXPORT_SYMBOL_GPL(__ipipe_post_work_root);
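+
+/*
+ * Minimal usage sketch; the wrapper type and handler below are
+ * hypothetical, for illustration only:
+ *
+ *	struct my_work {
+ *		struct ipipe_work_header work;	(must come first, since
+ *						 work->size bytes starting
+ *						 at the header are copied)
+ *		int payload;
+ *	};
+ *
+ *	static void my_handler(struct ipipe_work_header *work)
+ *	{
+ *		struct my_work *p = container_of(work, struct my_work, work);
+ *		pr_info("deferred payload=%d\n", p->payload);
+ *	}
+ *
+ *	struct my_work w = {
+ *		.work = { .size = sizeof(w), .handler = my_handler },
+ *		.payload = 42,
+ *	};
+ *	__ipipe_post_work_root(&w.work);
+ *
+ * The descriptor is copied into a per-cpu ring, so it may live on the
+ * caller's stack; the handler then runs later from a root-stage virq.
+ */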
+
+void __weak __ipipe_arch_share_current(int flags)
+{
+}
+
+void __ipipe_share_current(int flags)
+{
+       ipipe_root_only();
+
+       __ipipe_arch_share_current(flags);
+}
+EXPORT_SYMBOL_GPL(__ipipe_share_current);
+
+#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || defined(CONFIG_PROVE_LOCKING) || \
+       defined(CONFIG_PREEMPT_VOLUNTARY) || defined(CONFIG_IPIPE_DEBUG_CONTEXT)
+void __ipipe_uaccess_might_fault(void)
+{
+       struct ipipe_percpu_domain_data *pdd;
+       struct ipipe_domain *ipd;
+       unsigned long flags;
+       
+       flags = hard_local_irq_save();
+       ipd = __ipipe_current_domain;
+       if (ipd == ipipe_root_domain) {
+               hard_local_irq_restore(flags);
+               might_fault();
+               return;
+       }
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+       pdd = ipipe_this_cpu_context(ipd);
+       WARN_ON_ONCE(hard_irqs_disabled_flags(flags) ||
+                    test_bit(IPIPE_STALL_FLAG, &pdd->status));
+#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+       (void)pdd;
+#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+       hard_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(__ipipe_uaccess_might_fault);
+#endif
diff --git a/kernel/ipipe/timer.c b/kernel/ipipe/timer.c
new file mode 100644 (file)
index 0000000..0da9564
--- /dev/null
@@ -0,0 +1,588 @@
+/* -*- linux-c -*-
+ * linux/kernel/ipipe/timer.c
+ *
+ * Copyright (C) 2012 Gilles Chanteperdrix
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * I-pipe timer request interface.
+ */
+#include <linux/ipipe.h>
+#include <linux/percpu.h>
+#include <linux/irqdesc.h>
+#include <linux/cpumask.h>
+#include <linux/spinlock.h>
+#include <linux/ipipe_tickdev.h>
+#include <linux/interrupt.h>
+#include <linux/export.h>
+
+unsigned long __ipipe_hrtimer_freq;
+
+static LIST_HEAD(timers);
+static IPIPE_DEFINE_SPINLOCK(lock);
+
+static DEFINE_PER_CPU(struct ipipe_timer *, percpu_timer);
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+/*
+ * Default request method: switch to oneshot mode if supported.
+ */
+static void ipipe_timer_default_request(struct ipipe_timer *timer, int steal)
+{
+       struct clock_event_device *evtdev = timer->host_timer;
+
+       if (!(evtdev->features & CLOCK_EVT_FEAT_ONESHOT))
+               return;
+
+       if (clockevent_state_oneshot(evtdev) ||
+               clockevent_state_oneshot_stopped(evtdev))
+               timer->orig_mode = CLOCK_EVT_MODE_ONESHOT;
+       else {
+               if (clockevent_state_periodic(evtdev))
+                       timer->orig_mode = CLOCK_EVT_MODE_PERIODIC;
+               else if (clockevent_state_shutdown(evtdev))
+                       timer->orig_mode = CLOCK_EVT_MODE_SHUTDOWN;
+               else
+                       timer->orig_mode = CLOCK_EVT_MODE_UNUSED;
+               evtdev->set_state_oneshot(evtdev);
+               evtdev->set_next_event(timer->freq / HZ, evtdev);
+       }
+}
+
+/*
+ * Default release method: return the timer to the mode it had when
+ * starting.
+ */
+static void ipipe_timer_default_release(struct ipipe_timer *timer)
+{
+       struct clock_event_device *evtdev = timer->host_timer;
+
+       switch (timer->orig_mode) {
+       case CLOCK_EVT_MODE_SHUTDOWN:
+               evtdev->set_state_shutdown(evtdev);
+               break;
+       case CLOCK_EVT_MODE_PERIODIC:
+               evtdev->set_state_periodic(evtdev);
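+               /* Fall through: also reprogram the next event. */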
+       case CLOCK_EVT_MODE_ONESHOT:
+               evtdev->set_next_event(timer->freq / HZ, evtdev);
+               break;
+       }
+}
+
+static int get_dev_mode(struct clock_event_device *evtdev)
+{
+       if (clockevent_state_oneshot(evtdev) ||
+               clockevent_state_oneshot_stopped(evtdev))
+               return CLOCK_EVT_MODE_ONESHOT;
+
+       if (clockevent_state_periodic(evtdev))
+               return CLOCK_EVT_MODE_PERIODIC;
+
+       if (clockevent_state_shutdown(evtdev))
+               return CLOCK_EVT_MODE_SHUTDOWN;
+
+       return CLOCK_EVT_MODE_UNUSED;
+}
+
+void ipipe_host_timer_register(struct clock_event_device *evtdev)
+{
+       struct ipipe_timer *timer = evtdev->ipipe_timer;
+
+       if (timer == NULL)
+               return;
+
+       timer->orig_mode = CLOCK_EVT_MODE_UNUSED;
+       
+       if (timer->request == NULL)
+               timer->request = ipipe_timer_default_request;
+
+       /*
+        * By default, use the same method as the Linux timer; on ARM
+        * at least, most set_next_event methods are safe to call from
+        * the Xenomai domain anyway.
+        */
+       if (timer->set == NULL) {
+               timer->timer_set = evtdev;
+               timer->set = (typeof(timer->set))evtdev->set_next_event;
+       }
+
+       if (timer->release == NULL)
+               timer->release = ipipe_timer_default_release;
+
+       if (timer->name == NULL)
+               timer->name = evtdev->name;
+
+       if (timer->rating == 0)
+               timer->rating = evtdev->rating;
+
+       timer->freq = (1000000000ULL * evtdev->mult) >> evtdev->shift;
+
+       if (timer->min_delay_ticks == 0)
+               timer->min_delay_ticks =
+                       (evtdev->min_delta_ns * evtdev->mult) >> evtdev->shift;
+
+       if (timer->cpumask == NULL)
+               timer->cpumask = evtdev->cpumask;
+
+       timer->host_timer = evtdev;
+
+       ipipe_timer_register(timer);
+}
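+
+/*
+ * A clock event driver exposes its device to the pipeline by pointing
+ * evtdev->ipipe_timer at an ipipe_timer descriptor before the device
+ * is registered; at least the irq field must be filled in by the
+ * driver, the remaining fields are defaulted above from the clockevent
+ * itself. Hypothetical sketch (names are placeholders):
+ *
+ *	static struct ipipe_timer my_itimer = {
+ *		.irq = MY_TIMER_IRQ,
+ *	};
+ *
+ *	my_evtdev.ipipe_timer = &my_itimer;
+ *	clockevents_config_and_register(&my_evtdev, rate, min, max);
+ */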
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+
+/*
+ * Register a timer: timers are kept in a list sorted by decreasing rating.
+ */
+void ipipe_timer_register(struct ipipe_timer *timer)
+{
+       struct ipipe_timer *t;
+       unsigned long flags;
+
+       if (timer->timer_set == NULL)
+               timer->timer_set = timer;
+
+       if (timer->cpumask == NULL)
+               timer->cpumask = cpumask_of(smp_processor_id());
+
+       raw_spin_lock_irqsave(&lock, flags);
+
+       list_for_each_entry(t, &timers, link) {
+               if (t->rating <= timer->rating) {
+                       __list_add(&timer->link, t->link.prev, &t->link);
+                       goto done;
+               }
+       }
+       list_add_tail(&timer->link, &timers);
+  done:
+       raw_spin_unlock_irqrestore(&lock, flags);
+}
+
+static void ipipe_timer_request_sync(void)
+{
+       struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer);
+       struct clock_event_device *evtdev;
+       int steal;
+
+       if (!timer)
+               return;
+
+       evtdev = timer->host_timer;
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+       steal = evtdev != NULL && !clockevent_state_detached(evtdev);
+#else /* !CONFIG_GENERIC_CLOCKEVENTS */
+       steal = 1;
+#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
+
+       timer->request(timer, steal);
+}
+
+static void config_pcpu_timer(struct ipipe_timer *t, unsigned hrclock_freq)
+{
+       unsigned long long tmp;
+       unsigned hrtimer_freq;
+
+       if (__ipipe_hrtimer_freq != t->freq)
+               __ipipe_hrtimer_freq = t->freq;
+
+       hrtimer_freq = t->freq;
+       if (__ipipe_hrclock_freq > UINT_MAX)
+               hrtimer_freq /= 1000;
+
+       t->c2t_integ = hrtimer_freq / hrclock_freq;
+       tmp = (((unsigned long long)
+               (hrtimer_freq % hrclock_freq)) << 32)
+               + hrclock_freq - 1;
+       do_div(tmp, hrclock_freq);
+       t->c2t_frac = tmp;
+}
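+
+/*
+ * c2t_integ/c2t_frac form a 32.32 fixed-point ratio used by
+ * ipipe_timer_set() to turn clock ticks into timer ticks. For
+ * instance, a 19.2 MHz clock paired with a 24 MHz timer gives
+ * c2t_integ = 1 and c2t_frac ~= 0.25 * 2^32, so a 1920-tick clock
+ * delay (100 us) maps to 1920 + 480 = 2400 timer ticks, i.e. 100 us
+ * at 24 MHz.
+ */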
+
+/* Set up a timer as per-cpu timer for ipipe */
+static void install_pcpu_timer(unsigned cpu, unsigned hrclock_freq,
+                             struct ipipe_timer *t)
+{
+       per_cpu(ipipe_percpu.hrtimer_irq, cpu) = t->irq;
+       per_cpu(percpu_timer, cpu) = t;
+       config_pcpu_timer(t, hrclock_freq);
+}
+
+static void select_root_only_timer(unsigned cpu, unsigned hrclock_khz,
+                                  const struct cpumask *mask,
+                                  struct ipipe_timer *t)
+{
+       unsigned icpu;
+       struct clock_event_device *evtdev;
+
+       /*
+        * If no ipipe-supported CPU shares an interrupt with the
+        * timer, we do not need to care about it.
+        */
+       for_each_cpu(icpu, mask) {
+               if (t->irq == per_cpu(ipipe_percpu.hrtimer_irq, icpu)) {
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+                       evtdev = t->host_timer;
+                       if (evtdev && clockevent_state_shutdown(evtdev))
+                               continue;
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+                       goto found;
+               }
+       }
+
+       return;
+
+found:
+       install_pcpu_timer(cpu, hrclock_khz, t);
+}
+
+/*
+ * Choose per-cpu timers with the highest rating by traversing the
+ * rating-sorted list for each CPU.
+ */
+int ipipe_select_timers(const struct cpumask *mask)
+{
+       unsigned hrclock_freq;
+       unsigned long long tmp;
+       struct ipipe_timer *t;
+       struct clock_event_device *evtdev;
+       unsigned long flags;
+       unsigned cpu;
+       cpumask_t fixup;
+
+       if (!__ipipe_hrclock_ok()) {
+               printk("I-pipe: high-resolution clock not working\n");
+               return -ENODEV;
+       }
+
+       if (__ipipe_hrclock_freq > UINT_MAX) {
+               tmp = __ipipe_hrclock_freq;
+               do_div(tmp, 1000);
+               hrclock_freq = tmp;
+       } else
+               hrclock_freq = __ipipe_hrclock_freq;
+
+       raw_spin_lock_irqsave(&lock, flags);
+
+       /* First, choose timers for the CPUs handled by ipipe */
+       for_each_cpu(cpu, mask) {
+               list_for_each_entry(t, &timers, link) {
+                       if (!cpumask_test_cpu(cpu, t->cpumask))
+                               continue;
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+                       evtdev = t->host_timer;
+                       if (evtdev && clockevent_state_shutdown(evtdev))
+                               continue;
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+                       goto found;
+               }
+
+               printk("I-pipe: could not find timer for cpu #%d\n",
+                      cpu);
+               goto err_remove_all;
+found:
+               install_pcpu_timer(cpu, hrclock_freq, t);
+       }
+
+       /*
+        * Second, check whether we need to fix up any CPUs handled by
+        * Linux but not by ipipe: their timer interrupt may need to be
+        * forwarded because it is shared with an ipipe-enabled timer.
+        */
+       cpumask_andnot(&fixup, cpu_online_mask, mask);
+
+       for_each_cpu(cpu, &fixup) {
+               list_for_each_entry(t, &timers, link) {
+                       if (!cpumask_test_cpu(cpu, t->cpumask))
+                               continue;
+
+                       select_root_only_timer(cpu, hrclock_freq, mask, t);
+               }
+       }
+
+       raw_spin_unlock_irqrestore(&lock, flags);
+
+       flags = ipipe_critical_enter(ipipe_timer_request_sync);
+       ipipe_timer_request_sync();
+       ipipe_critical_exit(flags);
+
+       return 0;
+
+err_remove_all:
+       raw_spin_unlock_irqrestore(&lock, flags);
+
+       for_each_cpu(cpu, mask) {
+               per_cpu(ipipe_percpu.hrtimer_irq, cpu) = -1;
+               per_cpu(percpu_timer, cpu) = NULL;
+       }
+       __ipipe_hrtimer_freq = 0;
+
+       return -ENODEV;
+}
+
+static void ipipe_timer_release_sync(void)
+{
+       struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer);
+
+       if (timer)
+               timer->release(timer);
+}
+
+void ipipe_timers_release(void)
+{
+       unsigned long flags;
+       unsigned cpu;
+
+       flags = ipipe_critical_enter(ipipe_timer_release_sync);
+       ipipe_timer_release_sync();
+       ipipe_critical_exit(flags);
+
+       for_each_online_cpu(cpu) {
+               per_cpu(ipipe_percpu.hrtimer_irq, cpu) = -1;
+               per_cpu(percpu_timer, cpu) = NULL;
+               __ipipe_hrtimer_freq = 0;
+       }
+}
+
+static void __ipipe_ack_hrtimer_irq(struct irq_desc *desc)
+{
+       struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer);
+
+       if (desc)
+               desc->ipipe_ack(desc);
+       if (timer->ack)
+               timer->ack();
+       if (desc)
+               desc->ipipe_end(desc);
+}
+
+static int do_set_oneshot(struct clock_event_device *cdev)
+{
+       struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer);
+
+       timer->mode_handler(CLOCK_EVT_MODE_ONESHOT, cdev);
+
+       return 0;
+}
+
+static int do_set_periodic(struct clock_event_device *cdev)
+{
+       struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer);
+
+       timer->mode_handler(CLOCK_EVT_MODE_PERIODIC, cdev);
+
+       return 0;
+}
+
+static int do_set_shutdown(struct clock_event_device *cdev)
+{
+       struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer);
+
+       timer->mode_handler(CLOCK_EVT_MODE_SHUTDOWN, cdev);
+
+       return 0;
+}
+
+int ipipe_timer_start(void (*tick_handler)(void),
+                     void (*emumode)(enum clock_event_mode mode,
+                                     struct clock_event_device *cdev),
+                     int (*emutick)(unsigned long evt,
+                                    struct clock_event_device *cdev),
+                     unsigned cpu)
+{
+       struct clock_event_device *evtdev;
+       struct ipipe_timer *timer;
+       struct irq_desc *desc;
+       unsigned long flags;
+       int steal, ret;
+
+       timer = per_cpu(percpu_timer, cpu);
+       evtdev = timer->host_timer;
+
+       flags = ipipe_critical_enter(NULL);
+
+       ret = ipipe_request_irq(ipipe_head_domain, timer->irq,
+                               (ipipe_irq_handler_t)tick_handler,
+                               NULL, __ipipe_ack_hrtimer_irq);
+       if (ret < 0 && ret != -EBUSY) {
+               ipipe_critical_exit(flags);
+               return ret;
+       }
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+       steal = evtdev != NULL && !clockevent_state_detached(evtdev);
+       if (steal && evtdev->ipipe_stolen == 0) {
+               timer->real_mult = evtdev->mult;
+               timer->real_shift = evtdev->shift;
+               timer->orig_set_state_periodic = evtdev->set_state_periodic;
+               timer->orig_set_state_oneshot = evtdev->set_state_oneshot;
+               timer->orig_set_state_oneshot_stopped = evtdev->set_state_oneshot_stopped;
+               timer->orig_set_state_shutdown = evtdev->set_state_shutdown;
+               timer->orig_set_next_event = evtdev->set_next_event;
+               timer->mode_handler = emumode;
+               evtdev->mult = 1;
+               evtdev->shift = 0;
+               evtdev->max_delta_ns = UINT_MAX;
+               evtdev->set_state_periodic = do_set_periodic;
+               evtdev->set_state_oneshot = do_set_oneshot;
+               evtdev->set_state_oneshot_stopped = do_set_oneshot;
+               evtdev->set_state_shutdown = do_set_shutdown;
+               evtdev->set_next_event = emutick;
+               evtdev->ipipe_stolen = 1;
+       }
+
+       ret = get_dev_mode(evtdev);
+#else /* CONFIG_GENERIC_CLOCKEVENTS */
+       steal = 1;
+       ret = 0;
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+
+       ipipe_critical_exit(flags);
+
+       desc = irq_to_desc(timer->irq);
+       if (desc && irqd_irq_disabled(&desc->irq_data))
+               ipipe_enable_irq(timer->irq);
+
+       return ret;
+}
+
+void ipipe_timer_stop(unsigned cpu)
+{
+       unsigned long __maybe_unused flags;
+       struct clock_event_device *evtdev;
+       struct ipipe_timer *timer;
+       struct irq_desc *desc;
+
+       timer = per_cpu(percpu_timer, cpu);
+       evtdev = timer->host_timer;
+
+       desc = irq_to_desc(timer->irq);
+       if (desc && irqd_irq_disabled(&desc->irq_data))
+               ipipe_disable_irq(timer->irq);
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+       if (evtdev) {
+               flags = ipipe_critical_enter(NULL);
+
+               if (evtdev->ipipe_stolen) {
+                       evtdev->mult = timer->real_mult;
+                       evtdev->shift = timer->real_shift;
+                       evtdev->set_state_periodic = timer->orig_set_state_periodic;
+                       evtdev->set_state_oneshot = timer->orig_set_state_oneshot;
+                       evtdev->set_state_oneshot_stopped = timer->orig_set_state_oneshot_stopped;
+                       evtdev->set_state_shutdown = timer->orig_set_state_shutdown;
+                       evtdev->set_next_event = timer->orig_set_next_event;
+                       evtdev->ipipe_stolen = 0;
+               }
+
+               ipipe_critical_exit(flags);
+       }
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+
+       ipipe_free_irq(ipipe_head_domain, timer->irq);
+}
+
+void ipipe_timer_set(unsigned long cdelay)
+{
+       unsigned long tdelay;
+       struct ipipe_timer *t;
+
+       t = __ipipe_raw_cpu_read(percpu_timer);
+
+       /*
+        * Even though some architectures may use a 64-bit delay
+        * here, we voluntarily limit it to 32 bits: 4 billion ticks
+        * should be enough for now. Should a timer need more, an
+        * extra call to the tick handler would simply occur after 4
+        * billion ticks.
+        */
+       if (cdelay > UINT_MAX)
+               cdelay = UINT_MAX;
+
+       tdelay = cdelay;
+       if (t->c2t_integ != 1)
+               tdelay *= t->c2t_integ;
+       if (t->c2t_frac)
+               tdelay += ((unsigned long long)cdelay * t->c2t_frac) >> 32;
+       if (tdelay < t->min_delay_ticks)
+               tdelay = t->min_delay_ticks;
+
+       if (t->set(tdelay, t->timer_set) < 0)
+               ipipe_raise_irq(t->irq);
+}
+EXPORT_SYMBOL_GPL(ipipe_timer_set);
+
+const char *ipipe_timer_name(void)
+{
+       return per_cpu(percpu_timer, 0)->name;
+}
+EXPORT_SYMBOL_GPL(ipipe_timer_name);
+
+unsigned ipipe_timer_ns2ticks(struct ipipe_timer *timer, unsigned ns)
+{
+       unsigned long long tmp;
+       BUG_ON(!timer->freq);
+       tmp = (unsigned long long)ns * timer->freq;
+       do_div(tmp, 1000000000);
+       return tmp;
+}
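+
+/*
+ * For the conversion above: e.g. ns = 1000 with a 24 MHz timer gives
+ * 1000 * 24000000 / 1000000000 = 24 ticks.
+ */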
+
+#ifdef CONFIG_IPIPE_HAVE_HOSTRT
+/*
+ * NOTE: The architecture specific code must only call this function
+ * when a clocksource suitable for CLOCK_HOST_REALTIME is enabled.
+ * The event receiver is responsible for providing proper locking.
+ */
+void ipipe_update_hostrt(struct timekeeper *tk)
+{
+       struct tk_read_base *tkr = &tk->tkr_mono;
+       struct clocksource *clock = tkr->clock;
+       struct ipipe_hostrt_data data;
+       struct timespec xt;
+
+       xt.tv_sec = tk->xtime_sec;
+       xt.tv_nsec = (long)(tkr->xtime_nsec >> tkr->shift);
+       ipipe_root_only();
+       data.live = 1;
+       data.cycle_last = tkr->cycle_last;
+       data.mask = clock->mask;
+       data.mult = tkr->mult;
+       data.shift = tkr->shift;
+       data.wall_time_sec = xt.tv_sec;
+       data.wall_time_nsec = xt.tv_nsec;
+       data.wall_to_monotonic.tv_sec = tk->wall_to_monotonic.tv_sec;
+       data.wall_to_monotonic.tv_nsec = tk->wall_to_monotonic.tv_nsec;
+       __ipipe_notify_kevent(IPIPE_KEVT_HOSTRT, &data);
+}
+
+#endif /* CONFIG_IPIPE_HAVE_HOSTRT */
+
+int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
+                             bool force);
+
+void __ipipe_timer_refresh_freq(unsigned int hrclock_freq)
+{
+       struct ipipe_timer *t = __ipipe_raw_cpu_read(percpu_timer);
+       unsigned long flags;
+
+       if (t && t->refresh_freq) {
+               t->freq = t->refresh_freq();
+               flags = hard_local_irq_save();
+               config_pcpu_timer(t, hrclock_freq);
+               hard_local_irq_restore(flags);
+               clockevents_program_event(t->host_timer,
+                                         t->host_timer->next_event, false);
+       }
+}
diff --git a/kernel/ipipe/tracer.c b/kernel/ipipe/tracer.c
new file mode 100644 (file)
index 0000000..7f4d03a
--- /dev/null
@@ -0,0 +1,1486 @@
+/* -*- linux-c -*-
+ * kernel/ipipe/tracer.c
+ *
+ * Copyright (C) 2005 Luotao Fu.
+ *              2005-2008 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include <linux/vmalloc.h>
+#include <linux/pid.h>
+#include <linux/vermagic.h>
+#include <linux/sched.h>
+#include <linux/ipipe.h>
+#include <linux/ftrace.h>
+#include <linux/uaccess.h>
+
+#define IPIPE_TRACE_PATHS          4 /* <!> Do not lower below 3 */
+#define IPIPE_DEFAULT_ACTIVE       0
+#define IPIPE_DEFAULT_MAX          1
+#define IPIPE_DEFAULT_FROZEN       2
+
+#define IPIPE_TRACE_POINTS         (1 << CONFIG_IPIPE_TRACE_SHIFT)
+#define WRAP_POINT_NO(point)       ((point) & (IPIPE_TRACE_POINTS-1))
+
+#define IPIPE_DEFAULT_PRE_TRACE            10
+#define IPIPE_DEFAULT_POST_TRACE    10
+#define IPIPE_DEFAULT_BACK_TRACE    100
+
+#define IPIPE_DELAY_NOTE           1000  /* in nanoseconds */
+#define IPIPE_DELAY_WARN           10000 /* in nanoseconds */
+
+#define IPIPE_TFLG_NMI_LOCK        0x0001
+#define IPIPE_TFLG_NMI_HIT         0x0002
+#define IPIPE_TFLG_NMI_FREEZE_REQ   0x0004
+
+#define IPIPE_TFLG_HWIRQ_OFF       0x0100
+#define IPIPE_TFLG_FREEZING        0x0200
+#define IPIPE_TFLG_CURRDOM_SHIFT    10  /* bits 10..11: current domain */
+#define IPIPE_TFLG_CURRDOM_MASK            0x0C00
+#define IPIPE_TFLG_DOMSTATE_SHIFT   12  /* bits 12..15: domain stalled? */
+#define IPIPE_TFLG_DOMSTATE_BITS    1
+
+#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \
+       (point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT)))
+#define IPIPE_TFLG_CURRENT_DOMAIN(point) \
+       ((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT)
+
+struct ipipe_trace_point {
+       short type;
+       short flags;
+       unsigned long eip;
+       unsigned long parent_eip;
+       unsigned long v;
+       unsigned long long timestamp;
+};
+
+struct ipipe_trace_path {
+       volatile int flags;
+       int dump_lock; /* separated from flags due to cross-cpu access */
+       int trace_pos; /* next point to fill */
+       int begin, end; /* finalised path begin and end */
+       int post_trace; /* non-zero when in post-trace phase */
+       unsigned long long length; /* max path length in cycles */
+       unsigned long nmi_saved_eip; /* for deferred requests from NMIs */
+       unsigned long nmi_saved_parent_eip;
+       unsigned long nmi_saved_v;
+       struct ipipe_trace_point point[IPIPE_TRACE_POINTS];
+} ____cacheline_aligned_in_smp;
+
+enum ipipe_trace_type
+{
+       IPIPE_TRACE_FUNC = 0,
+       IPIPE_TRACE_BEGIN,
+       IPIPE_TRACE_END,
+       IPIPE_TRACE_FREEZE,
+       IPIPE_TRACE_SPECIAL,
+       IPIPE_TRACE_PID,
+       IPIPE_TRACE_EVENT,
+};
+
+#define IPIPE_TYPE_MASK                    0x0007
+#define IPIPE_TYPE_BITS                    3
+
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
+static DEFINE_PER_CPU(struct ipipe_trace_path *, trace_path);
+#else /* !CONFIG_IPIPE_TRACE_VMALLOC */
+static DEFINE_PER_CPU(struct ipipe_trace_path, trace_path[IPIPE_TRACE_PATHS]) =
+       { [0 ... IPIPE_TRACE_PATHS-1] = { .begin = -1, .end = -1 } };
+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
+
+int ipipe_trace_enable = 0;
+
+static DEFINE_PER_CPU(int, active_path) = { IPIPE_DEFAULT_ACTIVE };
+static DEFINE_PER_CPU(int, max_path) = { IPIPE_DEFAULT_MAX };
+static DEFINE_PER_CPU(int, frozen_path) = { IPIPE_DEFAULT_FROZEN };
+static IPIPE_DEFINE_SPINLOCK(global_path_lock);
+static int pre_trace = IPIPE_DEFAULT_PRE_TRACE;
+static int post_trace = IPIPE_DEFAULT_POST_TRACE;
+static int back_trace = IPIPE_DEFAULT_BACK_TRACE;
+static int verbose_trace = 1;
+static unsigned long trace_overhead;
+
+static unsigned long trigger_begin;
+static unsigned long trigger_end;
+
+static DEFINE_MUTEX(out_mutex);
+static struct ipipe_trace_path *print_path;
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+static struct ipipe_trace_path *panic_path;
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
+static int print_pre_trace;
+static int print_post_trace;
+
+
+static long __ipipe_signed_tsc2us(long long tsc);
+static void
+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point);
+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip);
+
+static inline void store_states(struct ipipe_domain *ipd,
+                               struct ipipe_trace_point *point, int pos)
+{
+       if (test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpu_context(ipd)->status))
+               point->flags |= 1 << (pos + IPIPE_TFLG_DOMSTATE_SHIFT);
+
+       if (ipd == __ipipe_current_domain)
+               point->flags |= pos << IPIPE_TFLG_CURRDOM_SHIFT;
+}
+
+static notrace void
+__ipipe_store_domain_states(struct ipipe_trace_point *point)
+{
+       store_states(ipipe_root_domain, point, 0);
+       if (ipipe_head_domain != ipipe_root_domain)
+               store_states(ipipe_head_domain, point, 1);
+}
+
+static notrace int __ipipe_get_free_trace_path(int old, int cpu)
+{
+       int new_active = old;
+       struct ipipe_trace_path *tp;
+
+       do {
+               if (++new_active == IPIPE_TRACE_PATHS)
+                       new_active = 0;
+               tp = &per_cpu(trace_path, cpu)[new_active];
+       } while (new_active == per_cpu(max_path, cpu) ||
+                new_active == per_cpu(frozen_path, cpu) ||
+                tp->dump_lock);
+
+       return new_active;
+}
+
+static notrace void
+__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp,
+                         struct ipipe_trace_path *old_tp, int old_pos)
+{
+       int i;
+
+       new_tp->trace_pos = pre_trace+1;
+
+       for (i = new_tp->trace_pos; i > 0; i--)
+               memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)],
+                      &old_tp->point[WRAP_POINT_NO(old_pos-i)],
+                      sizeof(struct ipipe_trace_point));
+
+       /* mark the end (i.e. the point before point[0]) invalid */
+       new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0;
+}
+
+static notrace struct ipipe_trace_path *
+__ipipe_trace_end(int cpu, struct ipipe_trace_path *tp, int pos)
+{
+       struct ipipe_trace_path *old_tp = tp;
+       long active = per_cpu(active_path, cpu);
+       unsigned long long length;
+
+       /* do we have a new worst case? */
+       length = tp->point[tp->end].timestamp -
+                tp->point[tp->begin].timestamp;
+       if (length > per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)].length) {
+               /* we need protection here against other cpus trying
+                  to start a proc dump */
+               raw_spin_lock(&global_path_lock);
+
+               /* active path holds new worst case */
+               tp->length = length;
+               per_cpu(max_path, cpu) = active;
+
+               /* find next unused trace path */
+               active = __ipipe_get_free_trace_path(active, cpu);
+
+               raw_spin_unlock(&global_path_lock);
+
+               tp = &per_cpu(trace_path, cpu)[active];
+
+               /* migrate last entries for pre-tracing */
+               __ipipe_migrate_pre_trace(tp, old_tp, pos);
+       }
+
+       return tp;
+}
+
+static notrace struct ipipe_trace_path *
+__ipipe_trace_freeze(int cpu, struct ipipe_trace_path *tp, int pos)
+{
+       struct ipipe_trace_path *old_tp = tp;
+       long active = per_cpu(active_path, cpu);
+       int n;
+
+       /* frozen paths have no core (begin=end) */
+       tp->begin = tp->end;
+
+       /* we need protection here against other cpus trying
+        * to set their frozen path or to start a proc dump */
+       raw_spin_lock(&global_path_lock);
+
+       per_cpu(frozen_path, cpu) = active;
+
+       /* find next unused trace path */
+       active = __ipipe_get_free_trace_path(active, cpu);
+
+       /* check if this is the first frozen path */
+       for_each_possible_cpu(n) {
+               if (n != cpu &&
+                   per_cpu(trace_path, n)[per_cpu(frozen_path, n)].end >= 0)
+                       tp->end = -1;
+       }
+
+       raw_spin_unlock(&global_path_lock);
+
+       tp = &per_cpu(trace_path, cpu)[active];
+
+       /* migrate last entries for pre-tracing */
+       __ipipe_migrate_pre_trace(tp, old_tp, pos);
+
+       return tp;
+}
+
+void notrace
+__ipipe_trace(enum ipipe_trace_type type, unsigned long eip,
+             unsigned long parent_eip, unsigned long v)
+{
+       struct ipipe_trace_path *tp, *old_tp;
+       int pos, next_pos, begin;
+       struct ipipe_trace_point *point;
+       unsigned long flags;
+       int cpu;
+
+       flags = hard_local_irq_save_notrace();
+
+       cpu = ipipe_processor_id();
+ restart:
+       tp = old_tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
+
+       /* here starts a race window with NMIs - caught below */
+
+       /* check for NMI recursion */
+       if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) {
+               tp->flags |= IPIPE_TFLG_NMI_HIT;
+
+               /* first freeze request from NMI context? */
+               if ((type == IPIPE_TRACE_FREEZE) &&
+                   !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) {
+                       /* save arguments and mark deferred freezing */
+                       tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ;
+                       tp->nmi_saved_eip = eip;
+                       tp->nmi_saved_parent_eip = parent_eip;
+                       tp->nmi_saved_v = v;
+               }
+               return; /* no need for restoring flags inside IRQ */
+       }
+
+       /* clear NMI events and set lock (atomically per cpu) */
+       tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
+                                  IPIPE_TFLG_NMI_FREEZE_REQ))
+                              | IPIPE_TFLG_NMI_LOCK;
+
+       /* check active_path again - some nasty NMI may have switched
+        * it in the meantime */
+       if (unlikely(tp !=
+                    &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)])) {
+               /* release lock on wrong path and restart */
+               tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+               /* there is no chance that the NMI got deferred
+                * => no need to check for pending freeze requests */
+               goto restart;
+       }
+
+       /* get the point buffer */
+       pos = tp->trace_pos;
+       point = &tp->point[pos];
+
+       /* store all trace point data */
+       point->type = type;
+       point->flags = hard_irqs_disabled_flags(flags) ? IPIPE_TFLG_HWIRQ_OFF : 0;
+       point->eip = eip;
+       point->parent_eip = parent_eip;
+       point->v = v;
+       ipipe_read_tsc(point->timestamp);
+
+       __ipipe_store_domain_states(point);
+
+       /* forward to next point buffer */
+       next_pos = WRAP_POINT_NO(pos+1);
+       tp->trace_pos = next_pos;
+
+       /* only mark beginning if we haven't started yet */
+       begin = tp->begin;
+       if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0))
+               tp->begin = pos;
+
+       /* end of critical path, start post-trace if not already started */
+       if (unlikely(type == IPIPE_TRACE_END) &&
+           (begin >= 0) && !tp->post_trace)
+               tp->post_trace = post_trace + 1;
+
+       /* freeze only if the slot is free and we are not already freezing */
+       if ((unlikely(type == IPIPE_TRACE_FREEZE) ||
+            (unlikely(eip >= trigger_begin && eip <= trigger_end) &&
+            type == IPIPE_TRACE_FUNC)) &&
+           per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)].begin < 0 &&
+           !(tp->flags & IPIPE_TFLG_FREEZING)) {
+               tp->post_trace = post_trace + 1;
+               tp->flags |= IPIPE_TFLG_FREEZING;
+       }
+
+       /* enforce end of trace in case of overflow */
+       if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) {
+               tp->end = pos;
+               goto enforce_end;
+       }
+
+       /* stop tracing this path if we are in post-trace and
+        *  a) that phase is over now or
+        *  b) a new TRACE_BEGIN came in but we are not freezing this path */
+       if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) ||
+                    ((type == IPIPE_TRACE_BEGIN) &&
+                     !(tp->flags & IPIPE_TFLG_FREEZING))))) {
+               /* store the path's end (i.e. excluding post-trace) */
+               tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace);
+
+ enforce_end:
+               if (tp->flags & IPIPE_TFLG_FREEZING)
+                       tp = __ipipe_trace_freeze(cpu, tp, pos);
+               else
+                       tp = __ipipe_trace_end(cpu, tp, pos);
+
+               /* reset the active path, maybe already start a new one */
+               tp->begin = (type == IPIPE_TRACE_BEGIN) ?
+                       WRAP_POINT_NO(tp->trace_pos - 1) : -1;
+               tp->end = -1;
+               tp->post_trace = 0;
+               tp->flags = 0;
+
+               /* update active_path not earlier to avoid races with NMIs */
+               per_cpu(active_path, cpu) = tp - per_cpu(trace_path, cpu);
+       }
+
+       /* we still have old_tp and point,
+        * let's reset NMI lock and check for catches */
+       old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+       if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) {
+               /* well, this late tagging may not immediately be visible to
+                * other cpus already dumping this path - a minor issue */
+               point->flags |= IPIPE_TFLG_NMI_HIT;
+
+               /* handle deferred freezing from NMI context */
+               if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
+                       __ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip,
+                                     old_tp->nmi_saved_parent_eip,
+                                     old_tp->nmi_saved_v);
+       }
+
+       hard_local_irq_restore_notrace(flags);
+}
+
+static unsigned long __ipipe_global_path_lock(void)
+{
+       unsigned long flags;
+       int cpu;
+       struct ipipe_trace_path *tp;
+
+       raw_spin_lock_irqsave(&global_path_lock, flags);
+
+       cpu = ipipe_processor_id();
+ restart:
+       tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
+
+       /* here is a small race window with NMIs - caught below */
+
+       /* clear NMI events and set lock (atomically per cpu) */
+       tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
+                                  IPIPE_TFLG_NMI_FREEZE_REQ))
+                              | IPIPE_TFLG_NMI_LOCK;
+
+       /* check active_path again - some nasty NMI may have switched
+        * it in the meantime */
+       if (tp != &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]) {
+               /* release lock on wrong path and restart */
+               tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+               /* there is no chance that the NMI got deferred
+                * => no need to check for pending freeze requests */
+               goto restart;
+       }
+
+       return flags;
+}
+
+static void __ipipe_global_path_unlock(unsigned long flags)
+{
+       int cpu;
+       struct ipipe_trace_path *tp;
+
+       /* release spinlock first - it's not involved in the NMI issue */
+       __ipipe_spin_unlock_irqbegin(&global_path_lock);
+
+       cpu = ipipe_processor_id();
+       tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
+
+       tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+       /* handle deferred freezing from NMI context */
+       if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
+               __ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip,
+                             tp->nmi_saved_parent_eip, tp->nmi_saved_v);
+
+       /* See __ipipe_spin_lock_irqsave() and friends. */
+       __ipipe_spin_unlock_irqcomplete(flags);
+}
+
+void notrace asmlinkage
+ipipe_trace_asm(enum ipipe_trace_type type, unsigned long eip,
+               unsigned long parent_eip, unsigned long v)
+{
+       if (!ipipe_trace_enable)
+               return;
+       __ipipe_trace(type, eip, parent_eip, v);
+}
+
+void notrace ipipe_trace_begin(unsigned long v)
+{
+       if (!ipipe_trace_enable)
+               return;
+       __ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0,
+                     __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_begin);
+
+void notrace ipipe_trace_end(unsigned long v)
+{
+       if (!ipipe_trace_enable)
+               return;
+       __ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0,
+                     __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_end);
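+
+/*
+ * Usage sketch (hypothetical call sites): a latency-sensitive section
+ * may be bracketed with matching begin/end points, e.g.
+ *
+ *     ipipe_trace_begin(0x1);
+ *     ...critical work...
+ *     ipipe_trace_end(0x1);
+ *
+ * so that the longest such section ends up in the worst-case ("max")
+ * dump below.
+ */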
+
+void notrace ipipe_trace_irqbegin(int irq, struct pt_regs *regs)
+{
+       if (!ipipe_trace_enable)
+               return;
+       __ipipe_trace(IPIPE_TRACE_BEGIN, instruction_pointer(regs),
+                     __BUILTIN_RETURN_ADDRESS1, irq);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_irqbegin);
+
+void notrace ipipe_trace_irqend(int irq, struct pt_regs *regs)
+{
+       if (!ipipe_trace_enable)
+               return;
+       __ipipe_trace(IPIPE_TRACE_END, instruction_pointer(regs),
+                     __BUILTIN_RETURN_ADDRESS1, irq);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_irqend);
+
+void notrace ipipe_trace_freeze(unsigned long v)
+{
+       if (!ipipe_trace_enable)
+               return;
+       __ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0,
+                     __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_freeze);
+
+void notrace ipipe_trace_special(unsigned char id, unsigned long v)
+{
+       if (!ipipe_trace_enable)
+               return;
+       __ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS),
+                     __BUILTIN_RETURN_ADDRESS0,
+                     __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_special);
+
+void notrace ipipe_trace_pid(pid_t pid, short prio)
+{
+       if (!ipipe_trace_enable)
+               return;
+       __ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS),
+                     __BUILTIN_RETURN_ADDRESS0,
+                     __BUILTIN_RETURN_ADDRESS1, pid);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_pid);
+
+void notrace ipipe_trace_event(unsigned char id, unsigned long delay_tsc)
+{
+       if (!ipipe_trace_enable)
+               return;
+       __ipipe_trace(IPIPE_TRACE_EVENT | (id << IPIPE_TYPE_BITS),
+                     __BUILTIN_RETURN_ADDRESS0,
+                     __BUILTIN_RETURN_ADDRESS1, delay_tsc);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_event);
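+
+/*
+ * The special/pid/event variants above encode a caller-supplied id (or
+ * the task priority) in the bits above IPIPE_TYPE_BITS of the point
+ * type; the output side decodes it again, so e.g. a hypothetical
+ * ipipe_trace_special(0x42, value) call shows up as "(0x42)" next to
+ * its value in the dumps.
+ */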
+
+int ipipe_trace_max_reset(void)
+{
+       int cpu;
+       unsigned long flags;
+       struct ipipe_trace_path *path;
+       int ret = 0;
+
+       flags = __ipipe_global_path_lock();
+
+       for_each_possible_cpu(cpu) {
+               path = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)];
+
+               if (path->dump_lock) {
+                       ret = -EBUSY;
+                       break;
+               }
+
+               path->begin     = -1;
+               path->end       = -1;
+               path->trace_pos = 0;
+               path->length    = 0;
+       }
+
+       __ipipe_global_path_unlock(flags);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_max_reset);
+
+int ipipe_trace_frozen_reset(void)
+{
+       int cpu;
+       unsigned long flags;
+       struct ipipe_trace_path *path;
+       int ret = 0;
+
+       flags = __ipipe_global_path_lock();
+
+       for_each_online_cpu(cpu) {
+               path = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)];
+
+               if (path->dump_lock) {
+                       ret = -EBUSY;
+                       break;
+               }
+
+               path->begin = -1;
+               path->end = -1;
+               path->trace_pos = 0;
+               path->length    = 0;
+       }
+
+       __ipipe_global_path_unlock(flags);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_frozen_reset);
+
+static void
+__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point,
+                     int trylock)
+{
+       struct task_struct *task = NULL;
+       char buf[8];
+       int i;
+       int locked = 1;
+
+       if (trylock) {
+               if (!read_trylock(&tasklist_lock))
+                       locked = 0;
+       } else
+               read_lock(&tasklist_lock);
+
+       if (locked)
+               task = find_task_by_pid_ns((pid_t)point->v, &init_pid_ns);
+
+       if (task)
+               strncpy(task_info, task->comm, 11);
+       else
+               strcpy(task_info, "-<?>-");
+
+       if (locked)
+               read_unlock(&tasklist_lock);
+
+       for (i = strlen(task_info); i < 11; i++)
+               task_info[i] = ' ';
+
+       sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS);
+       strcpy(task_info + (11 - strlen(buf)), buf);
+}
+
+static void
+__ipipe_get_event_date(char *buf,struct ipipe_trace_path *path,
+                      struct ipipe_trace_point *point)
+{
+       long time;
+       int type;
+
+       time = __ipipe_signed_tsc2us(point->timestamp -
+                                    path->point[path->begin].timestamp + point->v);
+       type = point->type >> IPIPE_TYPE_BITS;
+
+       if (type == 0)
+               /*
+                * Event type #0 is predefined and stands for the next
+                * timer tick.
+                */
+               sprintf(buf, "tick@%-6ld", time);
+       else
+               sprintf(buf, "%3d@%-7ld", type, time);
+}
+
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+
+void ipipe_trace_panic_freeze(void)
+{
+       unsigned long flags;
+       int cpu;
+
+       if (!ipipe_trace_enable)
+               return;
+
+       ipipe_trace_enable = 0;
+       flags = hard_local_irq_save_notrace();
+
+       cpu = ipipe_processor_id();
+
+       panic_path = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
+
+       hard_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_panic_freeze);
+
+void ipipe_trace_panic_dump(void)
+{
+       int cnt = back_trace;
+       int start, pos;
+       char buf[16];
+
+       if (!panic_path)
+               return;
+
+       ipipe_context_check_off();
+
+       printk("I-pipe tracer log (%d points):\n", cnt);
+
+       start = pos = WRAP_POINT_NO(panic_path->trace_pos-1);
+
+       while (cnt-- > 0) {
+               struct ipipe_trace_point *point = &panic_path->point[pos];
+               long time;
+               char info[16];
+               int i;
+
+               printk(" %c",
+                      (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' ');
+
+               for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--)
+                       printk("%c",
+                              (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ?
+                               (IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
+                                       '#' : '+') :
+                               (IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
+                                       '*' : ' '));
+
+               if (!point->eip)
+                       printk("-<invalid>-\n");
+               else {
+                       __ipipe_trace_point_type(buf, point);
+                       printk("%s", buf);
+
+                       switch (point->type & IPIPE_TYPE_MASK) {
+                               case IPIPE_TRACE_FUNC:
+                                       printk("           ");
+                                       break;
+
+                               case IPIPE_TRACE_PID:
+                                       __ipipe_get_task_info(info,
+                                                             point, 1);
+                                       printk("%s", info);
+                                       break;
+
+                               case IPIPE_TRACE_EVENT:
+                                       __ipipe_get_event_date(info,
+                                                              panic_path, point);
+                                       printk("%s", info);
+                                       break;
+
+                               default:
+                                       printk("0x%08lx ", point->v);
+                       }
+
+                       time = __ipipe_signed_tsc2us(point->timestamp -
+                               panic_path->point[start].timestamp);
+                       printk(" %5ld ", time);
+
+                       __ipipe_print_symname(NULL, point->eip);
+                       printk(" (");
+                       __ipipe_print_symname(NULL, point->parent_eip);
+                       printk(")\n");
+               }
+               pos = WRAP_POINT_NO(pos - 1);
+       }
+
+       panic_path = NULL;
+}
+EXPORT_SYMBOL_GPL(ipipe_trace_panic_dump);
+
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
+
+
+/* --- /proc output --- */
+
+static notrace int __ipipe_in_critical_trpath(long point_no)
+{
+       return ((WRAP_POINT_NO(point_no-print_path->begin) <
+                WRAP_POINT_NO(print_path->end-print_path->begin)) ||
+               ((print_path->end == print_path->begin) &&
+                (WRAP_POINT_NO(point_no-print_path->end) >
+                 print_post_trace)));
+}
+
+static long __ipipe_signed_tsc2us(long long tsc)
+{
+       unsigned long long abs_tsc;
+       long us;
+
+       if (!__ipipe_hrclock_ok())
+               return 0;
+
+       /* ipipe_tsc2us works on unsigned => handle sign separately */
+       abs_tsc = (tsc >= 0) ? tsc : -tsc;
+       us = ipipe_tsc2us(abs_tsc);
+       if (tsc < 0)
+               return -us;
+       else
+               return us;
+}
+
+static void
+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point)
+{
+       switch (point->type & IPIPE_TYPE_MASK) {
+               case IPIPE_TRACE_FUNC:
+                       strcpy(buf, "func    ");
+                       break;
+
+               case IPIPE_TRACE_BEGIN:
+                       strcpy(buf, "begin   ");
+                       break;
+
+               case IPIPE_TRACE_END:
+                       strcpy(buf, "end     ");
+                       break;
+
+               case IPIPE_TRACE_FREEZE:
+                       strcpy(buf, "freeze  ");
+                       break;
+
+               case IPIPE_TRACE_SPECIAL:
+                       sprintf(buf, "(0x%02x)  ",
+                               point->type >> IPIPE_TYPE_BITS);
+                       break;
+
+               case IPIPE_TRACE_PID:
+                       sprintf(buf, "[%5d] ", (pid_t)point->v);
+                       break;
+
+               case IPIPE_TRACE_EVENT:
+                       sprintf(buf, "event   ");
+                       break;
+       }
+}
+
+static void
+__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point)
+{
+       char mark = ' ';
+       int point_no = point - print_path->point;
+       int i;
+
+       if (print_path->end == point_no)
+               mark = '<';
+       else if (print_path->begin == point_no)
+               mark = '>';
+       else if (__ipipe_in_critical_trpath(point_no))
+               mark = ':';
+       seq_printf(m, "%c%c", mark,
+                  (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' ');
+
+       if (!verbose_trace)
+               return;
+
+       for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--)
+               seq_printf(m, "%c",
+                       (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ?
+                           (IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
+                               '#' : '+') :
+                       (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' '));
+}
+
+static void
+__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point)
+{
+       unsigned long delay = 0;
+       int next;
+       char *mark = "  ";
+
+       next = WRAP_POINT_NO(point+1 - print_path->point);
+
+       if (next != print_path->trace_pos)
+               delay = ipipe_tsc2ns(print_path->point[next].timestamp -
+                                    point->timestamp);
+
+       if (__ipipe_in_critical_trpath(point - print_path->point)) {
+               if (delay > IPIPE_DELAY_WARN)
+                       mark = "! ";
+               else if (delay > IPIPE_DELAY_NOTE)
+                       mark = "+ ";
+       }
+       seq_puts(m, mark);
+
+       if (verbose_trace)
+               seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000,
+                          (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' ');
+       else
+               seq_puts(m, " ");
+}
+
+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip)
+{
+       char namebuf[KSYM_NAME_LEN+1];
+       unsigned long size, offset;
+       const char *sym_name;
+       char *modname;
+
+       sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf);
+
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+       if (!m) {
+               /* panic dump */
+               if (sym_name) {
+                       printk("%s+0x%lx", sym_name, offset);
+                       if (modname)
+                               printk(" [%s]", modname);
+               } else
+                       printk("<%08lx>", eip);
+       } else
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
+       {
+               if (sym_name) {
+                       if (verbose_trace) {
+                               seq_printf(m, "%s+0x%lx", sym_name, offset);
+                               if (modname)
+                                       seq_printf(m, " [%s]", modname);
+                       } else
+                               seq_puts(m, sym_name);
+               } else
+                       seq_printf(m, "<%08lx>", eip);
+       }
+}
+
+static void __ipipe_print_headline(struct seq_file *m)
+{
+       const char *name[2];
+
+       seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu "
+                  "us\n\n", trace_overhead/1000, trace_overhead%1000);
+
+       if (verbose_trace) {
+               name[0] = ipipe_root_domain->name;
+               if (ipipe_head_domain != ipipe_root_domain)
+                       name[1] = ipipe_head_domain->name;
+               else
+                       name[1] = "<unused>";
+
+               seq_printf(m,
+                          " +----- Hard IRQs ('|': locked)\n"
+                          " |+-- %s\n"
+                          " ||+- %s%s\n"
+                          " |||                          +---------- "
+                              "Delay flag ('+': > %d us, '!': > %d us)\n"
+                          " |||                          |        +- "
+                              "NMI noise ('N')\n"
+                          " |||                          |        |\n"
+                          "      Type    User Val.   Time    Delay  Function "
+                              "(Parent)\n",
+                          name[1], name[0],
+                          " ('*': domain stalled, '+': current, "
+                          "'#': current+stalled)",
+                          IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000);
+       } else
+               seq_printf(m,
+                          " +--------------- Hard IRQs ('|': locked)\n"
+                          " |             +- Delay flag "
+                              "('+': > %d us, '!': > %d us)\n"
+                          " |             |\n"
+                          "  Type     Time   Function (Parent)\n",
+                          IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000);
+}
+
+static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos)
+{
+       loff_t n = *pos;
+
+       mutex_lock(&out_mutex);
+
+       if (!n) {
+               struct ipipe_trace_path *tp;
+               unsigned long length_usecs;
+               int points, cpu;
+               unsigned long flags;
+
+               /* protect against max_path/frozen_path updates while we
+                * haven't locked our target path, also avoid recursively
+                * taking global_path_lock from NMI context */
+               flags = __ipipe_global_path_lock();
+
+               /* find the longest of all per-cpu paths */
+               print_path = NULL;
+               for_each_online_cpu(cpu) {
+                       tp = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)];
+                       if ((print_path == NULL) ||
+                           (tp->length > print_path->length)) {
+                               print_path = tp;
+                               break;
+                       }
+               }
+               print_path->dump_lock = 1;
+
+               __ipipe_global_path_unlock(flags);
+
+               if (!__ipipe_hrclock_ok()) {
+                       seq_printf(m, "No hrclock available, dumping traces disabled\n");
+                       return NULL;
+               }
+
+               /* does this path actually contain data? */
+               if (print_path->end == print_path->begin)
+                       return NULL;
+
+               /* number of points inside the critical path */
+               points = WRAP_POINT_NO(print_path->end-print_path->begin+1);
+
+               /* pre- and post-tracing length, post-trace length was frozen
+                  in __ipipe_trace, pre-trace may have to be reduced due to
+                  buffer overrun */
+               print_pre_trace  = pre_trace;
+               print_post_trace = WRAP_POINT_NO(print_path->trace_pos -
+                                                print_path->end - 1);
+               if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1)
+                       print_pre_trace = IPIPE_TRACE_POINTS - 1 - points -
+                               print_post_trace;
+
+               length_usecs = ipipe_tsc2us(print_path->length);
+               seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe release #%d\n"
+                          "-------------------------------------------------------------\n",
+                       UTS_RELEASE, IPIPE_CORE_RELEASE);
+               seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: "
+                       "%d (-%d/+%d), Length: %lu us\n",
+                       cpu, print_path->point[print_path->begin].timestamp,
+                       points, print_pre_trace, print_post_trace, length_usecs);
+               __ipipe_print_headline(m);
+       }
+
+       /* check if we are inside the trace range */
+       if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 +
+                              print_pre_trace + print_post_trace))
+               return NULL;
+
+       /* return the next point to be shown */
+       return &print_path->point[WRAP_POINT_NO(print_path->begin -
+                                               print_pre_trace + n)];
+}
+
+static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos)
+{
+       loff_t n = ++*pos;
+
+       /* check if we are inside the trace range with the next entry */
+       if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 +
+                              print_pre_trace + print_post_trace))
+               return NULL;
+
+       /* return the next point to be shown */
+       return &print_path->point[WRAP_POINT_NO(print_path->begin -
+                                               print_pre_trace + *pos)];
+}
+
+static void __ipipe_prtrace_stop(struct seq_file *m, void *p)
+{
+       if (print_path)
+               print_path->dump_lock = 0;
+       mutex_unlock(&out_mutex);
+}
+
+static int __ipipe_prtrace_show(struct seq_file *m, void *p)
+{
+       long time;
+       struct ipipe_trace_point *point = p;
+       char buf[16];
+
+       if (!point->eip) {
+               seq_puts(m, "-<invalid>-\n");
+               return 0;
+       }
+
+       __ipipe_print_pathmark(m, point);
+       __ipipe_trace_point_type(buf, point);
+       seq_puts(m, buf);
+       if (verbose_trace)
+               switch (point->type & IPIPE_TYPE_MASK) {
+                       case IPIPE_TRACE_FUNC:
+                               seq_puts(m, "           ");
+                               break;
+
+                       case IPIPE_TRACE_PID:
+                               __ipipe_get_task_info(buf, point, 0);
+                               seq_puts(m, buf);
+                               break;
+
+                       case IPIPE_TRACE_EVENT:
+                               __ipipe_get_event_date(buf, print_path, point);
+                               seq_puts(m, buf);
+                               break;
+
+                       default:
+                               seq_printf(m, "0x%08lx ", point->v);
+               }
+
+       time = __ipipe_signed_tsc2us(point->timestamp -
+               print_path->point[print_path->begin].timestamp);
+       seq_printf(m, "%5ld", time);
+
+       __ipipe_print_delay(m, point);
+       __ipipe_print_symname(m, point->eip);
+       seq_puts(m, " (");
+       __ipipe_print_symname(m, point->parent_eip);
+       seq_puts(m, ")\n");
+
+       return 0;
+}
+
+static struct seq_operations __ipipe_max_ptrace_ops = {
+       .start = __ipipe_max_prtrace_start,
+       .next  = __ipipe_prtrace_next,
+       .stop  = __ipipe_prtrace_stop,
+       .show  = __ipipe_prtrace_show
+};
+
+static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &__ipipe_max_ptrace_ops);
+}
+
+static ssize_t
+__ipipe_max_reset(struct file *file, const char __user *pbuffer,
+                 size_t count, loff_t *data)
+{
+       mutex_lock(&out_mutex);
+       ipipe_trace_max_reset();
+       mutex_unlock(&out_mutex);
+
+       return count;
+}
+
+static const struct file_operations __ipipe_max_prtrace_fops = {
+       .open       = __ipipe_max_prtrace_open,
+       .read       = seq_read,
+       .write      = __ipipe_max_reset,
+       .llseek     = seq_lseek,
+       .release    = seq_release,
+};
+
+static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos)
+{
+       loff_t n = *pos;
+
+       mutex_lock(&out_mutex);
+
+       if (!n) {
+               struct ipipe_trace_path *tp;
+               int cpu;
+               unsigned long flags;
+
+               /* protect against max_path/frozen_path updates while we
+                * haven't locked our target path, also avoid recursively
+                * taking global_path_lock from NMI context */
+               flags = __ipipe_global_path_lock();
+
+               /* find the first of all per-cpu frozen paths */
+               print_path = NULL;
+               for_each_online_cpu(cpu) {
+                       tp = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)];
+                       if (tp->end >= 0) {
+                               print_path = tp;
+                               break;
+                       }
+               }
+               if (print_path)
+                       print_path->dump_lock = 1;
+
+               __ipipe_global_path_unlock(flags);
+
+               if (!print_path)
+                       return NULL;
+
+               if (!__ipipe_hrclock_ok()) {
+                       seq_printf(m, "No hrclock available, dumping traces disabled\n");
+                       return NULL;
+               }
+
+               /* back- and post-tracing length, post-trace length was frozen
+                  in __ipipe_trace, back-trace may have to be reduced due to
+                  buffer overrun */
+               print_pre_trace  = back_trace-1; /* subtract the freeze point */
+               print_post_trace = WRAP_POINT_NO(print_path->trace_pos -
+                                                print_path->end - 1);
+               if (1+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1)
+                       print_pre_trace = IPIPE_TRACE_POINTS - 2 -
+                               print_post_trace;
+
+               seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe release #%d\n"
+                             "------------------------------------------------------------\n",
+                          UTS_RELEASE, IPIPE_CORE_RELEASE);
+               seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n",
+                       cpu, print_path->point[print_path->begin].timestamp,
+                       print_pre_trace+1, print_post_trace);
+               __ipipe_print_headline(m);
+       }
+
+       /* check if we are inside the trace range */
+       if (n >= print_pre_trace + 1 + print_post_trace)
+               return NULL;
+
+       /* return the next point to be shown */
+       return &print_path->point[WRAP_POINT_NO(print_path->begin-
+                                               print_pre_trace+n)];
+}
+
+static struct seq_operations __ipipe_frozen_ptrace_ops = {
+       .start = __ipipe_frozen_prtrace_start,
+       .next  = __ipipe_prtrace_next,
+       .stop  = __ipipe_prtrace_stop,
+       .show  = __ipipe_prtrace_show
+};
+
+static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &__ipipe_frozen_ptrace_ops);
+}
+
+static ssize_t
+__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer,
+                   size_t count, loff_t *data)
+{
+       char *end, buf[16];
+       int val;
+       int n;
+
+       n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
+
+       if (copy_from_user(buf, pbuffer, n))
+               return -EFAULT;
+
+       buf[n] = '\0';
+       val = simple_strtol(buf, &end, 0);
+
+       if (((*end != '\0') && !isspace(*end)) || (val < 0))
+               return -EINVAL;
+
+       mutex_lock(&out_mutex);
+       ipipe_trace_frozen_reset();
+       if (val > 0)
+               ipipe_trace_freeze(-1);
+       mutex_unlock(&out_mutex);
+
+       return count;
+}
+
+static const struct file_operations __ipipe_frozen_prtrace_fops = {
+       .open       = __ipipe_frozen_prtrace_open,
+       .read       = seq_read,
+       .write      = __ipipe_frozen_ctrl,
+       .llseek     = seq_lseek,
+       .release    = seq_release,
+};
+
+static int __ipipe_rd_proc_val(struct seq_file *p, void *data)
+{
+       seq_printf(p, "%u\n", *(int *)p->private);
+       return 0;
+}
+
+static ssize_t
+__ipipe_wr_proc_val(struct file *file, const char __user *buffer,
+                   size_t count, loff_t *data)
+{
+       struct seq_file *p = file->private_data;
+       char *end, buf[16];
+       int val;
+       int n;
+
+       n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
+
+       if (copy_from_user(buf, buffer, n))
+               return -EFAULT;
+
+       buf[n] = '\0';
+       val = simple_strtol(buf, &end, 0);
+
+       if (((*end != '\0') && !isspace(*end)) || (val < 0))
+               return -EINVAL;
+
+       mutex_lock(&out_mutex);
+       *(int *)p->private = val;
+       mutex_unlock(&out_mutex);
+
+       return count;
+}
+
+static int __ipipe_rw_proc_val_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, __ipipe_rd_proc_val, PDE_DATA(inode));
+}
+
+static const struct file_operations __ipipe_rw_proc_val_ops = {
+       .open           = __ipipe_rw_proc_val_open,
+       .read           = seq_read,
+       .write          = __ipipe_wr_proc_val,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static void __init
+__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir,
+                             const char *name, int *value_ptr)
+{
+       proc_create_data(name, 0644, trace_dir, &__ipipe_rw_proc_val_ops,
+                        value_ptr);
+}
+
+static int __ipipe_rd_trigger(struct seq_file *p, void *data)
+{
+       char str[KSYM_SYMBOL_LEN];
+
+       if (trigger_begin) {
+               sprint_symbol(str, trigger_begin);
+               seq_printf(p, "%s\n", str);
+       }
+       return 0;
+}
+
+static ssize_t
+__ipipe_wr_trigger(struct file *file, const char __user *buffer,
+                  size_t count, loff_t *data)
+{
+       char buf[KSYM_SYMBOL_LEN];
+       unsigned long begin, end;
+
+       if (count > sizeof(buf) - 1)
+               count = sizeof(buf) - 1;
+       if (copy_from_user(buf, buffer, count))
+               return -EFAULT;
+       buf[count] = 0;
+       if (buf[count-1] == '\n')
+               buf[count-1] = 0;
+
+       begin = kallsyms_lookup_name(buf);
+       if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL))
+               return -ENOENT;
+       end += begin - 1;
+
+       mutex_lock(&out_mutex);
+       /* invalidate the current range before setting a new one */
+       trigger_end = 0;
+       wmb();
+       ipipe_trace_frozen_reset();
+
+       /* set new range */
+       trigger_begin = begin;
+       wmb();
+       trigger_end = end;
+       mutex_unlock(&out_mutex);
+
+       return count;
+}
+
+static int __ipipe_rw_trigger_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, __ipipe_rd_trigger, NULL);
+}
+
+static const struct file_operations __ipipe_rw_trigger_ops = {
+       .open           = __ipipe_rw_trigger_open,
+       .read           = seq_read,
+       .write          = __ipipe_wr_trigger,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
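+
+/*
+ * Usage sketch: writing a symbol name to the "trigger" entry arms a
+ * freeze on the first function trace point hitting that symbol's
+ * address range (requires the function tracer to be enabled); assuming
+ * ipipe_proc_root maps to /proc/ipipe, e.g.:
+ *
+ *     # echo __do_softirq > /proc/ipipe/trace/trigger
+ *     # cat /proc/ipipe/trace/frozen
+ */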
+
+
+#ifdef CONFIG_IPIPE_TRACE_MCOUNT
+static void notrace
+ipipe_trace_function(unsigned long ip, unsigned long parent_ip,
+                    struct ftrace_ops *op, struct pt_regs *regs)
+{
+       if (!ipipe_trace_enable)
+               return;
+       __ipipe_trace(IPIPE_TRACE_FUNC, ip, parent_ip, 0);
+}
+
+static struct ftrace_ops ipipe_trace_ops = {
+       .func = ipipe_trace_function,
+       .flags = FTRACE_OPS_FL_IPIPE_EXCLUSIVE,
+};
+
+static ssize_t __ipipe_wr_enable(struct file *file, const char __user *buffer,
+                                size_t count, loff_t *data)
+{
+       char *end, buf[16];
+       int val;
+       int n;
+
+       n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
+
+       if (copy_from_user(buf, buffer, n))
+               return -EFAULT;
+
+       buf[n] = '\0';
+       val = simple_strtol(buf, &end, 0);
+
+       if (((*end != '\0') && !isspace(*end)) || (val < 0))
+               return -EINVAL;
+
+       mutex_lock(&out_mutex);
+
+       if (ipipe_trace_enable) {
+               if (!val)
+                       unregister_ftrace_function(&ipipe_trace_ops);
+       } else if (val)
+               register_ftrace_function(&ipipe_trace_ops);
+
+       ipipe_trace_enable = val;
+
+       mutex_unlock(&out_mutex);
+
+       return count;
+}
+
+static const struct file_operations __ipipe_rw_enable_ops = {
+       .open           = __ipipe_rw_proc_val_open,
+       .read           = seq_read,
+       .write          = __ipipe_wr_enable,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
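+
+/*
+ * With CONFIG_IPIPE_TRACE_MCOUNT, flipping the "enable" value through
+ * the proc interface also registers/unregisters the ftrace hook above,
+ * so per-function trace points start and stop together with the flag.
+ */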
+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */
+
+extern struct proc_dir_entry *ipipe_proc_root;
+
+void __init __ipipe_tracer_hrclock_initialized(void)
+{
+       unsigned long long start, end, min = ULLONG_MAX;
+       int i;
+
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
+       if (!per_cpu(trace_path, 0))
+               return;
+#endif
+       /* Calculate minimum overhead of __ipipe_trace() */
+       hard_local_irq_disable();
+       for (i = 0; i < 100; i++) {
+               ipipe_read_tsc(start);
+               __ipipe_trace(IPIPE_TRACE_FUNC, __BUILTIN_RETURN_ADDRESS0,
+                             __BUILTIN_RETURN_ADDRESS1, 0);
+               ipipe_read_tsc(end);
+
+               end -= start;
+               if (end < min)
+                       min = end;
+       }
+       hard_local_irq_enable();
+       trace_overhead = ipipe_tsc2ns(min);
+}
+
+void __init __ipipe_init_tracer(void)
+{
+       struct proc_dir_entry *trace_dir;
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
+       int cpu, path;
+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
+
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
+       for_each_possible_cpu(cpu) {
+               struct ipipe_trace_path *tp_buf;
+
+               tp_buf = vmalloc_node(sizeof(struct ipipe_trace_path) *
+                                     IPIPE_TRACE_PATHS, cpu_to_node(cpu));
+               if (!tp_buf) {
+                       pr_err("I-pipe: "
+                              "insufficient memory for trace buffer.\n");
+                       return;
+               }
+               memset(tp_buf, 0,
+                      sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS);
+               for (path = 0; path < IPIPE_TRACE_PATHS; path++) {
+                       tp_buf[path].begin = -1;
+                       tp_buf[path].end   = -1;
+               }
+               per_cpu(trace_path, cpu) = tp_buf;
+       }
+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
+
+       if (__ipipe_hrclock_ok() && !trace_overhead)
+               __ipipe_tracer_hrclock_initialized();
+
+#ifdef CONFIG_IPIPE_TRACE_ENABLE
+       ipipe_trace_enable = 1;
+#ifdef CONFIG_IPIPE_TRACE_MCOUNT
+       ftrace_enabled = 1;
+       register_ftrace_function(&ipipe_trace_ops);
+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */
+#endif /* CONFIG_IPIPE_TRACE_ENABLE */
+
+       trace_dir = proc_mkdir("trace", ipipe_proc_root);
+
+       proc_create("max", 0644, trace_dir, &__ipipe_max_prtrace_fops);
+       proc_create("frozen", 0644, trace_dir, &__ipipe_frozen_prtrace_fops);
+
+       proc_create("trigger", 0644, trace_dir, &__ipipe_rw_trigger_ops);
+
+       __ipipe_create_trace_proc_val(trace_dir, "pre_trace_points",
+                                     &pre_trace);
+       __ipipe_create_trace_proc_val(trace_dir, "post_trace_points",
+                                     &post_trace);
+       __ipipe_create_trace_proc_val(trace_dir, "back_trace_points",
+                                     &back_trace);
+       __ipipe_create_trace_proc_val(trace_dir, "verbose",
+                                     &verbose_trace);
+#ifdef CONFIG_IPIPE_TRACE_MCOUNT
+       proc_create_data("enable", 0644, trace_dir, &__ipipe_rw_enable_ops,
+                        &ipipe_trace_enable);
+#else /* !CONFIG_IPIPE_TRACE_MCOUNT */
+       __ipipe_create_trace_proc_val(trace_dir, "enable",
+                                     &ipipe_trace_enable);
+#endif /* !CONFIG_IPIPE_TRACE_MCOUNT */
+}
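+
+/*
+ * Resulting control interface (sketch, assuming ipipe_proc_root is
+ * /proc/ipipe):
+ *
+ *     /proc/ipipe/trace/max      worst-case path dump, write to reset
+ *     /proc/ipipe/trace/frozen   frozen back-trace dump; writing 0
+ *                                resets it, a positive value also
+ *                                triggers an immediate freeze
+ *     /proc/ipipe/trace/trigger  freeze-on-symbol trigger
+ *     /proc/ipipe/trace/{pre,post,back}_trace_points, verbose, enable
+ */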
index bdd18afa19a486c759e7cd4b7a2983cfa0097fcb..8555ebf1b7c48908081e209c5bed2d29b8ba39d4 100644 (file)
 #include <linux/ftrace.h>
 #include <linux/reboot.h>
 #include <linux/delay.h>
+#include <linux/ipipe_trace.h>
 #include <linux/kexec.h>
 #include <linux/sched.h>
+#include <linux/ipipe.h>
 #include <linux/sysrq.h>
 #include <linux/init.h>
 #include <linux/nmi.h>
@@ -471,6 +473,8 @@ void oops_enter(void)
 {
        tracing_off();
        /* can't trust the integrity of the kernel anymore: */
+       ipipe_trace_panic_freeze();
+       ipipe_disable_context_check();
        debug_locks_off();
        do_oops_enter_exit();
 }
index 0ec7d1d33a1498d555eb186dfe1e3566c2948ef9..2034d02e8ee3385e5b839039fcc264f3a5281190 100644 (file)
@@ -5,7 +5,7 @@
 menu "RCU Debugging"
 
 config PROVE_RCU
-       def_bool PROVE_LOCKING
+       def_bool PROVE_LOCKING && !IPIPE
 
 config TORTURE_TEST
        tristate
index 55062461b2fd16fd8c935a5099b4fab22418a8e5..2119b43b92cc37862050ce885e9301cb69c8c7a2 100644 (file)
@@ -1780,7 +1780,9 @@ void scheduler_ipi(void)
         * however a fair share of IPIs are still resched only so this would
         * somewhat pessimize the simple resched case.
         */
+#ifndef IPIPE_ARCH_HAVE_VIRQ_IPI
        irq_enter();
+#endif
        sched_ttwu_pending();
 
        /*
@@ -1790,7 +1792,9 @@ void scheduler_ipi(void)
                this_rq()->idle_balance = 1;
                raise_softirq_irqoff(SCHED_SOFTIRQ);
        }
+#ifndef IPIPE_ARCH_HAVE_VIRQ_IPI
        irq_exit();
+#endif
 }
 
 static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
index 257f4f0b4532b0428a8ff3c72513b0a54cb0029d..3e23da3bd6aad868e596b5b79c87bc5becf9dbce 100644 (file)
@@ -84,6 +84,51 @@ void __weak arch_cpu_idle(void)
        local_irq_enable();
 }
 
+#ifdef CONFIG_IPIPE
+
+bool __weak ipipe_enter_idle_hook(void)
+{
+       /*
+        * By default, we may enter the idle state if no co-kernel is
+        * present.
+        */
+       return ipipe_root_domain == ipipe_head_domain;
+}
+
+void __weak ipipe_exit_idle_hook(void) { }
+
+static bool pipeline_idle_enter(void)
+{
+       struct ipipe_percpu_domain_data *p;
+
+       /*
+        * We may go idle if no interrupt is waiting delivery from the
+        * root stage, or a co-kernel denies such transition.
+        */
+       hard_local_irq_disable();
+       p = ipipe_this_cpu_root_context();
+
+       return !__ipipe_ipending_p(p) && ipipe_enter_idle_hook();
+}
+
+static inline void pipeline_idle_exit(void)
+{
+       ipipe_exit_idle_hook();
+       /* unstall and re-enable hw IRQs too. */
+       local_irq_enable();
+}
+
+#else
+
+static inline bool pipeline_idle_enter(void)
+{
+       return true;
+}
+
+static inline void pipeline_idle_exit(void) { }
+
+#endif /* !CONFIG_IPIPE */
+
 /**
  * default_idle_call - Default CPU idle routine.
  *
@@ -91,11 +136,12 @@ void __weak arch_cpu_idle(void)
  */
 void __cpuidle default_idle_call(void)
 {
-       if (current_clr_polling_and_test()) {
+       if (current_clr_polling_and_test() || !pipeline_idle_enter()) {
                local_irq_enable();
        } else {
                stop_critical_timings();
                arch_cpu_idle();
+               pipeline_idle_exit();
                start_critical_timings();
        }
 }
@@ -103,11 +149,13 @@ void __cpuidle default_idle_call(void)
 static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
                      int next_state)
 {
+       int ret;
+
        /*
         * The idle task must be scheduled, it is pointless to go to idle, just
         * update no idle residency and return.
         */
-       if (current_clr_polling_and_test()) {
+       if (current_clr_polling_and_test() || !pipeline_idle_enter()) {
                dev->last_residency = 0;
                local_irq_enable();
                return -EBUSY;
@@ -118,7 +166,10 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
         * This function will block until an interrupt occurs and will take
         * care of re-enabling the local interrupts
         */
-       return cpuidle_enter(drv, dev, next_state);
+       ret = cpuidle_enter(drv, dev, next_state);
+       pipeline_idle_exit();
+
+       return ret;
 }
 
 /**
@@ -157,6 +208,10 @@ static void cpuidle_idle_call(void)
                goto exit_idle;
        }
 
+       if (!pipeline_idle_enter()) {
+               local_irq_enable();
+               goto exit_idle;
+       }
        /*
         * Suspend-to-idle ("s2idle") is a system state in which all user space
         * has been frozen, all I/O devices have been suspended and the only
@@ -178,12 +233,14 @@ static void cpuidle_idle_call(void)
 
                next_state = cpuidle_find_deepest_state(drv, dev);
                call_cpuidle(drv, dev, next_state);
+               pipeline_idle_exit();
        } else {
                /*
                 * Ask the cpuidle framework to choose a convenient idle state.
                 */
                next_state = cpuidle_select(drv, dev);
                entered_state = call_cpuidle(drv, dev, next_state);
+               pipeline_idle_exit();
                /*
                 * Give the governor an opportunity to reflect on the outcome
                 */
index ff21b4dbb3922fd2046b3a3c62acd34e5c5a4f80..42c9175e6581f3ca50666859dd0c48cfe608bab3 100644 (file)
@@ -412,6 +412,7 @@ config MAGIC_SYSRQ
          keys are documented in <file:Documentation/admin-guide/sysrq.rst>.
          Don't say Y unless you really know what this hack does.
 
+
 config MAGIC_SYSRQ_DEFAULT_ENABLE
        hex "Enable magic SysRq key functions by default"
        depends on MAGIC_SYSRQ
@@ -431,6 +432,8 @@ config MAGIC_SYSRQ_SERIAL
          This option allows you to decide whether you want to enable the
          magic SysRq key.
 
+source "kernel/ipipe/Kconfig.debug"
+
 config DEBUG_KERNEL
        bool "Kernel debugging"
        help