From: Philippe Gerum Date: Sun, 3 Dec 2017 11:11:44 +0000 (+0100) Subject: ipipe: add generic pipeline core X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=68719f5571543ef30b1ade9be04a7b4e909f63fa;p=platform%2Fkernel%2Flinux-exynos.git ipipe: add generic pipeline core --- diff --git a/include/asm-generic/ipipe.h b/include/asm-generic/ipipe.h new file mode 100644 index 000000000000..6c19c76e0feb --- /dev/null +++ b/include/asm-generic/ipipe.h @@ -0,0 +1,72 @@ +/* -*- linux-c -*- + * include/asm-generic/ipipe.h + * + * Copyright (C) 2002-2017 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + */ +#ifndef __ASM_GENERIC_IPIPE_H +#define __ASM_GENERIC_IPIPE_H + +#ifdef CONFIG_IPIPE + +#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || defined(CONFIG_PROVE_LOCKING) || \ + defined(CONFIG_PREEMPT_VOLUNTARY) || defined(CONFIG_IPIPE_DEBUG_CONTEXT) +void __ipipe_uaccess_might_fault(void); +#else +#define __ipipe_uaccess_might_fault() might_fault() +#endif + +#define hard_cond_local_irq_enable() hard_local_irq_enable() +#define hard_cond_local_irq_disable() hard_local_irq_disable() +#define hard_cond_local_irq_save() hard_local_irq_save() +#define hard_cond_local_irq_restore(flags) hard_local_irq_restore(flags) + +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT +void ipipe_root_only(void); +#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ +static inline void ipipe_root_only(void) { } +#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ + +void ipipe_stall_root(void); + +void ipipe_unstall_root(void); + +unsigned long ipipe_test_and_stall_root(void); + +unsigned long ipipe_test_root(void); + +void ipipe_restore_root(unsigned long x); + +#else /* !CONFIG_IPIPE */ + +#define hard_local_irq_save() arch_local_irq_save() +#define hard_local_irq_restore(x) arch_local_irq_restore(x) +#define hard_local_irq_enable() arch_local_irq_enable() +#define hard_local_irq_disable() arch_local_irq_disable() +#define hard_irqs_disabled() irqs_disabled() + +#define hard_cond_local_irq_enable() do { } while(0) +#define hard_cond_local_irq_disable() do { } while(0) +#define hard_cond_local_irq_save() 0 +#define hard_cond_local_irq_restore(flags) do { (void)(flags); } while(0) + +#define __ipipe_uaccess_might_fault() might_fault() + +static inline void ipipe_root_only(void) { } + +#endif /* !CONFIG_IPIPE */ + +#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE) +#define hard_smp_local_irq_save() hard_local_irq_save() +#define hard_smp_local_irq_restore(flags) hard_local_irq_restore(flags) +#else /* !CONFIG_SMP */ +#define hard_smp_local_irq_save() 0 +#define hard_smp_local_irq_restore(flags) do { (void)(flags); } while(0) +#endif /* CONFIG_SMP */ + +#endif diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 1817a8415a5e..848c7b442783 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -44,11 +44,33 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define arch_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) #endif +#ifdef CONFIG_IPIPE +#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) +extern int __ipipe_check_percpu_access(void); +#define __ipipe_cpu_offset \ + ({ \ + WARN_ON_ONCE(__ipipe_check_percpu_access()); \ + __my_cpu_offset; \ + }) +#else +#define __ipipe_cpu_offset __my_cpu_offset +#endif +#ifndef 
__ipipe_raw_cpu_ptr +#define __ipipe_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __ipipe_cpu_offset) +#endif +#define __ipipe_raw_cpu_read(var) (*__ipipe_raw_cpu_ptr(&(var))) +#endif /* CONFIG_IPIPE */ + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); #endif -#endif /* SMP */ +#else /* !SMP */ + +#define __ipipe_raw_cpu_ptr(ptr) VERIFY_PERCPU_PTR(ptr) +#define __ipipe_raw_cpu_read(var) (*__ipipe_raw_cpu_ptr(&(var))) + +#endif /* !SMP */ #ifndef PER_CPU_BASE_SECTION #ifdef CONFIG_SMP @@ -148,9 +170,9 @@ do { \ #define this_cpu_generic_to_op(pcp, val, op) \ do { \ unsigned long __flags; \ - raw_local_irq_save(__flags); \ + __flags = hard_local_irq_save(); \ raw_cpu_generic_to_op(pcp, val, op); \ - raw_local_irq_restore(__flags); \ + hard_local_irq_restore(__flags); \ } while (0) @@ -158,9 +180,9 @@ do { \ ({ \ typeof(pcp) __ret; \ unsigned long __flags; \ - raw_local_irq_save(__flags); \ + __flags = hard_local_irq_save(); \ __ret = raw_cpu_generic_add_return(pcp, val); \ - raw_local_irq_restore(__flags); \ + hard_local_irq_restore(__flags); \ __ret; \ }) @@ -168,9 +190,9 @@ do { \ ({ \ typeof(pcp) __ret; \ unsigned long __flags; \ - raw_local_irq_save(__flags); \ + __flags = hard_local_irq_save(); \ __ret = raw_cpu_generic_xchg(pcp, nval); \ - raw_local_irq_restore(__flags); \ + hard_local_irq_restore(__flags); \ __ret; \ }) @@ -178,9 +200,9 @@ do { \ ({ \ typeof(pcp) __ret; \ unsigned long __flags; \ - raw_local_irq_save(__flags); \ + __flags = hard_local_irq_save(); \ __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \ - raw_local_irq_restore(__flags); \ + hard_local_irq_restore(__flags); \ __ret; \ }) @@ -188,10 +210,10 @@ do { \ ({ \ int __ret; \ unsigned long __flags; \ - raw_local_irq_save(__flags); \ + __flags = hard_local_irq_save(); \ __ret = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ oval1, oval2, nval1, nval2); \ - raw_local_irq_restore(__flags); \ + hard_local_irq_restore(__flags); \ __ret; \ }) diff --git a/include/ipipe/setup.h b/include/ipipe/setup.h new file mode 100644 index 000000000000..c2bc5218cf65 --- /dev/null +++ b/include/ipipe/setup.h @@ -0,0 +1,10 @@ +#ifndef _IPIPE_SETUP_H +#define _IPIPE_SETUP_H + +/* + * Placeholders for setup hooks defined by client domains. + */ + +static inline void __ipipe_early_client_setup(void) { } + +#endif /* !_IPIPE_SETUP_H */ diff --git a/include/ipipe/thread_info.h b/include/ipipe/thread_info.h new file mode 100644 index 000000000000..7038c12942c8 --- /dev/null +++ b/include/ipipe/thread_info.h @@ -0,0 +1,14 @@ +#ifndef _IPIPE_THREAD_INFO_H +#define _IPIPE_THREAD_INFO_H + +/* + * Placeholder for private thread information defined by client + * domains. + */ + +struct ipipe_threadinfo { +}; + +#define __ipipe_init_threadinfo(__p) do { } while (0) + +#endif /* !_IPIPE_THREAD_INFO_H */ diff --git a/include/linux/ipipe.h b/include/linux/ipipe.h new file mode 100644 index 000000000000..6621c4a35739 --- /dev/null +++ b/include/linux/ipipe.h @@ -0,0 +1,447 @@ +/* -*- linux-c -*- + * include/linux/ipipe.h + * + * Copyright (C) 2002-2014 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __LINUX_IPIPE_H +#define __LINUX_IPIPE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_IPIPE + +#include + +/* ipipe_set_hooks(..., enables) */ +#define IPIPE_SYSCALL __IPIPE_SYSCALL_E +#define IPIPE_TRAP __IPIPE_TRAP_E +#define IPIPE_KEVENT __IPIPE_KEVENT_E + +struct ipipe_sysinfo { + int sys_nr_cpus; /* Number of CPUs on board */ + int sys_hrtimer_irq; /* hrtimer device IRQ */ + u64 sys_hrtimer_freq; /* hrtimer device frequency */ + u64 sys_hrclock_freq; /* hrclock device frequency */ + u64 sys_cpu_freq; /* CPU frequency (Hz) */ + struct ipipe_arch_sysinfo arch; +}; + +struct ipipe_work_header { + size_t size; + void (*handler)(struct ipipe_work_header *work); +}; + +extern unsigned int __ipipe_printk_virq; + +void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq); + +void __ipipe_share_current(int flags); + +void __ipipe_arch_share_current(int flags); + +int __ipipe_disable_ondemand_mappings(struct task_struct *p); + +int __ipipe_pin_vma(struct mm_struct *mm, struct vm_area_struct *vma); + +#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH + +#define prepare_arch_switch(next) \ + do { \ + hard_local_irq_enable(); \ + __ipipe_report_schedule(current, next); \ + } while(0) + +#ifndef ipipe_get_active_mm +static inline struct mm_struct *ipipe_get_active_mm(void) +{ + return __this_cpu_read(ipipe_percpu.active_mm); +} +#define ipipe_get_active_mm ipipe_get_active_mm +#endif + +#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ + +#define prepare_arch_switch(next) \ + do { \ + __ipipe_report_schedule(current, next); \ + hard_local_irq_disable(); \ + } while(0) + +#ifndef ipipe_get_active_mm +#define ipipe_get_active_mm() (current->active_mm) +#endif + +#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ + +#ifdef CONFIG_IPIPE_WANT_CLOCKSOURCE + +extern unsigned long long __ipipe_cs_freq; + +extern struct clocksource *__ipipe_cs; + +#endif /* CONFIG_IPIPE_WANT_CLOCKSOURCE */ + +static inline bool __ipipe_hrclock_ok(void) +{ + return __ipipe_hrclock_freq != 0; +} + +static inline void __ipipe_nmi_enter(void) +{ + __this_cpu_write(ipipe_percpu.nmi_state, __ipipe_root_status); + __set_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); + ipipe_save_context_nmi(); +} + +static inline void __ipipe_nmi_exit(void) +{ + ipipe_restore_context_nmi(); + if (!test_bit(IPIPE_STALL_FLAG, raw_cpu_ptr(&ipipe_percpu.nmi_state))) + __clear_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); +} + +/* KVM-side calls, hw IRQs off. */ +static inline void __ipipe_enter_vm(struct ipipe_vm_notifier *vmf) +{ + struct ipipe_percpu_data *p; + + p = raw_cpu_ptr(&ipipe_percpu); + p->vm_notifier = vmf; + barrier(); +} + +static inline void __ipipe_exit_vm(void) +{ + struct ipipe_percpu_data *p; + + p = raw_cpu_ptr(&ipipe_percpu); + p->vm_notifier = NULL; + barrier(); +} + +/* Client-side call, hw IRQs off. 
*/ +void __ipipe_notify_vm_preemption(void); + +static inline void __ipipe_sync_pipeline(struct ipipe_domain *top) +{ + if (__ipipe_current_domain != top) { + __ipipe_do_sync_pipeline(top); + return; + } + if (!test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpu_context(top)->status)) + __ipipe_sync_stage(); +} + +void ipipe_register_head(struct ipipe_domain *ipd, + const char *name); + +void ipipe_unregister_head(struct ipipe_domain *ipd); + +int ipipe_request_irq(struct ipipe_domain *ipd, + unsigned int irq, + ipipe_irq_handler_t handler, + void *cookie, + ipipe_irq_ackfn_t ackfn); + +void ipipe_free_irq(struct ipipe_domain *ipd, + unsigned int irq); + +void ipipe_raise_irq(unsigned int irq); + +int ipipe_handle_syscall(struct thread_info *ti, + unsigned long nr, struct pt_regs *regs); + +void ipipe_set_hooks(struct ipipe_domain *ipd, + int enables); + +unsigned int ipipe_alloc_virq(void); + +void ipipe_free_virq(unsigned int virq); + +static inline void ipipe_post_irq_head(unsigned int irq) +{ + __ipipe_set_irq_pending(ipipe_head_domain, irq); +} + +static inline void ipipe_post_irq_root(unsigned int irq) +{ + __ipipe_set_irq_pending(&ipipe_root, irq); +} + +static inline void ipipe_stall_head(void) +{ + hard_local_irq_disable(); + __set_bit(IPIPE_STALL_FLAG, &__ipipe_head_status); +} + +static inline unsigned long ipipe_test_and_stall_head(void) +{ + hard_local_irq_disable(); + return __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_head_status); +} + +static inline unsigned long ipipe_test_head(void) +{ + unsigned long flags, ret; + + flags = hard_smp_local_irq_save(); + ret = test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status); + hard_smp_local_irq_restore(flags); + + return ret; +} + +void ipipe_unstall_head(void); + +void __ipipe_restore_head(unsigned long x); + +static inline void ipipe_restore_head(unsigned long x) +{ + ipipe_check_irqoff(); + if ((x ^ test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status)) & 1) + __ipipe_restore_head(x); +} + +void __ipipe_post_work_root(struct ipipe_work_header *work); + +#define ipipe_post_work_root(p, header) \ + do { \ + void header_not_at_start(void); \ + if (offsetof(typeof(*(p)), header)) { \ + header_not_at_start(); \ + } \ + __ipipe_post_work_root(&(p)->header); \ + } while (0) + +int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo); + +unsigned long ipipe_critical_enter(void (*syncfn)(void)); + +void ipipe_critical_exit(unsigned long flags); + +void ipipe_prepare_panic(void); + +#ifdef CONFIG_SMP +#ifndef ipipe_smp_p +#define ipipe_smp_p (1) +#endif +void ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask); +void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask); +#else /* !CONFIG_SMP */ +#define ipipe_smp_p (0) +static inline +void ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask) { } +static inline void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask) { } +static inline void ipipe_disable_smp(void) { } +#endif /* CONFIG_SMP */ + +static inline void ipipe_restore_root_nosync(unsigned long x) +{ + unsigned long flags; + + flags = hard_smp_local_irq_save(); + __ipipe_restore_root_nosync(x); + hard_smp_local_irq_restore(flags); +} + +/* Must be called hw IRQs off. */ +static inline void ipipe_lock_irq(unsigned int irq) +{ + struct ipipe_domain *ipd = __ipipe_current_domain; + if (ipd == ipipe_root_domain) + __ipipe_lock_irq(irq); +} + +/* Must be called hw IRQs off. 
*/ +static inline void ipipe_unlock_irq(unsigned int irq) +{ + struct ipipe_domain *ipd = __ipipe_current_domain; + if (ipd == ipipe_root_domain) + __ipipe_unlock_irq(irq); +} + +static inline struct ipipe_threadinfo *ipipe_current_threadinfo(void) +{ + return ¤t_thread_info()->ipipe_data; +} + +#define ipipe_task_threadinfo(p) (&task_thread_info(p)->ipipe_data) + +void ipipe_enable_irq(unsigned int irq); + +static inline void ipipe_disable_irq(unsigned int irq) +{ + struct irq_desc *desc; + struct irq_chip *chip; + + desc = irq_to_desc(irq); + if (desc == NULL) + return; + + chip = irq_desc_get_chip(desc); + + if (WARN_ON_ONCE(chip->irq_disable == NULL && chip->irq_mask == NULL)) + return; + + if (chip->irq_disable) + chip->irq_disable(&desc->irq_data); + else + chip->irq_mask(&desc->irq_data); +} + +static inline void ipipe_end_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc) + desc->ipipe_end(desc); +} + +static inline int ipipe_chained_irq_p(struct irq_desc *desc) +{ + void __ipipe_chained_irq(struct irq_desc *desc); + + return desc->handle_irq == __ipipe_chained_irq; +} + +static inline void ipipe_handle_demuxed_irq(unsigned int cascade_irq) +{ + ipipe_trace_irq_entry(cascade_irq); + __ipipe_dispatch_irq(cascade_irq, IPIPE_IRQF_NOSYNC); + ipipe_trace_irq_exit(cascade_irq); +} + +static inline void __ipipe_init_threadflags(struct thread_info *ti) +{ + ti->ipipe_flags = 0; +} + +static inline +void ipipe_set_ti_thread_flag(struct thread_info *ti, int flag) +{ + set_bit(flag, &ti->ipipe_flags); +} + +static inline +void ipipe_clear_ti_thread_flag(struct thread_info *ti, int flag) +{ + clear_bit(flag, &ti->ipipe_flags); +} + +static inline +void ipipe_test_and_clear_ti_thread_flag(struct thread_info *ti, int flag) +{ + test_and_clear_bit(flag, &ti->ipipe_flags); +} + +static inline +int ipipe_test_ti_thread_flag(struct thread_info *ti, int flag) +{ + return test_bit(flag, &ti->ipipe_flags); +} + +#define ipipe_set_thread_flag(flag) \ + ipipe_set_ti_thread_flag(current_thread_info(), flag) + +#define ipipe_clear_thread_flag(flag) \ + ipipe_clear_ti_thread_flag(current_thread_info(), flag) + +#define ipipe_test_and_clear_thread_flag(flag) \ + ipipe_test_and_clear_ti_thread_flag(current_thread_info(), flag) + +#define ipipe_test_thread_flag(flag) \ + ipipe_test_ti_thread_flag(current_thread_info(), flag) + +#define ipipe_enable_notifier(p) \ + ipipe_set_ti_thread_flag(task_thread_info(p), TIP_NOTIFY) + +#define ipipe_disable_notifier(p) \ + do { \ + struct thread_info *ti = task_thread_info(p); \ + ipipe_clear_ti_thread_flag(ti, TIP_NOTIFY); \ + ipipe_clear_ti_thread_flag(ti, TIP_MAYDAY); \ + } while (0) + +#define ipipe_notifier_enabled_p(p) \ + ipipe_test_ti_thread_flag(task_thread_info(p), TIP_NOTIFY) + +#define ipipe_raise_mayday(p) \ + do { \ + struct thread_info *ti = task_thread_info(p); \ + ipipe_check_irqoff(); \ + if (ipipe_test_ti_thread_flag(ti, TIP_NOTIFY)) \ + ipipe_set_ti_thread_flag(ti, TIP_MAYDAY); \ + } while (0) + +#ifdef CONFIG_IPIPE_TRACE +void __ipipe_tracer_hrclock_initialized(void); +#else /* !CONFIG_IPIPE_TRACE */ +#define __ipipe_tracer_hrclock_initialized() do { } while(0) +#endif /* !CONFIG_IPIPE_TRACE */ + +int ipipe_get_domain_slope_hook(struct task_struct *prev, + struct task_struct *next); + +#else /* !CONFIG_IPIPE */ + +#define __ipipe_root_p 1 +#define ipipe_root_p 1 + +static inline void __ipipe_init_threadflags(struct thread_info *ti) { } + +static inline void __ipipe_nmi_enter(void) { } + +static inline void 
__ipipe_nmi_exit(void) { } + +#define ipipe_safe_current() current +#define ipipe_processor_id() smp_processor_id() + +static inline int ipipe_test_foreign_stack(void) +{ + return 0; +} + +static inline void ipipe_lock_irq(unsigned int irq) { } + +static inline void ipipe_unlock_irq(unsigned int irq) { } + +static inline int ipipe_handle_syscall(struct thread_info *ti, + unsigned long nr, struct pt_regs *regs) +{ + return 0; +} + +static inline +int ipipe_get_domain_slope_hook(struct task_struct *prev, + struct task_struct *next) +{ + return 0; +} + +#endif /* !CONFIG_IPIPE */ + +#endif /* !__LINUX_IPIPE_H */ diff --git a/include/linux/ipipe_base.h b/include/linux/ipipe_base.h new file mode 100644 index 000000000000..f476c5ab12d1 --- /dev/null +++ b/include/linux/ipipe_base.h @@ -0,0 +1,243 @@ +/* -*- linux-c -*- + * include/linux/ipipe_base.h + * + * Copyright (C) 2002-2014 Philippe Gerum. + * 2007 Jan Kiszka. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __LINUX_IPIPE_BASE_H +#define __LINUX_IPIPE_BASE_H + +struct kvm_vcpu; +struct ipipe_vm_notifier; +struct irq_desc; + +#ifdef CONFIG_IPIPE + +#define IPIPE_CORE_APIREV CONFIG_IPIPE_CORE_APIREV + +#include +#include +#include +#include + +struct pt_regs; +struct ipipe_domain; + +struct ipipe_trap_data { + int exception; + struct pt_regs *regs; +}; + +struct ipipe_vm_notifier { + void (*handler)(struct ipipe_vm_notifier *nfy); +}; + +static inline int ipipe_virtual_irq_p(unsigned int irq) +{ + return irq >= IPIPE_VIRQ_BASE && irq < IPIPE_NR_IRQS; +} + +void __ipipe_init_early(void); + +void __ipipe_init(void); + +#ifdef CONFIG_PROC_FS +void __ipipe_init_proc(void); +#ifdef CONFIG_IPIPE_TRACE +void __ipipe_init_tracer(void); +#else /* !CONFIG_IPIPE_TRACE */ +static inline void __ipipe_init_tracer(void) { } +#endif /* CONFIG_IPIPE_TRACE */ +#else /* !CONFIG_PROC_FS */ +static inline void __ipipe_init_proc(void) { } +#endif /* CONFIG_PROC_FS */ + +void __ipipe_restore_root_nosync(unsigned long x); + +#define IPIPE_IRQF_NOACK 0x1 +#define IPIPE_IRQF_NOSYNC 0x2 + +void __ipipe_dispatch_irq(unsigned int irq, int flags); + +void __ipipe_do_sync_stage(void); + +void __ipipe_do_sync_pipeline(struct ipipe_domain *top); + +void __ipipe_lock_irq(unsigned int irq); + +void __ipipe_unlock_irq(unsigned int irq); + +void __ipipe_do_critical_sync(unsigned int irq, void *cookie); + +void __ipipe_ack_edge_irq(struct irq_desc *desc); + +void __ipipe_nop_irq(struct irq_desc *desc); + +static inline void __ipipe_idle(void) +{ + ipipe_unstall_root(); +} + +#ifndef __ipipe_sync_check +#define __ipipe_sync_check 1 +#endif + +static inline void __ipipe_sync_stage(void) +{ + if (likely(__ipipe_sync_check)) + __ipipe_do_sync_stage(); +} + +#ifndef __ipipe_run_irqtail +#define __ipipe_run_irqtail(irq) do { } while(0) +#endif + +int 
__ipipe_log_printk(const char *fmt, va_list args); +void __ipipe_flush_printk(unsigned int irq, void *cookie); + +#define __ipipe_get_cpu(flags) ({ (flags) = hard_preempt_disable(); ipipe_processor_id(); }) +#define __ipipe_put_cpu(flags) hard_preempt_enable(flags) + +int __ipipe_notify_syscall(struct pt_regs *regs); + +int __ipipe_notify_trap(int exception, struct pt_regs *regs); + +int __ipipe_notify_kevent(int event, void *data); + +#define __ipipe_report_trap(exception, regs) \ + __ipipe_notify_trap(exception, regs) + +#define __ipipe_report_sigwake(p) \ + do { \ + if (ipipe_notifier_enabled_p(p)) \ + __ipipe_notify_kevent(IPIPE_KEVT_SIGWAKE, p); \ + } while (0) + +struct ipipe_cpu_migration_data { + struct task_struct *task; + int dest_cpu; +}; + +#define __ipipe_report_setaffinity(__p, __dest_cpu) \ + do { \ + struct ipipe_cpu_migration_data d = { \ + .task = (__p), \ + .dest_cpu = (__dest_cpu), \ + }; \ + if (ipipe_notifier_enabled_p(__p)) \ + __ipipe_notify_kevent(IPIPE_KEVT_SETAFFINITY, &d); \ + } while (0) + +#define __ipipe_report_exit(p) \ + do { \ + if (ipipe_notifier_enabled_p(p)) \ + __ipipe_notify_kevent(IPIPE_KEVT_EXIT, p); \ + } while (0) + +#define __ipipe_report_setsched(p) \ + do { \ + if (ipipe_notifier_enabled_p(p)) \ + __ipipe_notify_kevent(IPIPE_KEVT_SETSCHED, p); \ + } while (0) + +#define __ipipe_report_schedule(prev, next) \ +do { \ + if (ipipe_notifier_enabled_p(next) || \ + ipipe_notifier_enabled_p(prev)) { \ + __this_cpu_write(ipipe_percpu.rqlock_owner, prev); \ + __ipipe_notify_kevent(IPIPE_KEVT_SCHEDULE, next); \ + } \ +} while (0) + +#define __ipipe_report_cleanup(mm) \ + __ipipe_notify_kevent(IPIPE_KEVT_CLEANUP, mm) + +#define __ipipe_report_clockfreq_update(freq) \ + __ipipe_notify_kevent(IPIPE_KEVT_CLOCKFREQ, &(freq)) + +void __ipipe_notify_vm_preemption(void); + +void __ipipe_call_mayday(struct pt_regs *regs); + +static inline void __ipipe_init_taskinfo(struct task_struct *p) { } + +#define __ipipe_serial_debug(__fmt, __args...) raw_printk(__fmt, ##__args) + +#else /* !CONFIG_IPIPE */ + +struct task_struct; +struct mm_struct; + +static inline void __ipipe_init_early(void) { } + +static inline void __ipipe_init(void) { } + +static inline void __ipipe_init_proc(void) { } + +static inline void __ipipe_idle(void) { } + +static inline void __ipipe_report_sigwake(struct task_struct *p) { } + +static inline void __ipipe_report_setaffinity(struct task_struct *p, + int dest_cpu) { } + +static inline void __ipipe_report_setsched(struct task_struct *p) { } + +static inline void __ipipe_report_exit(struct task_struct *p) { } + +static inline void __ipipe_report_cleanup(struct mm_struct *mm) { } + +#define __ipipe_report_trap(exception, regs) 0 + +static inline void __ipipe_init_taskinfo(struct task_struct *p) { } + +#define hard_preempt_disable() ({ preempt_disable(); 0; }) +#define hard_preempt_enable(flags) ({ preempt_enable(); (void)(flags); }) + +#define __ipipe_get_cpu(flags) ({ (void)(flags); get_cpu(); }) +#define __ipipe_put_cpu(flags) \ + do { \ + (void)(flags); \ + put_cpu(); \ + } while (0) + +#define __ipipe_root_tick_p(regs) 1 + +#define ipipe_handle_demuxed_irq(irq) generic_handle_irq(irq) + +#define __ipipe_enter_vm(vmf) do { } while (0) + +static inline void __ipipe_exit_vm(void) { } + +static inline void __ipipe_notify_vm_preemption(void) { } + +#define __ipipe_serial_debug(__fmt, __args...) 
do { } while (0) + +#endif /* !CONFIG_IPIPE */ + +#ifdef CONFIG_IPIPE_WANT_PTE_PINNING +void __ipipe_pin_mapping_globally(unsigned long start, + unsigned long end); +#else +static inline void __ipipe_pin_mapping_globally(unsigned long start, + unsigned long end) +{ } +#endif + +#endif /* !__LINUX_IPIPE_BASE_H */ diff --git a/include/linux/ipipe_debug.h b/include/linux/ipipe_debug.h new file mode 100644 index 000000000000..5d7efefbdddf --- /dev/null +++ b/include/linux/ipipe_debug.h @@ -0,0 +1,100 @@ +/* -*- linux-c -*- + * include/linux/ipipe_debug.h + * + * Copyright (C) 2012 Philippe Gerum . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __LINUX_IPIPE_DEBUG_H +#define __LINUX_IPIPE_DEBUG_H + +#include + +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT + +#include + +static inline int ipipe_disable_context_check(void) +{ + return xchg(raw_cpu_ptr(&ipipe_percpu.context_check), 0); +} + +static inline void ipipe_restore_context_check(int old_state) +{ + __this_cpu_write(ipipe_percpu.context_check, old_state); +} + +static inline void ipipe_context_check_off(void) +{ + int cpu; + for_each_online_cpu(cpu) + per_cpu(ipipe_percpu, cpu).context_check = 0; +} + +static inline void ipipe_save_context_nmi(void) +{ + int state = ipipe_disable_context_check(); + __this_cpu_write(ipipe_percpu.context_check_saved, state); +} + +static inline void ipipe_restore_context_nmi(void) +{ + ipipe_restore_context_check(__this_cpu_read(ipipe_percpu.context_check_saved)); +} + +#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ + +static inline int ipipe_disable_context_check(void) +{ + return 0; +} + +static inline void ipipe_restore_context_check(int old_state) { } + +static inline void ipipe_context_check_off(void) { } + +static inline void ipipe_save_context_nmi(void) { } + +static inline void ipipe_restore_context_nmi(void) { } + +#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ + +#ifdef CONFIG_IPIPE_DEBUG + +#define ipipe_check_irqoff() \ + do { \ + if (WARN_ON_ONCE(!hard_irqs_disabled())) \ + hard_local_irq_disable(); \ + } while (0) + +#else /* !CONFIG_IPIPE_DEBUG */ + +static inline void ipipe_check_irqoff(void) { } + +#endif /* !CONFIG_IPIPE_DEBUG */ + +#ifdef CONFIG_IPIPE_DEBUG_INTERNAL +#define IPIPE_WARN(c) WARN_ON(c) +#define IPIPE_WARN_ONCE(c) WARN_ON_ONCE(c) +#define IPIPE_BUG_ON(c) BUG_ON(c) +#else +#define IPIPE_WARN(c) do { (void)(c); } while (0) +#define IPIPE_WARN_ONCE(c) do { (void)(c); } while (0) +#define IPIPE_BUG_ON(c) do { (void)(c); } while (0) +#endif + +#endif /* !__LINUX_IPIPE_DEBUG_H */ diff --git a/include/linux/ipipe_domain.h b/include/linux/ipipe_domain.h new file mode 100644 index 000000000000..e03e97be8273 --- /dev/null +++ b/include/linux/ipipe_domain.h @@ -0,0 +1,357 @@ +/* -*- linux-c -*- + * include/linux/ipipe_domain.h + * + * Copyright (C) 2007-2012 Philippe Gerum. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __LINUX_IPIPE_DOMAIN_H +#define __LINUX_IPIPE_DOMAIN_H + +#ifdef CONFIG_IPIPE + +#include +#include +#include +#include + +struct task_struct; +struct mm_struct; +struct irq_desc; +struct ipipe_vm_notifier; + +#define __bpl_up(x) (((x)+(BITS_PER_LONG-1)) & ~(BITS_PER_LONG-1)) +/* Number of virtual IRQs (must be a multiple of BITS_PER_LONG) */ +#define IPIPE_NR_VIRQS BITS_PER_LONG +/* First virtual IRQ # (must be aligned on BITS_PER_LONG) */ +#define IPIPE_VIRQ_BASE __bpl_up(IPIPE_NR_XIRQS) +/* Total number of IRQ slots */ +#define IPIPE_NR_IRQS (IPIPE_VIRQ_BASE+IPIPE_NR_VIRQS) + +#define IPIPE_IRQ_LOMAPSZ (IPIPE_NR_IRQS / BITS_PER_LONG) +#if IPIPE_IRQ_LOMAPSZ > BITS_PER_LONG +/* + * We need a 3-level mapping. This allows us to handle up to 32k IRQ + * vectors on 32bit machines, 256k on 64bit ones. + */ +#define __IPIPE_3LEVEL_IRQMAP 1 +#define IPIPE_IRQ_MDMAPSZ (__bpl_up(IPIPE_IRQ_LOMAPSZ) / BITS_PER_LONG) +#else +/* + * 2-level mapping is enough. This allows us to handle up to 1024 IRQ + * vectors on 32bit machines, 4096 on 64bit ones. + */ +#define __IPIPE_2LEVEL_IRQMAP 1 +#endif + +/* Per-cpu pipeline status */ +#define IPIPE_STALL_FLAG 0 /* interrupts (virtually) disabled. 
*/ +#define IPIPE_STALL_MASK (1L << IPIPE_STALL_FLAG) + +/* Interrupt control bits */ +#define IPIPE_HANDLE_FLAG 0 +#define IPIPE_STICKY_FLAG 1 +#define IPIPE_LOCK_FLAG 2 +#define IPIPE_HANDLE_MASK (1 << IPIPE_HANDLE_FLAG) +#define IPIPE_STICKY_MASK (1 << IPIPE_STICKY_FLAG) +#define IPIPE_LOCK_MASK (1 << IPIPE_LOCK_FLAG) + +#define __IPIPE_SYSCALL_P 0 +#define __IPIPE_TRAP_P 1 +#define __IPIPE_KEVENT_P 2 +#define __IPIPE_SYSCALL_E (1 << __IPIPE_SYSCALL_P) +#define __IPIPE_TRAP_E (1 << __IPIPE_TRAP_P) +#define __IPIPE_KEVENT_E (1 << __IPIPE_KEVENT_P) +#define __IPIPE_ALL_E 0x7 +#define __IPIPE_SYSCALL_R (8 << __IPIPE_SYSCALL_P) +#define __IPIPE_TRAP_R (8 << __IPIPE_TRAP_P) +#define __IPIPE_KEVENT_R (8 << __IPIPE_KEVENT_P) +#define __IPIPE_SHIFT_R 3 +#define __IPIPE_ALL_R (__IPIPE_ALL_E << __IPIPE_SHIFT_R) + +#define IPIPE_KEVT_SCHEDULE 0 +#define IPIPE_KEVT_SIGWAKE 1 +#define IPIPE_KEVT_SETSCHED 2 +#define IPIPE_KEVT_SETAFFINITY 3 +#define IPIPE_KEVT_EXIT 4 +#define IPIPE_KEVT_CLEANUP 5 +#define IPIPE_KEVT_HOSTRT 6 +#define IPIPE_KEVT_CLOCKFREQ 7 + +typedef void (*ipipe_irq_ackfn_t)(struct irq_desc *desc); + +typedef void (*ipipe_irq_handler_t)(unsigned int irq, + void *cookie); + +struct ipipe_domain { + int context_offset; + struct ipipe_irqdesc { + unsigned long control; + ipipe_irq_ackfn_t ackfn; + ipipe_irq_handler_t handler; + void *cookie; + } ____cacheline_aligned irqs[IPIPE_NR_IRQS]; + const char *name; + struct mutex mutex; +}; + +static inline void * +__ipipe_irq_cookie(struct ipipe_domain *ipd, unsigned int irq) +{ + return ipd->irqs[irq].cookie; +} + +static inline ipipe_irq_handler_t +__ipipe_irq_handler(struct ipipe_domain *ipd, unsigned int irq) +{ + return ipd->irqs[irq].handler; +} + +extern struct ipipe_domain ipipe_root; + +#define ipipe_root_domain (&ipipe_root) + +extern struct ipipe_domain *ipipe_head_domain; + +struct ipipe_percpu_domain_data { + unsigned long status; /* <= Must be first in struct. */ + unsigned long irqpend_himap; +#ifdef __IPIPE_3LEVEL_IRQMAP + unsigned long irqpend_mdmap[IPIPE_IRQ_MDMAPSZ]; +#endif + unsigned long irqpend_lomap[IPIPE_IRQ_LOMAPSZ]; + unsigned long irqheld_map[IPIPE_IRQ_LOMAPSZ]; + unsigned long irqall[IPIPE_NR_IRQS]; + struct ipipe_domain *domain; + int coflags; +}; + +struct ipipe_percpu_data { + struct ipipe_percpu_domain_data root; + struct ipipe_percpu_domain_data head; + struct ipipe_percpu_domain_data *curr; + struct pt_regs tick_regs; + int hrtimer_irq; + struct task_struct *task_hijacked; + struct task_struct *rqlock_owner; + struct ipipe_vm_notifier *vm_notifier; + unsigned long nmi_state; + struct mm_struct *active_mm; +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT + int context_check; + int context_check_saved; +#endif +}; + +/* + * CAREFUL: all accessors based on __ipipe_raw_cpu_ptr() you may find + * in this file should be used only while hw interrupts are off, to + * prevent from CPU migration regardless of the running domain. + */ +DECLARE_PER_CPU(struct ipipe_percpu_data, ipipe_percpu); + +static inline struct ipipe_percpu_domain_data * +__context_of(struct ipipe_percpu_data *p, struct ipipe_domain *ipd) +{ + return (void *)p + ipd->context_offset; +} + +/** + * ipipe_percpu_context - return the address of the pipeline context + * data for a domain on a given CPU. + * + * NOTE: this is the slowest accessor, use it carefully. Prefer + * ipipe_this_cpu_context() for requests targeted at the current + * CPU. Additionally, if the target domain is known at build time, + * consider ipipe_this_cpu_{root, head}_context(). 
+ */ +static inline struct ipipe_percpu_domain_data * +ipipe_percpu_context(struct ipipe_domain *ipd, int cpu) +{ + return __context_of(&per_cpu(ipipe_percpu, cpu), ipd); +} + +/** + * ipipe_this_cpu_context - return the address of the pipeline context + * data for a domain on the current CPU. hw IRQs must be off. + * + * NOTE: this accessor is a bit faster, but since we don't know which + * one of "root" or "head" ipd refers to, we still need to compute the + * context address from its offset. + */ +static inline struct ipipe_percpu_domain_data * +ipipe_this_cpu_context(struct ipipe_domain *ipd) +{ + return __context_of(__ipipe_raw_cpu_ptr(&ipipe_percpu), ipd); +} + +/** + * ipipe_this_cpu_root_context - return the address of the pipeline + * context data for the root domain on the current CPU. hw IRQs must + * be off. + * + * NOTE: this accessor is recommended when the domain we refer to is + * known at build time to be the root one. + */ +static inline struct ipipe_percpu_domain_data * +ipipe_this_cpu_root_context(void) +{ + return __ipipe_raw_cpu_ptr(&ipipe_percpu.root); +} + +/** + * ipipe_this_cpu_head_context - return the address of the pipeline + * context data for the registered head domain on the current CPU. hw + * IRQs must be off. + * + * NOTE: this accessor is recommended when the domain we refer to is + * known at build time to be the registered head domain. This address + * is always different from the context data of the root domain in + * absence of registered head domain. To get the address of the + * context data for the domain leading the pipeline at the time of the + * call (which may be root in absence of registered head domain), use + * ipipe_this_cpu_leading_context() instead. + */ +static inline struct ipipe_percpu_domain_data * +ipipe_this_cpu_head_context(void) +{ + return __ipipe_raw_cpu_ptr(&ipipe_percpu.head); +} + +/** + * ipipe_this_cpu_leading_context - return the address of the pipeline + * context data for the domain leading the pipeline on the current + * CPU. hw IRQs must be off. + * + * NOTE: this accessor is required when either root or a registered + * head domain may be the final target of this call, depending on + * whether the high priority domain was installed via + * ipipe_register_head(). + */ +static inline struct ipipe_percpu_domain_data * +ipipe_this_cpu_leading_context(void) +{ + return ipipe_this_cpu_context(ipipe_head_domain); +} + +/** + * __ipipe_get_current_context() - return the address of the pipeline + * context data of the domain running on the current CPU. hw IRQs must + * be off. + */ +static inline struct ipipe_percpu_domain_data *__ipipe_get_current_context(void) +{ + return __ipipe_raw_cpu_read(ipipe_percpu.curr); +} + +#define __ipipe_current_context __ipipe_get_current_context() + +/** + * __ipipe_set_current_context() - switch the current CPU to the + * specified domain context. hw IRQs must be off. + * + * NOTE: this is the only way to change the current domain for the + * current CPU. Don't bypass. + */ +static inline +void __ipipe_set_current_context(struct ipipe_percpu_domain_data *pd) +{ + struct ipipe_percpu_data *p; + p = __ipipe_raw_cpu_ptr(&ipipe_percpu); + p->curr = pd; +} + +/** + * __ipipe_set_current_domain() - switch the current CPU to the + * specified domain. This is equivalent to calling + * __ipipe_set_current_context() with the context data of that + * domain. hw IRQs must be off. 
+ */ +static inline void __ipipe_set_current_domain(struct ipipe_domain *ipd) +{ + struct ipipe_percpu_data *p; + p = __ipipe_raw_cpu_ptr(&ipipe_percpu); + p->curr = __context_of(p, ipd); +} + +static inline struct ipipe_percpu_domain_data *ipipe_current_context(void) +{ + struct ipipe_percpu_domain_data *pd; + unsigned long flags; + + flags = hard_smp_local_irq_save(); + pd = __ipipe_get_current_context(); + hard_smp_local_irq_restore(flags); + + return pd; +} + +static inline struct ipipe_domain *__ipipe_get_current_domain(void) +{ + return __ipipe_get_current_context()->domain; +} + +#define __ipipe_current_domain __ipipe_get_current_domain() + +/** + * __ipipe_get_current_domain() - return the address of the pipeline + * domain running on the current CPU. hw IRQs must be off. + */ +static inline struct ipipe_domain *ipipe_get_current_domain(void) +{ + struct ipipe_domain *ipd; + unsigned long flags; + + flags = hard_smp_local_irq_save(); + ipd = __ipipe_get_current_domain(); + hard_smp_local_irq_restore(flags); + + return ipd; +} + +#define ipipe_current_domain ipipe_get_current_domain() + +#define __ipipe_root_p (__ipipe_current_domain == ipipe_root_domain) +#define ipipe_root_p (ipipe_current_domain == ipipe_root_domain) + +#ifdef CONFIG_SMP +#define __ipipe_root_status (ipipe_this_cpu_root_context()->status) +#else +extern unsigned long __ipipe_root_status; +#endif + +#define __ipipe_head_status (ipipe_this_cpu_head_context()->status) + +/** + * __ipipe_ipending_p() - Whether we have interrupts pending + * (i.e. logged) for the given domain context on the current CPU. hw + * IRQs must be off. + */ +static inline int __ipipe_ipending_p(struct ipipe_percpu_domain_data *pd) +{ + return pd->irqpend_himap != 0; +} + +static inline unsigned long +__ipipe_cpudata_irq_hits(struct ipipe_domain *ipd, int cpu, unsigned int irq) +{ + return ipipe_percpu_context(ipd, cpu)->irqall[irq]; +} + +#endif /* CONFIG_IPIPE */ + +#endif /* !__LINUX_IPIPE_DOMAIN_H */ diff --git a/include/linux/ipipe_lock.h b/include/linux/ipipe_lock.h new file mode 100644 index 000000000000..a108278b7f1c --- /dev/null +++ b/include/linux/ipipe_lock.h @@ -0,0 +1,327 @@ +/* -*- linux-c -*- + * include/linux/ipipe_lock.h + * + * Copyright (C) 2009 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef __LINUX_IPIPE_LOCK_H +#define __LINUX_IPIPE_LOCK_H + +typedef struct { + arch_spinlock_t arch_lock; +} __ipipe_spinlock_t; + +#define ipipe_spinlock(lock) ((__ipipe_spinlock_t *)(lock)) +#define ipipe_spinlock_p(lock) \ + __builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t *) || \ + __builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t []) + +#define std_spinlock_raw(lock) ((raw_spinlock_t *)(lock)) +#define std_spinlock_raw_p(lock) \ + __builtin_types_compatible_p(typeof(lock), raw_spinlock_t *) || \ + __builtin_types_compatible_p(typeof(lock), raw_spinlock_t []) + +#ifdef CONFIG_PREEMPT_RT_FULL + +#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ + do { \ + if (ipipe_spinlock_p(lock)) \ + (flags) = __ipipe_spin_lock_irqsave(ipipe_spinlock(lock)); \ + else if (std_spinlock_raw_p(lock)) \ + __real_raw_spin_lock_irqsave(std_spinlock_raw(lock), flags); \ + else __bad_lock_type(); \ + } while (0) + +#define PICK_SPINTRYLOCK_IRQSAVE(lock, flags) \ + ({ \ + int __ret__; \ + if (ipipe_spinlock_p(lock)) \ + __ret__ = __ipipe_spin_trylock_irqsave(ipipe_spinlock(lock), &(flags)); \ + else if (std_spinlock_raw_p(lock)) \ + __ret__ = __real_raw_spin_trylock_irqsave(std_spinlock_raw(lock), flags); \ + else __bad_lock_type(); \ + __ret__; \ + }) + +#define PICK_SPINTRYLOCK_IRQ(lock) \ + ({ \ + int __ret__; \ + if (ipipe_spinlock_p(lock)) \ + __ret__ = __ipipe_spin_trylock_irq(ipipe_spinlock(lock)); \ + else if (std_spinlock_raw_p(lock)) \ + __ret__ = __real_raw_spin_trylock_irq(std_spinlock_raw(lock)); \ + else __bad_lock_type(); \ + __ret__; \ + }) + +#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \ + do { \ + if (ipipe_spinlock_p(lock)) \ + __ipipe_spin_unlock_irqrestore(ipipe_spinlock(lock), flags); \ + else if (std_spinlock_raw_p(lock)) { \ + __ipipe_spin_unlock_debug(flags); \ + __real_raw_spin_unlock_irqrestore(std_spinlock_raw(lock), flags); \ + } else __bad_lock_type(); \ + } while (0) + +#define PICK_SPINOP(op, lock) \ + ({ \ + if (ipipe_spinlock_p(lock)) \ + arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ + else if (std_spinlock_raw_p(lock)) \ + __real_raw_spin##op(std_spinlock_raw(lock)); \ + else __bad_lock_type(); \ + (void)0; \ + }) + +#define PICK_SPINOP_RET(op, lock, type) \ + ({ \ + type __ret__; \ + if (ipipe_spinlock_p(lock)) \ + __ret__ = arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ + else if (std_spinlock_raw_p(lock)) \ + __ret__ = __real_raw_spin##op(std_spinlock_raw(lock)); \ + else { __ret__ = -1; __bad_lock_type(); } \ + __ret__; \ + }) + +#else /* !CONFIG_PREEMPT_RT_FULL */ + +#define std_spinlock(lock) ((spinlock_t *)(lock)) +#define std_spinlock_p(lock) \ + __builtin_types_compatible_p(typeof(lock), spinlock_t *) || \ + __builtin_types_compatible_p(typeof(lock), spinlock_t []) + +#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ + do { \ + if (ipipe_spinlock_p(lock)) \ + (flags) = __ipipe_spin_lock_irqsave(ipipe_spinlock(lock)); \ + else if (std_spinlock_raw_p(lock)) \ + __real_raw_spin_lock_irqsave(std_spinlock_raw(lock), flags); \ + else if (std_spinlock_p(lock)) \ + __real_raw_spin_lock_irqsave(&std_spinlock(lock)->rlock, flags); \ + else __bad_lock_type(); \ + } while (0) + +#define PICK_SPINTRYLOCK_IRQSAVE(lock, flags) \ + ({ \ + int __ret__; \ + if (ipipe_spinlock_p(lock)) \ + __ret__ = __ipipe_spin_trylock_irqsave(ipipe_spinlock(lock), &(flags)); \ + else if (std_spinlock_raw_p(lock)) \ + __ret__ = __real_raw_spin_trylock_irqsave(std_spinlock_raw(lock), flags); \ + else if (std_spinlock_p(lock)) \ + __ret__ = 
__real_raw_spin_trylock_irqsave(&std_spinlock(lock)->rlock, flags); \ + else __bad_lock_type(); \ + __ret__; \ + }) + +#define PICK_SPINTRYLOCK_IRQ(lock) \ + ({ \ + int __ret__; \ + if (ipipe_spinlock_p(lock)) \ + __ret__ = __ipipe_spin_trylock_irq(ipipe_spinlock(lock)); \ + else if (std_spinlock_raw_p(lock)) \ + __ret__ = __real_raw_spin_trylock_irq(std_spinlock_raw(lock)); \ + else if (std_spinlock_p(lock)) \ + __ret__ = __real_raw_spin_trylock_irq(&std_spinlock(lock)->rlock); \ + else __bad_lock_type(); \ + __ret__; \ + }) + +#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \ + do { \ + if (ipipe_spinlock_p(lock)) \ + __ipipe_spin_unlock_irqrestore(ipipe_spinlock(lock), flags); \ + else { \ + __ipipe_spin_unlock_debug(flags); \ + if (std_spinlock_raw_p(lock)) \ + __real_raw_spin_unlock_irqrestore(std_spinlock_raw(lock), flags); \ + else if (std_spinlock_p(lock)) \ + __real_raw_spin_unlock_irqrestore(&std_spinlock(lock)->rlock, flags); \ + } \ + } while (0) + +#define PICK_SPINOP(op, lock) \ + ({ \ + if (ipipe_spinlock_p(lock)) \ + arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ + else if (std_spinlock_raw_p(lock)) \ + __real_raw_spin##op(std_spinlock_raw(lock)); \ + else if (std_spinlock_p(lock)) \ + __real_raw_spin##op(&std_spinlock(lock)->rlock); \ + else __bad_lock_type(); \ + (void)0; \ + }) + +#define PICK_SPINOP_RET(op, lock, type) \ + ({ \ + type __ret__; \ + if (ipipe_spinlock_p(lock)) \ + __ret__ = arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ + else if (std_spinlock_raw_p(lock)) \ + __ret__ = __real_raw_spin##op(std_spinlock_raw(lock)); \ + else if (std_spinlock_p(lock)) \ + __ret__ = __real_raw_spin##op(&std_spinlock(lock)->rlock); \ + else { __ret__ = -1; __bad_lock_type(); } \ + __ret__; \ + }) + +#endif /* !CONFIG_PREEMPT_RT_FULL */ + +#define arch_spin_lock_init(lock) \ + do { \ + IPIPE_DEFINE_SPINLOCK(__lock__); \ + *((ipipe_spinlock_t *)lock) = __lock__; \ + } while (0) + +#define arch_spin_lock_irq(lock) \ + do { \ + hard_local_irq_disable(); \ + arch_spin_lock(lock); \ + } while (0) + +#define arch_spin_unlock_irq(lock) \ + do { \ + arch_spin_unlock(lock); \ + hard_local_irq_enable(); \ + } while (0) + +typedef struct { + arch_rwlock_t arch_lock; +} __ipipe_rwlock_t; + +#define ipipe_rwlock_p(lock) \ + __builtin_types_compatible_p(typeof(lock), __ipipe_rwlock_t *) + +#define std_rwlock_p(lock) \ + __builtin_types_compatible_p(typeof(lock), rwlock_t *) + +#define ipipe_rwlock(lock) ((__ipipe_rwlock_t *)(lock)) +#define std_rwlock(lock) ((rwlock_t *)(lock)) + +#define PICK_RWOP(op, lock) \ + do { \ + if (ipipe_rwlock_p(lock)) \ + arch##op(&ipipe_rwlock(lock)->arch_lock); \ + else if (std_rwlock_p(lock)) \ + _raw##op(std_rwlock(lock)); \ + else __bad_lock_type(); \ + } while (0) + +extern int __bad_lock_type(void); + +#ifdef CONFIG_IPIPE + +#define ipipe_spinlock_t __ipipe_spinlock_t +#define IPIPE_DEFINE_RAW_SPINLOCK(x) ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED +#define IPIPE_DECLARE_RAW_SPINLOCK(x) extern ipipe_spinlock_t x +#define IPIPE_DEFINE_SPINLOCK(x) IPIPE_DEFINE_RAW_SPINLOCK(x) +#define IPIPE_DECLARE_SPINLOCK(x) IPIPE_DECLARE_RAW_SPINLOCK(x) + +#define IPIPE_SPIN_LOCK_UNLOCKED \ + (__ipipe_spinlock_t) { .arch_lock = __ARCH_SPIN_LOCK_UNLOCKED } + +#define spin_lock_irqsave_cond(lock, flags) \ + spin_lock_irqsave(lock, flags) + +#define spin_unlock_irqrestore_cond(lock, flags) \ + spin_unlock_irqrestore(lock, flags) + +#define raw_spin_lock_irqsave_cond(lock, flags) \ + raw_spin_lock_irqsave(lock, flags) + +#define raw_spin_unlock_irqrestore_cond(lock, 
flags) \ + raw_spin_unlock_irqrestore(lock, flags) + +void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock); + +int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock); + +void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock); + +unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock); + +int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock, + unsigned long *x); + +void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock, + unsigned long x); + +void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock); + +void __ipipe_spin_unlock_irqcomplete(unsigned long x); + +#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) +void __ipipe_spin_unlock_debug(unsigned long flags); +#else +#define __ipipe_spin_unlock_debug(flags) do { } while (0) +#endif + +#define ipipe_rwlock_t __ipipe_rwlock_t +#define IPIPE_DEFINE_RWLOCK(x) ipipe_rwlock_t x = IPIPE_RW_LOCK_UNLOCKED +#define IPIPE_DECLARE_RWLOCK(x) extern ipipe_rwlock_t x + +#define IPIPE_RW_LOCK_UNLOCKED \ + (__ipipe_rwlock_t) { .arch_lock = __ARCH_RW_LOCK_UNLOCKED } + +#else /* !CONFIG_IPIPE */ + +#define ipipe_spinlock_t spinlock_t +#define IPIPE_DEFINE_SPINLOCK(x) DEFINE_SPINLOCK(x) +#define IPIPE_DECLARE_SPINLOCK(x) extern spinlock_t x +#define IPIPE_SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(unknown) +#define IPIPE_DEFINE_RAW_SPINLOCK(x) DEFINE_RAW_SPINLOCK(x) +#define IPIPE_DECLARE_RAW_SPINLOCK(x) extern raw_spinlock_t x + +#define spin_lock_irqsave_cond(lock, flags) \ + do { \ + (void)(flags); \ + spin_lock(lock); \ + } while(0) + +#define spin_unlock_irqrestore_cond(lock, flags) \ + spin_unlock(lock) + +#define raw_spin_lock_irqsave_cond(lock, flags) \ + do { \ + (void)(flags); \ + raw_spin_lock(lock); \ + } while(0) + +#define raw_spin_unlock_irqrestore_cond(lock, flags) \ + raw_spin_unlock(lock) + +#define __ipipe_spin_lock_irq(lock) do { } while (0) +#define __ipipe_spin_unlock_irq(lock) do { } while (0) +#define __ipipe_spin_lock_irqsave(lock) 0 +#define __ipipe_spin_trylock_irq(lock) 1 +#define __ipipe_spin_trylock_irqsave(lock, x) ({ (void)(x); 1; }) +#define __ipipe_spin_unlock_irqrestore(lock, x) do { (void)(x); } while (0) +#define __ipipe_spin_unlock_irqbegin(lock) spin_unlock(lock) +#define __ipipe_spin_unlock_irqcomplete(x) do { (void)(x); } while (0) +#define __ipipe_spin_unlock_debug(flags) do { } while (0) + +#define ipipe_rwlock_t rwlock_t +#define IPIPE_DEFINE_RWLOCK(x) DEFINE_RWLOCK(x) +#define IPIPE_DECLARE_RWLOCK(x) extern rwlock_t x +#define IPIPE_RW_LOCK_UNLOCKED RW_LOCK_UNLOCKED + +#endif /* !CONFIG_IPIPE */ + +#endif /* !__LINUX_IPIPE_LOCK_H */ diff --git a/include/linux/ipipe_tickdev.h b/include/linux/ipipe_tickdev.h new file mode 100644 index 000000000000..120fb031f54d --- /dev/null +++ b/include/linux/ipipe_tickdev.h @@ -0,0 +1,159 @@ +/* -*- linux-c -*- + * include/linux/ipipe_tickdev.h + * + * Copyright (C) 2007 Philippe Gerum. + * Copyright (C) 2012 Gilles Chanteperdrix + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __LINUX_IPIPE_TICKDEV_H +#define __LINUX_IPIPE_TICKDEV_H + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_IPIPE + +struct clock_event_device; + +struct ipipe_hostrt_data { + short live; + seqcount_t seqcount; + time_t wall_time_sec; + u32 wall_time_nsec; + struct timespec wall_to_monotonic; + u64 cycle_last; + u64 mask; + u32 mult; + u32 shift; +}; + +enum clock_event_mode { + CLOCK_EVT_MODE_PERIODIC, + CLOCK_EVT_MODE_ONESHOT, + CLOCK_EVT_MODE_UNUSED, + CLOCK_EVT_MODE_SHUTDOWN, +}; + +struct ipipe_timer { + int irq; + void (*request)(struct ipipe_timer *timer, int steal); + int (*set)(unsigned long ticks, void *timer); + void (*ack)(void); + void (*release)(struct ipipe_timer *timer); + + /* Only if registering a timer directly */ + const char *name; + unsigned rating; + unsigned long freq; + unsigned min_delay_ticks; + const struct cpumask *cpumask; + + /* For internal use */ + void *timer_set; /* pointer passed to ->set() callback */ + struct clock_event_device *host_timer; + struct list_head link; + + /* Conversions between clock frequency and timer frequency */ + unsigned c2t_integ; + unsigned c2t_frac; + + /* For clockevent interception */ + u32 real_mult; + u32 real_shift; + void (*mode_handler)(enum clock_event_mode mode, + struct clock_event_device *); + int orig_mode; + int (*orig_set_state_periodic)(struct clock_event_device *); + int (*orig_set_state_oneshot)(struct clock_event_device *); + int (*orig_set_state_oneshot_stopped)(struct clock_event_device *); + int (*orig_set_state_shutdown)(struct clock_event_device *); + int (*orig_set_next_event)(unsigned long evt, + struct clock_event_device *cdev); + unsigned int (*refresh_freq)(void); +}; + +#define __ipipe_hrtimer_irq __ipipe_raw_cpu_read(ipipe_percpu.hrtimer_irq) + +extern unsigned long __ipipe_hrtimer_freq; + +/* + * Called by clockevents_register_device, to register a piggybacked + * ipipe timer, if there is one + */ +void ipipe_host_timer_register(struct clock_event_device *clkevt); + +/* + * Register a standalone ipipe timer + */ +void ipipe_timer_register(struct ipipe_timer *timer); + +/* + * Chooses the best timer for each cpu. Take over its handling. + */ +int ipipe_select_timers(const struct cpumask *mask); + +/* + * Release the per-cpu timers + */ +void ipipe_timers_release(void); + +/* + * Start handling the per-cpu timer irq, and intercepting the linux clockevent + * device callbacks. 
+ */ +int ipipe_timer_start(void (*tick_handler)(void), + void (*emumode)(enum clock_event_mode mode, + struct clock_event_device *cdev), + int (*emutick)(unsigned long evt, + struct clock_event_device *cdev), + unsigned cpu); + +/* + * Stop handling a per-cpu timer + */ +void ipipe_timer_stop(unsigned cpu); + +/* + * Program the timer + */ +void ipipe_timer_set(unsigned long delay); + +const char *ipipe_timer_name(void); + +unsigned ipipe_timer_ns2ticks(struct ipipe_timer *timer, unsigned ns); + +void __ipipe_timer_refresh_freq(unsigned int hrclock_freq); + +#else /* !CONFIG_IPIPE */ + +#define ipipe_host_timer_register(clkevt) do { } while (0) + +#endif /* !CONFIG_IPIPE */ + +#ifdef CONFIG_IPIPE_HAVE_HOSTRT +void ipipe_update_hostrt(struct timekeeper *tk); +#else +static inline void +ipipe_update_hostrt(struct timekeeper *tk) {} +#endif + +#endif /* __LINUX_IPIPE_TICKDEV_H */ diff --git a/include/linux/ipipe_trace.h b/include/linux/ipipe_trace.h new file mode 100644 index 000000000000..379c5e3f8b58 --- /dev/null +++ b/include/linux/ipipe_trace.h @@ -0,0 +1,83 @@ +/* -*- linux-c -*- + * include/linux/ipipe_trace.h + * + * Copyright (C) 2005 Luotao Fu. + * 2005-2007 Jan Kiszka. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef _LINUX_IPIPE_TRACE_H +#define _LINUX_IPIPE_TRACE_H + +#ifdef CONFIG_IPIPE_TRACE + +#include + +#ifndef BROKEN_BUILTIN_RETURN_ADDRESS +#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0)) +#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_address(1)) +#endif /* !BUILTIN_RETURN_ADDRESS */ + +struct pt_regs; + +void ipipe_trace_begin(unsigned long v); +void ipipe_trace_end(unsigned long v); +void ipipe_trace_freeze(unsigned long v); +void ipipe_trace_special(unsigned char special_id, unsigned long v); +void ipipe_trace_pid(pid_t pid, short prio); +void ipipe_trace_event(unsigned char id, unsigned long delay_tsc); +int ipipe_trace_max_reset(void); +int ipipe_trace_frozen_reset(void); +void ipipe_trace_irqbegin(int irq, struct pt_regs *regs); +void ipipe_trace_irqend(int irq, struct pt_regs *regs); + +#else /* !CONFIG_IPIPE_TRACE */ + +#define ipipe_trace_begin(v) do { (void)(v); } while(0) +#define ipipe_trace_end(v) do { (void)(v); } while(0) +#define ipipe_trace_freeze(v) do { (void)(v); } while(0) +#define ipipe_trace_special(id, v) do { (void)(id); (void)(v); } while(0) +#define ipipe_trace_pid(pid, prio) do { (void)(pid); (void)(prio); } while(0) +#define ipipe_trace_event(id, delay_tsc) do { (void)(id); (void)(delay_tsc); } while(0) +#define ipipe_trace_max_reset() ({ 0; }) +#define ipipe_trace_frozen_reset() ({ 0; }) +#define ipipe_trace_irqbegin(irq, regs) do { } while(0) +#define ipipe_trace_irqend(irq, regs) do { } while(0) + +#endif /* !CONFIG_IPIPE_TRACE */ + +#ifdef CONFIG_IPIPE_TRACE_PANIC +void ipipe_trace_panic_freeze(void); +void ipipe_trace_panic_dump(void); +#else +static inline void ipipe_trace_panic_freeze(void) { } +static inline void ipipe_trace_panic_dump(void) { } +#endif + +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF +#define ipipe_trace_irq_entry(irq) ipipe_trace_begin(irq) +#define ipipe_trace_irq_exit(irq) ipipe_trace_end(irq) +#define ipipe_trace_irqsoff() ipipe_trace_begin(0x80000000UL) +#define ipipe_trace_irqson() ipipe_trace_end(0x80000000UL) +#else +#define ipipe_trace_irq_entry(irq) do { (void)(irq);} while(0) +#define ipipe_trace_irq_exit(irq) do { (void)(irq);} while(0) +#define ipipe_trace_irqsoff() do { } while(0) +#define ipipe_trace_irqson() do { } while(0) +#endif + +#endif /* !__LINUX_IPIPE_TRACE_H */ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 5bd3f151da78..548bb8edc38c 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -255,7 +255,27 @@ do { \ #endif /* CONFIG_PREEMPT_COUNT */ -#ifdef MODULE +#ifdef CONFIG_IPIPE +#define hard_preempt_disable() \ + ({ \ + unsigned long __flags__; \ + __flags__ = hard_local_irq_save(); \ + if (__ipipe_root_p) \ + preempt_disable(); \ + __flags__; \ + }) + +#define hard_preempt_enable(__flags__) \ + do { \ + if (__ipipe_root_p) { \ + preempt_enable_no_resched(); \ + hard_local_irq_restore(__flags__); \ + preempt_check_resched(); \ + } else \ + hard_local_irq_restore(__flags__); \ + } while (0) + +#elif defined(MODULE) /* * Modules have no business playing preemption tricks. 
*/ @@ -263,7 +283,7 @@ do { \ #undef preempt_enable_no_resched #undef preempt_enable_no_resched_notrace #undef preempt_check_resched -#endif +#endif /* !IPIPE && MODULE */ #define preempt_set_need_resched() \ do { \ diff --git a/init/Kconfig b/init/Kconfig index 46075327c165..ea6e9e4c2b2a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -80,6 +80,7 @@ config COMPILE_TEST config LOCALVERSION string "Local version - append to kernel release" + default "-ipipe" help Append an extra string to the end of your kernel version. This will show up when you type uname, for example. diff --git a/init/main.c b/init/main.c index b32ec72cdf3d..ff9630e49b00 100644 --- a/init/main.c +++ b/init/main.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -522,7 +523,7 @@ asmlinkage __visible void __init start_kernel(void) cgroup_init_early(); - local_irq_disable(); + hard_local_irq_disable(); early_boot_irqs_disabled = true; /* @@ -570,6 +571,7 @@ asmlinkage __visible void __init start_kernel(void) pidhash_init(); vfs_caches_init_early(); sort_main_extable(); + __ipipe_init_early(); trap_init(); mm_init(); @@ -617,6 +619,11 @@ asmlinkage __visible void __init start_kernel(void) softirq_init(); timekeeping_init(); time_init(); + /* + * We need to wait for the interrupt and time subsystems to be + * initialized before enabling the pipeline. + */ + __ipipe_init(); sched_clock_postinit(); printk_safe_init(); perf_event_init(); @@ -914,6 +921,7 @@ static void __init do_basic_setup(void) shmem_init(); driver_init(); init_irq_proc(); + __ipipe_init_proc(); do_ctors(); usermodehelper_enable(); do_initcalls(); diff --git a/kernel/Makefile b/kernel/Makefile index 172d151d429c..06c944bef9a3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -87,6 +87,7 @@ obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RELAY) += relay.o +obj-$(CONFIG_IPIPE) += ipipe/ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o diff --git a/kernel/ipipe/Kconfig b/kernel/ipipe/Kconfig new file mode 100644 index 000000000000..c8d38e5c246a --- /dev/null +++ b/kernel/ipipe/Kconfig @@ -0,0 +1,44 @@ +config IPIPE + bool "Interrupt pipeline" + default y + ---help--- + Activate this option if you want the interrupt pipeline to be + compiled in. + +config IPIPE_CORE + def_bool y if IPIPE + +config IPIPE_WANT_CLOCKSOURCE + bool + +config IPIPE_WANT_PTE_PINNING + bool + +config IPIPE_CORE_APIREV + int + depends on IPIPE + default 2 + ---help--- + The API revision level we implement. + +config IPIPE_WANT_APIREV_2 + bool + +config IPIPE_TARGET_APIREV + int + depends on IPIPE + default IPIPE_CORE_APIREV + ---help--- + The API revision level the we want (must be <= + IPIPE_CORE_APIREV). + +config IPIPE_HAVE_HOSTRT + bool + +config IPIPE_HAVE_EAGER_FPU + bool + +if IPIPE && ARM && RAW_PRINTK && !DEBUG_LL +comment "CAUTION: DEBUG_LL must be selected, and properly configured for" +comment "RAW_PRINTK to work. 
Otherwise, you will get no output on raw_printk()"
+endif
diff --git a/kernel/ipipe/Kconfig.debug b/kernel/ipipe/Kconfig.debug
new file mode 100644
index 000000000000..cee7fab0ee07
--- /dev/null
+++ b/kernel/ipipe/Kconfig.debug
@@ -0,0 +1,96 @@
+config IPIPE_DEBUG
+	bool "I-pipe debugging"
+	depends on IPIPE
+	select RAW_PRINTK
+
+config IPIPE_DEBUG_CONTEXT
+	bool "Check for illicit cross-domain calls"
+	depends on IPIPE_DEBUG
+	default y
+	---help---
+	  Enable this feature to arm checkpoints in the kernel that
+	  verify the correct invocation context. On entry of critical
+	  Linux services a warning is issued if the caller is not
+	  running over the root domain.
+
+config IPIPE_DEBUG_INTERNAL
+	bool "Enable internal debug checks"
+	depends on IPIPE_DEBUG
+	default y
+	---help---
+	  When this feature is enabled, I-pipe will perform internal
+	  consistency checks of its subsystems, e.g. on per-cpu variable
+	  access.
+
+config IPIPE_TRACE
+	bool "Latency tracing"
+	depends on IPIPE_DEBUG
+	select FTRACE
+	select FUNCTION_TRACER
+	select KALLSYMS
+	select PROC_FS
+	---help---
+	  Activate this option if you want to use per-function tracing of
+	  the kernel. The tracer will collect data via instrumentation
+	  features like the one below or with the help of explicit calls
+	  to ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the
+	  in-kernel tracing API. The collected data and the runtime controls
+	  are available via /proc/ipipe/trace/*.
+
+if IPIPE_TRACE
+
+config IPIPE_TRACE_ENABLE
+	bool "Enable tracing on boot"
+	default y
+	---help---
+	  Disable this option if you want to arm the tracer after booting
+	  manually ("echo 1 > /proc/ipipe/tracer/enable"). This can reduce
+	  boot time on slow embedded devices due to the tracer overhead.
+
+config IPIPE_TRACE_MCOUNT
+	bool "Instrument function entries"
+	default y
+	select FTRACE
+	select FUNCTION_TRACER
+	---help---
+	  When enabled, this option records every kernel function entry in
+	  the tracer log. While this slows down the system noticeably, it
+	  provides the highest level of information about the flow of events.
+	  However, it can be switched off in order to record only explicit
+	  I-pipe trace points.
+
+config IPIPE_TRACE_IRQSOFF
+	bool "Trace IRQs-off times"
+	default y
+	---help---
+	  Activate this option if I-pipe shall trace the longest path
+	  with hard-IRQs switched off.
+
+config IPIPE_TRACE_SHIFT
+	int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)"
+	range 10 18
+	default 14
+	---help---
+	  The number of trace points to hold tracing data for each
+	  trace path, as a power of 2.
+
+config IPIPE_TRACE_VMALLOC
+	bool "Use vmalloc'ed trace buffer"
+	default y if EMBEDDED
+	---help---
+	  Instead of reserving static kernel data, the required buffer
+	  is allocated via vmalloc during boot-up when this option is
+	  enabled. This can help to start systems that are low on memory,
+	  but it slightly degrades overall performance. Try this option
+	  when a traced kernel hangs unexpectedly at boot time.
+
+config IPIPE_TRACE_PANIC
+	bool "Enable panic back traces"
+	default y
+	---help---
+	  Provides services to freeze and dump a back trace on panic
+	  situations. This is used on IPIPE_DEBUG_CONTEXT exceptions
+	  as well as ordinary kernel oopses. You can control the number
+	  of printed back trace points via /proc/ipipe/trace.
+ +endif diff --git a/kernel/ipipe/Makefile b/kernel/ipipe/Makefile new file mode 100644 index 000000000000..73755150634f --- /dev/null +++ b/kernel/ipipe/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_IPIPE) += core.o timer.o +obj-$(CONFIG_IPIPE_TRACE) += tracer.o diff --git a/kernel/ipipe/core.c b/kernel/ipipe/core.c new file mode 100644 index 000000000000..91b68ec6aff9 --- /dev/null +++ b/kernel/ipipe/core.c @@ -0,0 +1,1879 @@ +/* -*- linux-c -*- + * linux/kernel/ipipe/core.c + * + * Copyright (C) 2002-2012 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Architecture-independent I-PIPE core support. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PROC_FS +#include +#include +#endif /* CONFIG_PROC_FS */ +#include +#include +#include +#include +#include + +struct ipipe_domain ipipe_root; +EXPORT_SYMBOL_GPL(ipipe_root); + +struct ipipe_domain *ipipe_head_domain = &ipipe_root; +EXPORT_SYMBOL_GPL(ipipe_head_domain); + +#ifdef CONFIG_SMP +static __initdata struct ipipe_percpu_domain_data bootup_context = { + .status = IPIPE_STALL_MASK, + .domain = &ipipe_root, +}; +#else +#define bootup_context ipipe_percpu.root +#endif /* !CONFIG_SMP */ + +DEFINE_PER_CPU(struct ipipe_percpu_data, ipipe_percpu) = { + .root = { + .status = IPIPE_STALL_MASK, + .domain = &ipipe_root, + }, + .curr = &bootup_context, + .hrtimer_irq = -1, +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT + .context_check = 1, +#endif +}; +EXPORT_PER_CPU_SYMBOL(ipipe_percpu); + +/* Up to 2k of pending work data per CPU. */ +#define WORKBUF_SIZE 2048 +static DEFINE_PER_CPU_ALIGNED(unsigned char[WORKBUF_SIZE], work_buf); +static DEFINE_PER_CPU(void *, work_tail); +static unsigned int __ipipe_work_virq; + +static void __ipipe_do_work(unsigned int virq, void *cookie); + +#ifdef CONFIG_SMP + +#define IPIPE_CRITICAL_TIMEOUT 1000000 +static cpumask_t __ipipe_cpu_sync_map; +static cpumask_t __ipipe_cpu_lock_map; +static cpumask_t __ipipe_cpu_pass_map; +static unsigned long __ipipe_critical_lock; +static IPIPE_DEFINE_SPINLOCK(__ipipe_cpu_barrier); +static atomic_t __ipipe_critical_count = ATOMIC_INIT(0); +static void (*__ipipe_cpu_sync) (void); + +#else /* !CONFIG_SMP */ +/* + * Create an alias to the unique root status, so that arch-dep code + * may get fast access to this percpu variable including from + * assembly. A hard-coded assumption is that root.status appears at + * offset #0 of the ipipe_percpu struct. 
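+ *
+ * For instance, UP arch code could then test the root stall bit with
+ * something as simple as (illustrative sketch, not an API defined
+ * here):
+ *
+ *	extern unsigned long __ipipe_root_status;
+ *
+ *	if (test_bit(IPIPE_STALL_FLAG, &__ipipe_root_status))
+ *		...
+ *
+ * without computing the per-CPU context pointer first, which is the
+ * whole point of the alias.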
+ */ +extern unsigned long __ipipe_root_status +__attribute__((alias(__stringify(ipipe_percpu)))); +EXPORT_SYMBOL(__ipipe_root_status); + +#endif /* !CONFIG_SMP */ + +IPIPE_DEFINE_SPINLOCK(__ipipe_lock); + +static unsigned long __ipipe_virtual_irq_map; + +#ifdef CONFIG_PRINTK +unsigned int __ipipe_printk_virq; +int __ipipe_printk_bypass; +#endif /* CONFIG_PRINTK */ + +#ifdef CONFIG_PROC_FS + +struct proc_dir_entry *ipipe_proc_root; + +static int __ipipe_version_info_show(struct seq_file *p, void *data) +{ + seq_printf(p, "%d\n", IPIPE_CORE_RELEASE); + return 0; +} + +static int __ipipe_version_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, __ipipe_version_info_show, NULL); +} + +static const struct file_operations __ipipe_version_proc_ops = { + .open = __ipipe_version_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __ipipe_common_info_show(struct seq_file *p, void *data) +{ + struct ipipe_domain *ipd = (struct ipipe_domain *)p->private; + char handling, lockbit, virtuality; + unsigned long ctlbits; + unsigned int irq; + + seq_printf(p, " +--- Handled\n"); + seq_printf(p, " |+-- Locked\n"); + seq_printf(p, " ||+- Virtual\n"); + seq_printf(p, " [IRQ] ||| Handler\n"); + + mutex_lock(&ipd->mutex); + + for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { + ctlbits = ipd->irqs[irq].control; + /* + * There might be a hole between the last external IRQ + * and the first virtual one; skip it. + */ + if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq)) + continue; + + if (ipipe_virtual_irq_p(irq) + && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)) + /* Non-allocated virtual IRQ; skip it. */ + continue; + + if (ctlbits & IPIPE_HANDLE_MASK) + handling = 'H'; + else + handling = '.'; + + if (ctlbits & IPIPE_LOCK_MASK) + lockbit = 'L'; + else + lockbit = '.'; + + if (ipipe_virtual_irq_p(irq)) + virtuality = 'V'; + else + virtuality = '.'; + + if (ctlbits & IPIPE_HANDLE_MASK) + seq_printf(p, " %4u: %c%c%c %pf\n", + irq, handling, lockbit, virtuality, + ipd->irqs[irq].handler); + else + seq_printf(p, " %4u: %c%c%c\n", + irq, handling, lockbit, virtuality); + } + + mutex_unlock(&ipd->mutex); + + return 0; +} + +static int __ipipe_common_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, __ipipe_common_info_show, PDE_DATA(inode)); +} + +static const struct file_operations __ipipe_info_proc_ops = { + .owner = THIS_MODULE, + .open = __ipipe_common_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void add_domain_proc(struct ipipe_domain *ipd) +{ + proc_create_data(ipd->name, 0444, ipipe_proc_root, + &__ipipe_info_proc_ops, ipd); +} + +void remove_domain_proc(struct ipipe_domain *ipd) +{ + remove_proc_entry(ipd->name, ipipe_proc_root); +} + +void __init __ipipe_init_proc(void) +{ + ipipe_proc_root = proc_mkdir("ipipe", NULL); + proc_create("version", 0444, ipipe_proc_root, + &__ipipe_version_proc_ops); + add_domain_proc(ipipe_root_domain); + + __ipipe_init_tracer(); +} + +#else + +static inline void add_domain_proc(struct ipipe_domain *ipd) +{ +} + +static inline void remove_domain_proc(struct ipipe_domain *ipd) +{ +} + +#endif /* CONFIG_PROC_FS */ + +static void init_stage(struct ipipe_domain *ipd) +{ + memset(&ipd->irqs, 0, sizeof(ipd->irqs)); + mutex_init(&ipd->mutex); + __ipipe_hook_critical_ipi(ipd); +} + +static inline int root_context_offset(void) +{ + void root_context_not_at_start_of_ipipe_percpu(void); + + /* ipipe_percpu.root must be found 
at offset #0. */ + + if (offsetof(struct ipipe_percpu_data, root)) + root_context_not_at_start_of_ipipe_percpu(); + + return 0; +} + +#ifdef CONFIG_SMP + +static inline void fixup_percpu_data(void) +{ + struct ipipe_percpu_data *p; + int cpu; + + /* + * ipipe_percpu.curr cannot be assigned statically to + * &ipipe_percpu.root, due to the dynamic nature of percpu + * data. So we make ipipe_percpu.curr refer to a temporary + * boot up context in static memory, until we can fixup all + * context pointers in this routine, after per-cpu areas have + * been eventually set up. The temporary context data is + * copied to per_cpu(ipipe_percpu, 0).root in the same move. + * + * Obviously, this code must run over the boot CPU, before SMP + * operations start. + */ + BUG_ON(smp_processor_id() || !irqs_disabled()); + + per_cpu(ipipe_percpu, 0).root = bootup_context; + + for_each_possible_cpu(cpu) { + p = &per_cpu(ipipe_percpu, cpu); + p->curr = &p->root; + } +} + +#else /* !CONFIG_SMP */ + +static inline void fixup_percpu_data(void) { } + +#endif /* CONFIG_SMP */ + +void __init __ipipe_init_early(void) +{ + struct ipipe_domain *ipd = &ipipe_root; + int cpu; + + fixup_percpu_data(); + + /* + * A lightweight registration code for the root domain. We are + * running on the boot CPU, hw interrupts are off, and + * secondary CPUs are still lost in space. + */ + ipd->name = "Linux"; + ipd->context_offset = root_context_offset(); + init_stage(ipd); + + /* + * Do the early init stuff. First we do the per-arch pipeline + * core setup, then we run the per-client setup code. At this + * point, the kernel does not provide much services yet: be + * careful. + */ + __ipipe_early_core_setup(); + __ipipe_early_client_setup(); + +#ifdef CONFIG_PRINTK + __ipipe_printk_virq = ipipe_alloc_virq(); + ipd->irqs[__ipipe_printk_virq].handler = __ipipe_flush_printk; + ipd->irqs[__ipipe_printk_virq].cookie = NULL; + ipd->irqs[__ipipe_printk_virq].ackfn = NULL; + ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK; +#endif /* CONFIG_PRINTK */ + + __ipipe_work_virq = ipipe_alloc_virq(); + ipd->irqs[__ipipe_work_virq].handler = __ipipe_do_work; + ipd->irqs[__ipipe_work_virq].cookie = NULL; + ipd->irqs[__ipipe_work_virq].ackfn = NULL; + ipd->irqs[__ipipe_work_virq].control = IPIPE_HANDLE_MASK; + + for_each_possible_cpu(cpu) + per_cpu(work_tail, cpu) = per_cpu(work_buf, cpu); +} + +void __init __ipipe_init(void) +{ + /* Now we may engage the pipeline. */ + __ipipe_enable_pipeline(); + + pr_info("Interrupt pipeline (release #%d)\n", IPIPE_CORE_RELEASE); +} + +static inline void init_head_stage(struct ipipe_domain *ipd) +{ + struct ipipe_percpu_domain_data *p; + int cpu; + + /* Must be set first, used in ipipe_percpu_context(). 
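+ *
+ * init_head_stage() runs on behalf of ipipe_register_head(); the
+ * expected usage from a co-kernel looks like this sketch (domain and
+ * name below are hypothetical):
+ *
+ *	static struct ipipe_domain my_head_domain;
+ *
+ *	ipipe_register_head(&my_head_domain, "my-cokernel");
+ *	...
+ *	ipipe_unregister_head(&my_head_domain);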
*/ + ipd->context_offset = offsetof(struct ipipe_percpu_data, head); + + for_each_online_cpu(cpu) { + p = ipipe_percpu_context(ipd, cpu); + memset(p, 0, sizeof(*p)); + p->domain = ipd; + } + + init_stage(ipd); +} + +void ipipe_register_head(struct ipipe_domain *ipd, const char *name) +{ + BUG_ON(!ipipe_root_p || ipd == &ipipe_root); + + ipd->name = name; + init_head_stage(ipd); + barrier(); + ipipe_head_domain = ipd; + add_domain_proc(ipd); + + pr_info("I-pipe: head domain %s registered.\n", name); +} +EXPORT_SYMBOL_GPL(ipipe_register_head); + +void ipipe_unregister_head(struct ipipe_domain *ipd) +{ + BUG_ON(!ipipe_root_p || ipd != ipipe_head_domain); + + ipipe_head_domain = &ipipe_root; + smp_mb(); + mutex_lock(&ipd->mutex); + remove_domain_proc(ipd); + mutex_unlock(&ipd->mutex); + + pr_info("I-pipe: head domain %s unregistered.\n", ipd->name); +} +EXPORT_SYMBOL_GPL(ipipe_unregister_head); + +void ipipe_unstall_root(void) +{ + struct ipipe_percpu_domain_data *p; + + hard_local_irq_disable(); + + /* This helps catching bad usage from assembly call sites. */ + ipipe_root_only(); + + p = ipipe_this_cpu_root_context(); + + __clear_bit(IPIPE_STALL_FLAG, &p->status); + + if (unlikely(__ipipe_ipending_p(p))) + __ipipe_sync_stage(); + + hard_local_irq_enable(); +} +EXPORT_SYMBOL(ipipe_unstall_root); + +void ipipe_restore_root(unsigned long x) +{ + ipipe_root_only(); + + if (x) + ipipe_stall_root(); + else + ipipe_unstall_root(); +} +EXPORT_SYMBOL(ipipe_restore_root); + +void __ipipe_restore_root_nosync(unsigned long x) +{ + struct ipipe_percpu_domain_data *p = ipipe_this_cpu_root_context(); + + if (raw_irqs_disabled_flags(x)) { + __set_bit(IPIPE_STALL_FLAG, &p->status); + trace_hardirqs_off(); + } else { + trace_hardirqs_on(); + __clear_bit(IPIPE_STALL_FLAG, &p->status); + } +} +EXPORT_SYMBOL_GPL(__ipipe_restore_root_nosync); + +void ipipe_unstall_head(void) +{ + struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context(); + + hard_local_irq_disable(); + + __clear_bit(IPIPE_STALL_FLAG, &p->status); + + if (unlikely(__ipipe_ipending_p(p))) + __ipipe_sync_pipeline(ipipe_head_domain); + + hard_local_irq_enable(); +} +EXPORT_SYMBOL_GPL(ipipe_unstall_head); + +void __ipipe_restore_head(unsigned long x) /* hw interrupt off */ +{ + struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context(); + + if (x) { +#ifdef CONFIG_DEBUG_KERNEL + static int warned; + if (!warned && + __test_and_set_bit(IPIPE_STALL_FLAG, &p->status)) { + /* + * Already stalled albeit ipipe_restore_head() + * should have detected it? Send a warning once. 
+ */ + hard_local_irq_enable(); + warned = 1; + pr_warning("I-pipe: ipipe_restore_head() " + "optimization failed.\n"); + dump_stack(); + hard_local_irq_disable(); + } +#else /* !CONFIG_DEBUG_KERNEL */ + __set_bit(IPIPE_STALL_FLAG, &p->status); +#endif /* CONFIG_DEBUG_KERNEL */ + } else { + __clear_bit(IPIPE_STALL_FLAG, &p->status); + if (unlikely(__ipipe_ipending_p(p))) + __ipipe_sync_pipeline(ipipe_head_domain); + hard_local_irq_enable(); + } +} +EXPORT_SYMBOL_GPL(__ipipe_restore_head); + +void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock) +{ + hard_local_irq_disable(); + if (ipipe_smp_p) + arch_spin_lock(&lock->arch_lock); + __set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); +} +EXPORT_SYMBOL_GPL(__ipipe_spin_lock_irq); + +void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock) +{ + if (ipipe_smp_p) + arch_spin_unlock(&lock->arch_lock); + __clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); + hard_local_irq_enable(); +} +EXPORT_SYMBOL_GPL(__ipipe_spin_unlock_irq); + +unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock) +{ + unsigned long flags; + int s; + + flags = hard_local_irq_save(); + if (ipipe_smp_p) + arch_spin_lock(&lock->arch_lock); + s = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); + + return arch_mangle_irq_bits(s, flags); +} +EXPORT_SYMBOL_GPL(__ipipe_spin_lock_irqsave); + +int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock, + unsigned long *x) +{ + unsigned long flags; + int s; + + flags = hard_local_irq_save(); + if (ipipe_smp_p && !arch_spin_trylock(&lock->arch_lock)) { + hard_local_irq_restore(flags); + return 0; + } + s = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); + *x = arch_mangle_irq_bits(s, flags); + + return 1; +} +EXPORT_SYMBOL_GPL(__ipipe_spin_trylock_irqsave); + +void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock, + unsigned long x) +{ + if (ipipe_smp_p) + arch_spin_unlock(&lock->arch_lock); + if (!arch_demangle_irq_bits(&x)) + __clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); + hard_local_irq_restore(x); +} +EXPORT_SYMBOL_GPL(__ipipe_spin_unlock_irqrestore); + +int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock) +{ + unsigned long flags; + + flags = hard_local_irq_save(); + if (ipipe_smp_p && !arch_spin_trylock(&lock->arch_lock)) { + hard_local_irq_restore(flags); + return 0; + } + __set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); + + return 1; +} +EXPORT_SYMBOL_GPL(__ipipe_spin_trylock_irq); + +void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock) +{ + if (ipipe_smp_p) + arch_spin_unlock(&lock->arch_lock); +} + +void __ipipe_spin_unlock_irqcomplete(unsigned long x) +{ + if (!arch_demangle_irq_bits(&x)) + __clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); + hard_local_irq_restore(x); +} + +#ifdef __IPIPE_3LEVEL_IRQMAP + +/* Must be called hw IRQs off. */ +static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p, + unsigned int irq) +{ + __set_bit(irq, p->irqheld_map); + p->irqall[irq]++; +} + +/* Must be called hw IRQs off. 
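+ * A worked example of the 3-level indexing below, assuming
+ * BITS_PER_LONG == 64: irq 4100 gives l0b = 4100 / 4096 = 1 and
+ * l1b = 4100 / 64 = 64, so bit 4100 is set in irqpend_lomap, bit 64
+ * in irqpend_mdmap and bit 1 in irqpend_himap; __ipipe_next_irq()
+ * then walks the maps top-down to recover irq 4100 from those bits.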
*/ +void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq) +{ + struct ipipe_percpu_domain_data *p = ipipe_this_cpu_context(ipd); + int l0b, l1b; + + IPIPE_WARN_ONCE(!hard_irqs_disabled()); + + l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); + l1b = irq / BITS_PER_LONG; + + if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { + __set_bit(irq, p->irqpend_lomap); + __set_bit(l1b, p->irqpend_mdmap); + __set_bit(l0b, &p->irqpend_himap); + } else + __set_bit(irq, p->irqheld_map); + + p->irqall[irq]++; +} +EXPORT_SYMBOL_GPL(__ipipe_set_irq_pending); + +/* Must be called hw IRQs off. */ +void __ipipe_lock_irq(unsigned int irq) +{ + struct ipipe_domain *ipd = ipipe_root_domain; + struct ipipe_percpu_domain_data *p; + int l0b, l1b; + + IPIPE_WARN_ONCE(!hard_irqs_disabled()); + + /* + * Interrupts requested by a registered head domain cannot be + * locked, since this would make no sense: interrupts are + * globally masked at CPU level when the head domain is + * stalled, so there is no way we could encounter the + * situation IRQ locks are handling. + */ + if (test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) + return; + + l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); + l1b = irq / BITS_PER_LONG; + + p = ipipe_this_cpu_context(ipd); + if (__test_and_clear_bit(irq, p->irqpend_lomap)) { + __set_bit(irq, p->irqheld_map); + if (p->irqpend_lomap[l1b] == 0) { + __clear_bit(l1b, p->irqpend_mdmap); + if (p->irqpend_mdmap[l0b] == 0) + __clear_bit(l0b, &p->irqpend_himap); + } + } +} +EXPORT_SYMBOL_GPL(__ipipe_lock_irq); + +/* Must be called hw IRQs off. */ +void __ipipe_unlock_irq(unsigned int irq) +{ + struct ipipe_domain *ipd = ipipe_root_domain; + struct ipipe_percpu_domain_data *p; + int l0b, l1b, cpu; + + IPIPE_WARN_ONCE(!hard_irqs_disabled()); + + if (!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) + return; + + l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); + l1b = irq / BITS_PER_LONG; + + for_each_online_cpu(cpu) { + p = ipipe_this_cpu_root_context(); + if (test_and_clear_bit(irq, p->irqheld_map)) { + /* We need atomic ops here: */ + set_bit(irq, p->irqpend_lomap); + set_bit(l1b, p->irqpend_mdmap); + set_bit(l0b, &p->irqpend_himap); + } + } +} +EXPORT_SYMBOL_GPL(__ipipe_unlock_irq); + +static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p) +{ + int l0b, l1b, l2b; + unsigned long l0m, l1m, l2m; + unsigned int irq; + + l0m = p->irqpend_himap; + if (unlikely(l0m == 0)) + return -1; + + l0b = __ipipe_ffnz(l0m); + l1m = p->irqpend_mdmap[l0b]; + if (unlikely(l1m == 0)) + return -1; + + l1b = __ipipe_ffnz(l1m) + l0b * BITS_PER_LONG; + l2m = p->irqpend_lomap[l1b]; + if (unlikely(l2m == 0)) + return -1; + + l2b = __ipipe_ffnz(l2m); + irq = l1b * BITS_PER_LONG + l2b; + + __clear_bit(irq, p->irqpend_lomap); + if (p->irqpend_lomap[l1b] == 0) { + __clear_bit(l1b, p->irqpend_mdmap); + if (p->irqpend_mdmap[l0b] == 0) + __clear_bit(l0b, &p->irqpend_himap); + } + + return irq; +} + +#else /* __IPIPE_2LEVEL_IRQMAP */ + +/* Must be called hw IRQs off. */ +static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p, + unsigned int irq) +{ + __set_bit(irq, p->irqheld_map); + p->irqall[irq]++; +} + +/* Must be called hw IRQs off. 
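+ * Same contract for the 2-level variant below, with only two maps:
+ * assuming BITS_PER_LONG == 64, irq 70 sets bit 70 in irqpend_lomap
+ * and bit 70 / 64 = 1 in irqpend_himap, which __ipipe_next_irq()
+ * folds back as 1 * 64 + __ipipe_ffnz(irqpend_lomap[1]) = 70.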
*/ +void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq) +{ + struct ipipe_percpu_domain_data *p = ipipe_this_cpu_context(ipd); + int l0b = irq / BITS_PER_LONG; + + IPIPE_WARN_ONCE(!hard_irqs_disabled()); + + if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { + __set_bit(irq, p->irqpend_lomap); + __set_bit(l0b, &p->irqpend_himap); + } else + __set_bit(irq, p->irqheld_map); + + p->irqall[irq]++; +} +EXPORT_SYMBOL_GPL(__ipipe_set_irq_pending); + +/* Must be called hw IRQs off. */ +void __ipipe_lock_irq(unsigned int irq) +{ + struct ipipe_percpu_domain_data *p; + int l0b = irq / BITS_PER_LONG; + + IPIPE_WARN_ONCE(!hard_irqs_disabled()); + + if (test_and_set_bit(IPIPE_LOCK_FLAG, + &ipipe_root_domain->irqs[irq].control)) + return; + + p = ipipe_this_cpu_root_context(); + if (__test_and_clear_bit(irq, p->irqpend_lomap)) { + __set_bit(irq, p->irqheld_map); + if (p->irqpend_lomap[l0b] == 0) + __clear_bit(l0b, &p->irqpend_himap); + } +} +EXPORT_SYMBOL_GPL(__ipipe_lock_irq); + +/* Must be called hw IRQs off. */ +void __ipipe_unlock_irq(unsigned int irq) +{ + struct ipipe_domain *ipd = ipipe_root_domain; + struct ipipe_percpu_domain_data *p; + int l0b = irq / BITS_PER_LONG, cpu; + + IPIPE_WARN_ONCE(!hard_irqs_disabled()); + + if (!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) + return; + + for_each_online_cpu(cpu) { + p = ipipe_percpu_context(ipd, cpu); + if (test_and_clear_bit(irq, p->irqheld_map)) { + /* We need atomic ops here: */ + set_bit(irq, p->irqpend_lomap); + set_bit(l0b, &p->irqpend_himap); + } + } +} +EXPORT_SYMBOL_GPL(__ipipe_unlock_irq); + +static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p) +{ + unsigned long l0m, l1m; + int l0b, l1b; + + l0m = p->irqpend_himap; + if (unlikely(l0m == 0)) + return -1; + + l0b = __ipipe_ffnz(l0m); + l1m = p->irqpend_lomap[l0b]; + if (unlikely(l1m == 0)) + return -1; + + l1b = __ipipe_ffnz(l1m); + __clear_bit(l1b, &p->irqpend_lomap[l0b]); + if (p->irqpend_lomap[l0b] == 0) + __clear_bit(l0b, &p->irqpend_himap); + + return l0b * BITS_PER_LONG + l1b; +} + +#endif /* __IPIPE_2LEVEL_IRQMAP */ + +void __ipipe_do_sync_pipeline(struct ipipe_domain *top) +{ + struct ipipe_percpu_domain_data *p; + struct ipipe_domain *ipd; + + /* We must enter over the root domain. */ + IPIPE_WARN_ONCE(__ipipe_current_domain != ipipe_root_domain); + ipd = top; +next: + p = ipipe_this_cpu_context(ipd); + if (test_bit(IPIPE_STALL_FLAG, &p->status)) + return; + + if (__ipipe_ipending_p(p)) { + if (ipd == ipipe_root_domain) + __ipipe_sync_stage(); + else { + /* Switching to head. 
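+ * i.e. make the head per-CPU context current so its pending
+ * log is played with head priority, then switch back to the
+ * root domain once the log is drained.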
*/ + p->coflags &= ~__IPIPE_ALL_R; + __ipipe_set_current_context(p); + __ipipe_sync_stage(); + __ipipe_set_current_domain(ipipe_root_domain); + } + } + + if (ipd != ipipe_root_domain) { + ipd = ipipe_root_domain; + goto next; + } +} +EXPORT_SYMBOL_GPL(__ipipe_do_sync_pipeline); + +unsigned int ipipe_alloc_virq(void) +{ + unsigned long flags, irq = 0; + int ipos; + + raw_spin_lock_irqsave(&__ipipe_lock, flags); + + if (__ipipe_virtual_irq_map != ~0) { + ipos = ffz(__ipipe_virtual_irq_map); + set_bit(ipos, &__ipipe_virtual_irq_map); + irq = ipos + IPIPE_VIRQ_BASE; + } + + raw_spin_unlock_irqrestore(&__ipipe_lock, flags); + + return irq; +} +EXPORT_SYMBOL_GPL(ipipe_alloc_virq); + +void ipipe_free_virq(unsigned int virq) +{ + clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map); + smp_mb__after_atomic(); +} +EXPORT_SYMBOL_GPL(ipipe_free_virq); + +int ipipe_request_irq(struct ipipe_domain *ipd, + unsigned int irq, + ipipe_irq_handler_t handler, + void *cookie, + ipipe_irq_ackfn_t ackfn) +{ + unsigned long flags; + int ret = 0; + + ipipe_root_only(); + + if (handler == NULL || + (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq))) + return -EINVAL; + + raw_spin_lock_irqsave(&__ipipe_lock, flags); + + if (ipd->irqs[irq].handler) { + ret = -EBUSY; + goto out; + } + + if (ackfn == NULL) + ackfn = ipipe_root_domain->irqs[irq].ackfn; + + ipd->irqs[irq].handler = handler; + ipd->irqs[irq].cookie = cookie; + ipd->irqs[irq].ackfn = ackfn; + ipd->irqs[irq].control = IPIPE_HANDLE_MASK; + + if (irq < IPIPE_NR_ROOT_IRQS) + __ipipe_enable_irqdesc(ipd, irq); +out: + raw_spin_unlock_irqrestore(&__ipipe_lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(ipipe_request_irq); + +void ipipe_free_irq(struct ipipe_domain *ipd, + unsigned int irq) +{ + unsigned long flags; + + ipipe_root_only(); + + raw_spin_lock_irqsave(&__ipipe_lock, flags); + + if (ipd->irqs[irq].handler == NULL) + goto out; + + ipd->irqs[irq].handler = NULL; + ipd->irqs[irq].cookie = NULL; + ipd->irqs[irq].ackfn = NULL; + ipd->irqs[irq].control = 0; + + if (irq < IPIPE_NR_ROOT_IRQS) + __ipipe_disable_irqdesc(ipd, irq); +out: + raw_spin_unlock_irqrestore(&__ipipe_lock, flags); +} +EXPORT_SYMBOL_GPL(ipipe_free_irq); + +void ipipe_set_hooks(struct ipipe_domain *ipd, int enables) +{ + struct ipipe_percpu_domain_data *p; + unsigned long flags; + int cpu, wait; + + if (ipd == ipipe_root_domain) { + IPIPE_WARN(enables & __IPIPE_TRAP_E); + enables &= ~__IPIPE_TRAP_E; + } else { + IPIPE_WARN(enables & __IPIPE_KEVENT_E); + enables &= ~__IPIPE_KEVENT_E; + } + + flags = ipipe_critical_enter(NULL); + + for_each_online_cpu(cpu) { + p = ipipe_percpu_context(ipd, cpu); + p->coflags &= ~__IPIPE_ALL_E; + p->coflags |= enables; + } + + wait = (enables ^ __IPIPE_ALL_E) << __IPIPE_SHIFT_R; + if (wait == 0 || !__ipipe_root_p) { + ipipe_critical_exit(flags); + return; + } + + ipipe_this_cpu_context(ipd)->coflags &= ~wait; + + ipipe_critical_exit(flags); + + /* + * In case we cleared some hooks over the root domain, we have + * to wait for any ongoing execution to finish, since our + * caller might subsequently unmap the target domain code. + * + * We synchronize with the relevant __ipipe_notify_*() + * helpers, disabling all hooks before we start waiting for + * completion on all CPUs. + */ + for_each_online_cpu(cpu) { + while (ipipe_percpu_context(ipd, cpu)->coflags & wait) + schedule_timeout_interruptible(HZ / 50); + } +} +EXPORT_SYMBOL_GPL(ipipe_set_hooks); + +int __weak ipipe_fastcall_hook(struct pt_regs *regs) +{ + return -1; /* i.e. 
fall back to slow path. */ +} + +int __weak ipipe_syscall_hook(struct ipipe_domain *ipd, struct pt_regs *regs) +{ + return 0; +} + +int __weak ipipe_get_domain_slope_hook(struct task_struct *prev, + struct task_struct *next) +{ + /* + * A co-kernel must provide this hook, or bad things may + * happen when sections protected by fpu_kernel_begin(), + * fpu_kernel_end() pairs are preempted by co-kernel threads + * also using the FPU! + */ + return 0; +} + +void __ipipe_root_sync(void) +{ + struct ipipe_percpu_domain_data *p; + unsigned long flags; + + flags = hard_local_irq_save(); + + p = ipipe_this_cpu_root_context(); + if (__ipipe_ipending_p(p)) + __ipipe_sync_stage(); + + hard_local_irq_restore(flags); +} + +int __ipipe_notify_syscall(struct pt_regs *regs) +{ + struct ipipe_domain *caller_domain, *this_domain, *ipd; + struct ipipe_percpu_domain_data *p; + unsigned long flags; + int ret = 0; + + /* + * We should definitely not pipeline a syscall with IRQs off. + */ + IPIPE_WARN_ONCE(hard_irqs_disabled()); + + flags = hard_local_irq_save(); + caller_domain = this_domain = __ipipe_current_domain; + ipd = ipipe_head_domain; +next: + p = ipipe_this_cpu_context(ipd); + if (likely(p->coflags & __IPIPE_SYSCALL_E)) { + __ipipe_set_current_context(p); + p->coflags |= __IPIPE_SYSCALL_R; + hard_local_irq_restore(flags); + ret = ipipe_syscall_hook(caller_domain, regs); + flags = hard_local_irq_save(); + p->coflags &= ~__IPIPE_SYSCALL_R; + if (__ipipe_current_domain != ipd) + /* Account for domain migration. */ + this_domain = __ipipe_current_domain; + else + __ipipe_set_current_domain(this_domain); + } + + if (this_domain == ipipe_root_domain) { + if (ipd != ipipe_root_domain && ret == 0) { + ipd = ipipe_root_domain; + goto next; + } + /* + * Careful: we may have migrated from head->root, so p + * would be ipipe_this_cpu_context(head). + */ + p = ipipe_this_cpu_root_context(); + if (__ipipe_ipending_p(p)) + __ipipe_sync_stage(); + } else if (ipipe_test_thread_flag(TIP_MAYDAY)) + __ipipe_call_mayday(regs); + + hard_local_irq_restore(flags); + + return ret; +} + +int __weak ipipe_trap_hook(struct ipipe_trap_data *data) +{ + return 0; +} + +int __ipipe_notify_trap(int exception, struct pt_regs *regs) +{ + struct ipipe_percpu_domain_data *p; + struct ipipe_trap_data data; + unsigned long flags; + int ret = 0; + + flags = hard_local_irq_save(); + + /* + * We send a notification about all traps raised over a + * registered head domain only. 
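+ * The __weak ipipe_trap_hook() above is the interception point: a
+ * co-kernel overrides it and presumably returns non-zero once it has
+ * fully handled the event, e.g. (sketch, hypothetical helper):
+ *
+ *	int ipipe_trap_hook(struct ipipe_trap_data *data)
+ *	{
+ *		return my_cokernel_handle_fault(data->exception, data->regs);
+ *	}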
+ */ + if (__ipipe_root_p) + goto out; + + p = ipipe_this_cpu_head_context(); + if (likely(p->coflags & __IPIPE_TRAP_E)) { + p->coflags |= __IPIPE_TRAP_R; + hard_local_irq_restore(flags); + data.exception = exception; + data.regs = regs; + ret = ipipe_trap_hook(&data); + flags = hard_local_irq_save(); + p->coflags &= ~__IPIPE_TRAP_R; + } +out: + hard_local_irq_restore(flags); + + return ret; +} + +int __weak ipipe_kevent_hook(int kevent, void *data) +{ + return 0; +} + +int __ipipe_notify_kevent(int kevent, void *data) +{ + struct ipipe_percpu_domain_data *p; + unsigned long flags; + int ret = 0; + + ipipe_root_only(); + + flags = hard_local_irq_save(); + + p = ipipe_this_cpu_root_context(); + if (likely(p->coflags & __IPIPE_KEVENT_E)) { + p->coflags |= __IPIPE_KEVENT_R; + hard_local_irq_restore(flags); + ret = ipipe_kevent_hook(kevent, data); + flags = hard_local_irq_save(); + p->coflags &= ~__IPIPE_KEVENT_R; + } + + hard_local_irq_restore(flags); + + return ret; +} + +void __ipipe_notify_vm_preemption(void) +{ + struct ipipe_vm_notifier *vmf; + struct ipipe_percpu_data *p; + + ipipe_check_irqoff(); + p = __ipipe_raw_cpu_ptr(&ipipe_percpu); + vmf = p->vm_notifier; + if (unlikely(vmf)) + vmf->handler(vmf); +} +EXPORT_SYMBOL_GPL(__ipipe_notify_vm_preemption); + +static void dispatch_irq_head(unsigned int irq) /* hw interrupts off */ +{ + struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context(), *old; + struct ipipe_domain *head = p->domain; + + if (unlikely(test_bit(IPIPE_STALL_FLAG, &p->status))) { + __ipipe_set_irq_pending(head, irq); + return; + } + + /* Switch to the head domain if not current. */ + old = __ipipe_current_context; + if (old != p) + __ipipe_set_current_context(p); + + p->irqall[irq]++; + __set_bit(IPIPE_STALL_FLAG, &p->status); + barrier(); + head->irqs[irq].handler(irq, head->irqs[irq].cookie); + __ipipe_run_irqtail(irq); + hard_local_irq_disable(); + p = ipipe_this_cpu_head_context(); + __clear_bit(IPIPE_STALL_FLAG, &p->status); + + /* Are we still running in the head domain? */ + if (likely(__ipipe_current_context == p)) { + /* Did we enter this code over the head domain? */ + if (old->domain == head) { + /* Yes, do immediate synchronization. */ + if (__ipipe_ipending_p(p)) + __ipipe_sync_stage(); + return; + } + __ipipe_set_current_context(ipipe_this_cpu_root_context()); + } + + /* + * We must be running over the root domain, synchronize + * the pipeline for high priority IRQs (slow path). + */ + __ipipe_do_sync_pipeline(head); +} + +void __ipipe_dispatch_irq(unsigned int irq, int flags) /* hw interrupts off */ +{ + struct ipipe_domain *ipd; + struct irq_desc *desc; + unsigned long control; + int chained_irq; + + /* + * Survival kit when reading this code: + * + * - we have two main situations, leading to three cases for + * handling interrupts: + * + * a) the root domain is alone, no registered head domain + * => all interrupts go through the interrupt log + * b) a head domain is registered + * => head domain IRQs go through the fast dispatcher + * => root domain IRQs go through the interrupt log + * + * - when no head domain is registered, ipipe_head_domain == + * ipipe_root_domain == &ipipe_root. + * + * - the caller tells us whether we should acknowledge this + * IRQ. Even virtual IRQs may require acknowledge on some + * platforms (e.g. arm/SMP). + * + * - the caller tells us whether we may try to run the IRQ log + * syncer. Typically, demuxed IRQs won't be synced + * immediately. 
+ * + * - multiplex IRQs most likely have a valid acknowledge + * handler and we may not be called with IPIPE_IRQF_NOACK + * for them. The ack handler for the multiplex IRQ actually + * decodes the demuxed interrupts. + */ + +#ifdef CONFIG_IPIPE_DEBUG + if (unlikely(irq >= IPIPE_NR_IRQS) || + (irq < IPIPE_NR_ROOT_IRQS && irq_to_desc(irq) == NULL)) { + pr_err("I-pipe: spurious interrupt %u\n", irq); + return; + } +#endif + /* + * CAUTION: on some archs, virtual IRQs may have acknowledge + * handlers. Multiplex IRQs should have one too. + */ + if (unlikely(irq >= IPIPE_NR_ROOT_IRQS)) { + desc = NULL; + chained_irq = 0; + } else { + desc = irq_to_desc(irq); + chained_irq = desc ? ipipe_chained_irq_p(desc) : 0; + } + if (flags & IPIPE_IRQF_NOACK) + IPIPE_WARN_ONCE(chained_irq); + else { + ipd = ipipe_head_domain; + control = ipd->irqs[irq].control; + if ((control & IPIPE_HANDLE_MASK) == 0) + ipd = ipipe_root_domain; + if (ipd->irqs[irq].ackfn) + ipd->irqs[irq].ackfn(desc); + if (chained_irq) { + if ((flags & IPIPE_IRQF_NOSYNC) == 0) + /* Run demuxed IRQ handlers. */ + goto sync; + return; + } + } + + /* + * Sticky interrupts must be handled early and separately, so + * that we always process them on the current domain. + */ + ipd = __ipipe_current_domain; + control = ipd->irqs[irq].control; + if (control & IPIPE_STICKY_MASK) + goto log; + + /* + * In case we have no registered head domain + * (i.e. ipipe_head_domain == &ipipe_root), we always go + * through the interrupt log, and leave the dispatching work + * ultimately to __ipipe_sync_pipeline(). + */ + ipd = ipipe_head_domain; + control = ipd->irqs[irq].control; + if (ipd == ipipe_root_domain) + /* + * The root domain must handle all interrupts, so + * testing the HANDLE bit would be pointless. + */ + goto log; + + if (control & IPIPE_HANDLE_MASK) { + if (unlikely(flags & IPIPE_IRQF_NOSYNC)) + __ipipe_set_irq_pending(ipd, irq); + else + dispatch_irq_head(irq); + return; + } + + ipd = ipipe_root_domain; +log: + __ipipe_set_irq_pending(ipd, irq); + + if (flags & IPIPE_IRQF_NOSYNC) + return; + + /* + * Optimize if we preempted a registered high priority head + * domain: we don't need to synchronize the pipeline unless + * there is a pending interrupt for it. + */ + if (!__ipipe_root_p && + !__ipipe_ipending_p(ipipe_this_cpu_head_context())) + return; +sync: + __ipipe_sync_pipeline(ipipe_head_domain); +} + +void ipipe_raise_irq(unsigned int irq) +{ + struct ipipe_domain *ipd = ipipe_head_domain; + unsigned long flags, control; + + flags = hard_local_irq_save(); + + /* + * Fast path: raising a virtual IRQ handled by the head + * domain. + */ + if (likely(ipipe_virtual_irq_p(irq) && ipd != ipipe_root_domain)) { + control = ipd->irqs[irq].control; + if (likely(control & IPIPE_HANDLE_MASK)) { + dispatch_irq_head(irq); + goto out; + } + } + + /* Emulate regular device IRQ receipt. 
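+ * Either way, a typical use of this service is to trigger a virtual
+ * IRQ previously set up with the allocation/request services from
+ * this file (sketch, hypothetical handler name):
+ *
+ *	unsigned int virq = ipipe_alloc_virq();
+ *
+ *	ipipe_request_irq(ipipe_root_domain, virq,
+ *			  my_virq_handler, NULL, NULL);
+ *	...
+ *	ipipe_raise_irq(virq);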
*/ + __ipipe_dispatch_irq(irq, IPIPE_IRQF_NOACK); +out: + hard_local_irq_restore(flags); + +} +EXPORT_SYMBOL_GPL(ipipe_raise_irq); + +static void sync_root_irqs(void) +{ + struct ipipe_percpu_domain_data *p; + unsigned long flags; + + flags = hard_local_irq_save(); + + p = ipipe_this_cpu_root_context(); + if (unlikely(__ipipe_ipending_p(p))) + __ipipe_sync_stage(); + + hard_local_irq_restore(flags); +} + +int ipipe_handle_syscall(struct thread_info *ti, + unsigned long nr, struct pt_regs *regs) +{ + unsigned long local_flags = READ_ONCE(ti->ipipe_flags); + int ret; + + /* + * NOTE: This is a backport from the DOVETAIL syscall + * redirector to the older pipeline implementation. + * + * == + * + * If the syscall # is out of bounds and the current IRQ stage + * is not the root one, this has to be a non-native system + * call handled by some co-kernel on the head stage. Hand it + * over to the head stage via the fast syscall handler. + * + * Otherwise, if the system call is out of bounds or the + * current thread is shared with a co-kernel, hand the syscall + * over to the latter through the pipeline stages. This + * allows: + * + * - the co-kernel to receive the initial - foreign - syscall + * a thread should send for enabling syscall handling by the + * co-kernel. + * + * - the co-kernel to manipulate the current execution stage + * for handling the request, which includes switching the + * current thread back to the root stage if the syscall is a + * native one, or promoting it to the head stage if handling + * the foreign syscall requires this. + * + * Native syscalls from regular (non-pipeline) threads are + * ignored by this routine, and flow down to the regular + * system call handler. + */ + + if (nr >= NR_syscalls && (local_flags & _TIP_HEAD)) { + ipipe_fastcall_hook(regs); + local_flags = READ_ONCE(ti->ipipe_flags); + if (local_flags & _TIP_HEAD) { + if (local_flags & _TIP_MAYDAY) + __ipipe_call_mayday(regs); + return 1; /* don't pass down, no tail work. */ + } else { + sync_root_irqs(); + return -1; /* don't pass down, do tail work. */ + } + } + + if ((local_flags & _TIP_NOTIFY) || nr >= NR_syscalls) { + ret =__ipipe_notify_syscall(regs); + local_flags = READ_ONCE(ti->ipipe_flags); + if (local_flags & _TIP_HEAD) + return 1; /* don't pass down, no tail work. */ + if (ret) + return -1; /* don't pass down, do tail work. */ + } + + return 0; /* pass syscall down to the host. */ +} + +#ifdef CONFIG_PREEMPT + +void preempt_schedule_irq(void); + +void __sched __ipipe_preempt_schedule_irq(void) +{ + struct ipipe_percpu_domain_data *p; + unsigned long flags; + + if (WARN_ON_ONCE(!hard_irqs_disabled())) + hard_local_irq_disable(); + + local_irq_save(flags); + hard_local_irq_enable(); + preempt_schedule_irq(); /* Ok, may reschedule now. */ + hard_local_irq_disable(); + + /* + * Flush any pending interrupt that may have been logged after + * preempt_schedule_irq() stalled the root stage before + * returning to us, and now. 
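+ * Keep in mind that the flags saved by local_irq_save() above are the
+ * virtual (root stage) ones, while hard IRQs are briefly re-enabled
+ * around preempt_schedule_irq(); this is why restoring the virtual
+ * state with __ipipe_restore_root_nosync() is enough on the way out.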
+ */ + p = ipipe_this_cpu_root_context(); + if (unlikely(__ipipe_ipending_p(p))) { + trace_hardirqs_on(); + __clear_bit(IPIPE_STALL_FLAG, &p->status); + __ipipe_sync_stage(); + } + + __ipipe_restore_root_nosync(flags); +} + +#else /* !CONFIG_PREEMPT */ + +#define __ipipe_preempt_schedule_irq() do { } while (0) + +#endif /* !CONFIG_PREEMPT */ + +#ifdef CONFIG_TRACE_IRQFLAGS +#define root_stall_after_handler() local_irq_disable() +#else +#define root_stall_after_handler() do { } while (0) +#endif + +/* + * __ipipe_do_sync_stage() -- Flush the pending IRQs for the current + * domain (and processor). This routine flushes the interrupt log (see + * "Optimistic interrupt protection" from D. Stodolsky et al. for more + * on the deferred interrupt scheme). Every interrupt that occurred + * while the pipeline was stalled gets played. + * + * WARNING: CPU migration may occur over this routine. + */ +void __ipipe_do_sync_stage(void) +{ + struct ipipe_percpu_domain_data *p; + struct ipipe_domain *ipd; + int irq; + + p = __ipipe_current_context; +respin: + ipd = p->domain; + + __set_bit(IPIPE_STALL_FLAG, &p->status); + smp_wmb(); + + if (ipd == ipipe_root_domain) + trace_hardirqs_off(); + + for (;;) { + irq = __ipipe_next_irq(p); + if (irq < 0) + break; + /* + * Make sure the compiler does not reorder wrongly, so + * that all updates to maps are done before the + * handler gets called. + */ + barrier(); + + if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) + continue; + + if (ipd != ipipe_head_domain) + hard_local_irq_enable(); + + if (likely(ipd != ipipe_root_domain)) { + ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); + __ipipe_run_irqtail(irq); + hard_local_irq_disable(); + } else if (ipipe_virtual_irq_p(irq)) { + irq_enter(); + ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); + irq_exit(); + root_stall_after_handler(); + hard_local_irq_disable(); + } else { + ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); + root_stall_after_handler(); + hard_local_irq_disable(); + } + + /* + * We may have migrated to a different CPU (1) upon + * return from the handler, or downgraded from the + * head domain to the root one (2), the opposite way + * is NOT allowed though. + * + * (1) reload the current per-cpu context pointer, so + * that we further pull pending interrupts from the + * proper per-cpu log. + * + * (2) check the stall bit to know whether we may + * dispatch any interrupt pending for the root domain, + * and respin the entire dispatch loop if + * so. Otherwise, immediately return to the caller, + * _without_ affecting the stall state for the root + * domain, since we do not own it at this stage. This + * case is basically reflecting what may happen in + * dispatch_irq_head() for the fast path. + */ + p = __ipipe_current_context; + if (p->domain != ipd) { + IPIPE_BUG_ON(ipd == ipipe_root_domain); + if (test_bit(IPIPE_STALL_FLAG, &p->status)) + return; + goto respin; + } + } + + if (ipd == ipipe_root_domain) + trace_hardirqs_on(); + + __clear_bit(IPIPE_STALL_FLAG, &p->status); +} + +void __ipipe_call_mayday(struct pt_regs *regs) +{ + unsigned long flags; + + ipipe_clear_thread_flag(TIP_MAYDAY); + flags = hard_local_irq_save(); + __ipipe_notify_trap(IPIPE_TRAP_MAYDAY, regs); + hard_local_irq_restore(flags); +} + +#ifdef CONFIG_SMP + +/* Always called with hw interrupts off. 
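+ * This is the IPI handler counterpart of ipipe_critical_enter(); the
+ * caller side pairs up as kernel/ipipe/timer.c does, roughly:
+ *
+ *	flags = ipipe_critical_enter(syncfn);
+ *	syncfn();
+ *	ipipe_critical_exit(flags);
+ *
+ * with every other online CPU spinning here and running syncfn()
+ * until the initiator drops the barrier.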
*/ +void __ipipe_do_critical_sync(unsigned int irq, void *cookie) +{ + int cpu = ipipe_processor_id(); + + cpumask_set_cpu(cpu, &__ipipe_cpu_sync_map); + + /* + * Now we are in sync with the lock requestor running on + * another CPU. Enter a spinning wait until he releases the + * global lock. + */ + raw_spin_lock(&__ipipe_cpu_barrier); + + /* Got it. Now get out. */ + + /* Call the sync routine if any. */ + if (__ipipe_cpu_sync) + __ipipe_cpu_sync(); + + cpumask_set_cpu(cpu, &__ipipe_cpu_pass_map); + + raw_spin_unlock(&__ipipe_cpu_barrier); + + cpumask_clear_cpu(cpu, &__ipipe_cpu_sync_map); +} +#endif /* CONFIG_SMP */ + +unsigned long ipipe_critical_enter(void (*syncfn)(void)) +{ + cpumask_t allbutself __maybe_unused, online __maybe_unused; + int cpu __maybe_unused, n __maybe_unused; + unsigned long flags, loops __maybe_unused; + + flags = hard_local_irq_save(); + + if (num_online_cpus() == 1) + return flags; + +#ifdef CONFIG_SMP + + cpu = ipipe_processor_id(); + if (!cpumask_test_and_set_cpu(cpu, &__ipipe_cpu_lock_map)) { + while (test_and_set_bit(0, &__ipipe_critical_lock)) { + n = 0; + hard_local_irq_enable(); + + do + cpu_relax(); + while (++n < cpu); + + hard_local_irq_disable(); + } +restart: + online = *cpu_online_mask; + raw_spin_lock(&__ipipe_cpu_barrier); + + __ipipe_cpu_sync = syncfn; + + cpumask_clear(&__ipipe_cpu_pass_map); + cpumask_set_cpu(cpu, &__ipipe_cpu_pass_map); + + /* + * Send the sync IPI to all processors but the current + * one. + */ + cpumask_andnot(&allbutself, &online, &__ipipe_cpu_pass_map); + ipipe_send_ipi(IPIPE_CRITICAL_IPI, allbutself); + loops = IPIPE_CRITICAL_TIMEOUT; + + while (!cpumask_equal(&__ipipe_cpu_sync_map, &allbutself)) { + if (--loops > 0) { + cpu_relax(); + continue; + } + /* + * We ran into a deadlock due to a contended + * rwlock. Cancel this round and retry. + */ + __ipipe_cpu_sync = NULL; + + raw_spin_unlock(&__ipipe_cpu_barrier); + /* + * Ensure all CPUs consumed the IPI to avoid + * running __ipipe_cpu_sync prematurely. This + * usually resolves the deadlock reason too. 
+ */ + while (!cpumask_equal(&online, &__ipipe_cpu_pass_map)) + cpu_relax(); + + goto restart; + } + } + + atomic_inc(&__ipipe_critical_count); + +#endif /* CONFIG_SMP */ + + return flags; +} +EXPORT_SYMBOL_GPL(ipipe_critical_enter); + +void ipipe_critical_exit(unsigned long flags) +{ + if (num_online_cpus() == 1) { + hard_local_irq_restore(flags); + return; + } + +#ifdef CONFIG_SMP + if (atomic_dec_and_test(&__ipipe_critical_count)) { + raw_spin_unlock(&__ipipe_cpu_barrier); + while (!cpumask_empty(&__ipipe_cpu_sync_map)) + cpu_relax(); + cpumask_clear_cpu(ipipe_processor_id(), &__ipipe_cpu_lock_map); + clear_bit(0, &__ipipe_critical_lock); + smp_mb__after_atomic(); + } +#endif /* CONFIG_SMP */ + + hard_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(ipipe_critical_exit); + +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT + +void ipipe_root_only(void) +{ + struct ipipe_domain *this_domain; + unsigned long flags; + + flags = hard_smp_local_irq_save(); + + this_domain = __ipipe_current_domain; + if (likely(this_domain == ipipe_root_domain && + !test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status))) { + hard_smp_local_irq_restore(flags); + return; + } + + if (!__this_cpu_read(ipipe_percpu.context_check)) { + hard_smp_local_irq_restore(flags); + return; + } + + hard_smp_local_irq_restore(flags); + + ipipe_prepare_panic(); + ipipe_trace_panic_freeze(); + + if (this_domain != ipipe_root_domain) + pr_err("I-pipe: Detected illicit call from head domain '%s'\n" + " into a regular Linux service\n", + this_domain->name); + else + pr_err("I-pipe: Detected stalled head domain, " + "probably caused by a bug.\n" + " A critical section may have been " + "left unterminated.\n"); + dump_stack(); + ipipe_trace_panic_dump(); +} +EXPORT_SYMBOL(ipipe_root_only); + +#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ + +#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) + +int notrace __ipipe_check_percpu_access(void) +{ + struct ipipe_percpu_domain_data *p; + struct ipipe_domain *this_domain; + unsigned long flags; + int ret = 0; + + flags = hard_local_irq_save_notrace(); + + /* + * Don't use __ipipe_current_domain here, this would recurse + * indefinitely. + */ + this_domain = raw_cpu_read(ipipe_percpu.curr)->domain; + + /* + * Only the root domain may implement preemptive CPU migration + * of tasks, so anything above in the pipeline should be fine. + */ + if (this_domain != ipipe_root_domain) + goto out; + + if (raw_irqs_disabled_flags(flags)) + goto out; + + /* + * Last chance: hw interrupts were enabled on entry while + * running over the root domain, but the root stage might be + * currently stalled, in which case preemption would be + * disabled, and no migration could occur. + */ + + p = raw_cpu_ptr(&ipipe_percpu.root); + if (!preemptible()) + goto out; + /* + * Our caller may end up accessing the wrong per-cpu variable + * instance due to CPU migration; tell it to complain about + * this. + */ + ret = 1; +out: + hard_local_irq_restore_notrace(flags); + + return ret; +} +EXPORT_SYMBOL_GPL(__ipipe_check_percpu_access); + +void __ipipe_spin_unlock_debug(unsigned long flags) +{ + /* + * We catch a nasty issue where spin_unlock_irqrestore() on a + * regular kernel spinlock is about to re-enable hw interrupts + * in a section entered with hw irqs off. This is clearly the + * sign of a massive breakage coming. Usual suspect is a + * regular spinlock which was overlooked, used within a + * section which must run with hw irqs disabled. 
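+ * Roughly, the check below fires on sequences like (hypothetical
+ * lock and flags):
+ *
+ *	hard_local_irq_disable();
+ *	spin_lock_irqsave(&some_lock, flags);
+ *	...
+ *	spin_unlock_irqrestore(&some_lock, flags);
+ *
+ * where flags records an IRQs-on state, so the regular unlock is
+ * about to undo the hard masking the surrounding section relies on.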
+ */ + IPIPE_WARN_ONCE(!raw_irqs_disabled_flags(flags) && hard_irqs_disabled()); +} +EXPORT_SYMBOL(__ipipe_spin_unlock_debug); + +#endif /* CONFIG_IPIPE_DEBUG_INTERNAL && CONFIG_SMP */ + +void ipipe_prepare_panic(void) +{ +#ifdef CONFIG_PRINTK + __ipipe_printk_bypass = 1; +#endif + ipipe_context_check_off(); +} +EXPORT_SYMBOL_GPL(ipipe_prepare_panic); + +static void __ipipe_do_work(unsigned int virq, void *cookie) +{ + struct ipipe_work_header *work; + unsigned long flags; + void *curr, *tail; + int cpu; + + /* + * Work is dispatched in enqueuing order. This interrupt + * context can't migrate to another CPU. + */ + cpu = smp_processor_id(); + curr = per_cpu(work_buf, cpu); + + for (;;) { + flags = hard_local_irq_save(); + tail = per_cpu(work_tail, cpu); + if (curr == tail) { + per_cpu(work_tail, cpu) = per_cpu(work_buf, cpu); + hard_local_irq_restore(flags); + return; + } + work = curr; + curr += work->size; + hard_local_irq_restore(flags); + work->handler(work); + } +} + +void __ipipe_post_work_root(struct ipipe_work_header *work) +{ + unsigned long flags; + void *tail; + int cpu; + + /* + * Subtle: we want to use the head stall/unstall operators, + * not the hard_* routines to protect against races. This way, + * we ensure that a root-based caller will trigger the virq + * handling immediately when unstalling the head stage, as a + * result of calling __ipipe_sync_pipeline() under the hood. + */ + flags = ipipe_test_and_stall_head(); + cpu = ipipe_processor_id(); + tail = per_cpu(work_tail, cpu); + + if (WARN_ON_ONCE((unsigned char *)tail + work->size >= + per_cpu(work_buf, cpu) + WORKBUF_SIZE)) + goto out; + + /* Work handling is deferred, so data has to be copied. */ + memcpy(tail, work, work->size); + per_cpu(work_tail, cpu) = tail + work->size; + ipipe_post_irq_root(__ipipe_work_virq); +out: + ipipe_restore_head(flags); +} +EXPORT_SYMBOL_GPL(__ipipe_post_work_root); + +void __weak __ipipe_arch_share_current(int flags) +{ +} + +void __ipipe_share_current(int flags) +{ + ipipe_root_only(); + + __ipipe_arch_share_current(flags); +} +EXPORT_SYMBOL_GPL(__ipipe_share_current); + +#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || defined(CONFIG_PROVE_LOCKING) || \ + defined(CONFIG_PREEMPT_VOLUNTARY) || defined(CONFIG_IPIPE_DEBUG_CONTEXT) +void __ipipe_uaccess_might_fault(void) +{ + struct ipipe_percpu_domain_data *pdd; + struct ipipe_domain *ipd; + unsigned long flags; + + flags = hard_local_irq_save(); + ipd = __ipipe_current_domain; + if (ipd == ipipe_root_domain) { + hard_local_irq_restore(flags); + might_fault(); + return; + } + +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT + pdd = ipipe_this_cpu_context(ipd); + WARN_ON_ONCE(hard_irqs_disabled_flags(flags) + || test_bit(IPIPE_STALL_FLAG, &pdd->status)); +#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ + (void)pdd; +#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ + hard_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(__ipipe_uaccess_might_fault); +#endif diff --git a/kernel/ipipe/timer.c b/kernel/ipipe/timer.c new file mode 100644 index 000000000000..0da956413890 --- /dev/null +++ b/kernel/ipipe/timer.c @@ -0,0 +1,588 @@ +/* -*- linux-c -*- + * linux/kernel/ipipe/timer.c + * + * Copyright (C) 2012 Gilles Chanteperdrix + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * I-pipe timer request interface. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long __ipipe_hrtimer_freq; + +static LIST_HEAD(timers); +static IPIPE_DEFINE_SPINLOCK(lock); + +static DEFINE_PER_CPU(struct ipipe_timer *, percpu_timer); + +#ifdef CONFIG_GENERIC_CLOCKEVENTS +/* + * Default request method: switch to oneshot mode if supported. + */ +static void ipipe_timer_default_request(struct ipipe_timer *timer, int steal) +{ + struct clock_event_device *evtdev = timer->host_timer; + + if (!(evtdev->features & CLOCK_EVT_FEAT_ONESHOT)) + return; + + if (clockevent_state_oneshot(evtdev) || + clockevent_state_oneshot_stopped(evtdev)) + timer->orig_mode = CLOCK_EVT_MODE_ONESHOT; + else { + if (clockevent_state_periodic(evtdev)) + timer->orig_mode = CLOCK_EVT_MODE_PERIODIC; + else if (clockevent_state_shutdown(evtdev)) + timer->orig_mode = CLOCK_EVT_MODE_SHUTDOWN; + else + timer->orig_mode = CLOCK_EVT_MODE_UNUSED; + evtdev->set_state_oneshot(evtdev); + evtdev->set_next_event(timer->freq / HZ, evtdev); + } +} + +/* + * Default release method: return the timer to the mode it had when + * starting. + */ +static void ipipe_timer_default_release(struct ipipe_timer *timer) +{ + struct clock_event_device *evtdev = timer->host_timer; + + switch (timer->orig_mode) { + case CLOCK_EVT_MODE_SHUTDOWN: + evtdev->set_state_shutdown(evtdev); + break; + case CLOCK_EVT_MODE_PERIODIC: + evtdev->set_state_periodic(evtdev); + case CLOCK_EVT_MODE_ONESHOT: + evtdev->set_next_event(timer->freq / HZ, evtdev); + break; + } +} + +static int get_dev_mode(struct clock_event_device *evtdev) +{ + if (clockevent_state_oneshot(evtdev) || + clockevent_state_oneshot_stopped(evtdev)) + return CLOCK_EVT_MODE_ONESHOT; + + if (clockevent_state_periodic(evtdev)) + return CLOCK_EVT_MODE_PERIODIC; + + if (clockevent_state_shutdown(evtdev)) + return CLOCK_EVT_MODE_SHUTDOWN; + + return CLOCK_EVT_MODE_UNUSED; +} + +void ipipe_host_timer_register(struct clock_event_device *evtdev) +{ + struct ipipe_timer *timer = evtdev->ipipe_timer; + + if (timer == NULL) + return; + + timer->orig_mode = CLOCK_EVT_MODE_UNUSED; + + if (timer->request == NULL) + timer->request = ipipe_timer_default_request; + + /* + * By default, use the same method as linux timer, on ARM at + * least, most set_next_event methods are safe to be called + * from Xenomai domain anyway. 
+ */ + if (timer->set == NULL) { + timer->timer_set = evtdev; + timer->set = (typeof(timer->set))evtdev->set_next_event; + } + + if (timer->release == NULL) + timer->release = ipipe_timer_default_release; + + if (timer->name == NULL) + timer->name = evtdev->name; + + if (timer->rating == 0) + timer->rating = evtdev->rating; + + timer->freq = (1000000000ULL * evtdev->mult) >> evtdev->shift; + + if (timer->min_delay_ticks == 0) + timer->min_delay_ticks = + (evtdev->min_delta_ns * evtdev->mult) >> evtdev->shift; + + if (timer->cpumask == NULL) + timer->cpumask = evtdev->cpumask; + + timer->host_timer = evtdev; + + ipipe_timer_register(timer); +} +#endif /* CONFIG_GENERIC_CLOCKEVENTS */ + +/* + * register a timer: maintain them in a list sorted by rating + */ +void ipipe_timer_register(struct ipipe_timer *timer) +{ + struct ipipe_timer *t; + unsigned long flags; + + if (timer->timer_set == NULL) + timer->timer_set = timer; + + if (timer->cpumask == NULL) + timer->cpumask = cpumask_of(smp_processor_id()); + + raw_spin_lock_irqsave(&lock, flags); + + list_for_each_entry(t, &timers, link) { + if (t->rating <= timer->rating) { + __list_add(&timer->link, t->link.prev, &t->link); + goto done; + } + } + list_add_tail(&timer->link, &timers); + done: + raw_spin_unlock_irqrestore(&lock, flags); +} + +static void ipipe_timer_request_sync(void) +{ + struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); + struct clock_event_device *evtdev; + int steal; + + if (!timer) + return; + + evtdev = timer->host_timer; + +#ifdef CONFIG_GENERIC_CLOCKEVENTS + steal = evtdev != NULL && !clockevent_state_detached(evtdev); +#else /* !CONFIG_GENERIC_CLOCKEVENTS */ + steal = 1; +#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ + + timer->request(timer, steal); +} + +static void config_pcpu_timer(struct ipipe_timer *t, unsigned hrclock_freq) +{ + unsigned long long tmp; + unsigned hrtimer_freq; + + if (__ipipe_hrtimer_freq != t->freq) + __ipipe_hrtimer_freq = t->freq; + + hrtimer_freq = t->freq; + if (__ipipe_hrclock_freq > UINT_MAX) + hrtimer_freq /= 1000; + + t->c2t_integ = hrtimer_freq / hrclock_freq; + tmp = (((unsigned long long) + (hrtimer_freq % hrclock_freq)) << 32) + + hrclock_freq - 1; + do_div(tmp, hrclock_freq); + t->c2t_frac = tmp; +} + +/* Set up a timer as per-cpu timer for ipipe */ +static void install_pcpu_timer(unsigned cpu, unsigned hrclock_freq, + struct ipipe_timer *t) +{ + per_cpu(ipipe_percpu.hrtimer_irq, cpu) = t->irq; + per_cpu(percpu_timer, cpu) = t; + config_pcpu_timer(t, hrclock_freq); +} + +static void select_root_only_timer(unsigned cpu, unsigned hrclock_khz, + const struct cpumask *mask, + struct ipipe_timer *t) { + unsigned icpu; + struct clock_event_device *evtdev; + + /* + * If no ipipe-supported CPU shares an interrupt with the + * timer, we do not need to care about it. + */ + for_each_cpu(icpu, mask) { + if (t->irq == per_cpu(ipipe_percpu.hrtimer_irq, icpu)) { +#ifdef CONFIG_GENERIC_CLOCKEVENTS + evtdev = t->host_timer; + if (evtdev && clockevent_state_shutdown(evtdev)) + continue; +#endif /* CONFIG_GENERIC_CLOCKEVENTS */ + goto found; + } + } + + return; + +found: + install_pcpu_timer(cpu, hrclock_khz, t); +} + +/* + * Choose per-cpu timers with the highest rating by traversing the + * rating-sorted list for each CPU. 
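+ *
+ * Illustrative aside, not part of this patch: a co-kernel client
+ * would typically reserve the per-cpu hardware timers first, then
+ * reroute each tick source to its own handler, using the interface
+ * below (rt_cpus and the my_* handlers are hypothetical
+ * placeholders):
+ *
+ *	ipipe_select_timers(&rt_cpus);
+ *	for_each_cpu(cpu, &rt_cpus)
+ *		ipipe_timer_start(my_tick_handler, my_emumode,
+ *				  my_emutick, cpu);
+ *	...
+ *	ipipe_timer_stop(cpu);		(for each CPU, then)
+ *	ipipe_timers_release();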
+ */ +int ipipe_select_timers(const struct cpumask *mask) +{ + unsigned hrclock_freq; + unsigned long long tmp; + struct ipipe_timer *t; + struct clock_event_device *evtdev; + unsigned long flags; + unsigned cpu; + cpumask_t fixup; + + if (!__ipipe_hrclock_ok()) { + printk("I-pipe: high-resolution clock not working\n"); + return -ENODEV; + } + + if (__ipipe_hrclock_freq > UINT_MAX) { + tmp = __ipipe_hrclock_freq; + do_div(tmp, 1000); + hrclock_freq = tmp; + } else + hrclock_freq = __ipipe_hrclock_freq; + + raw_spin_lock_irqsave(&lock, flags); + + /* First, choose timers for the CPUs handled by ipipe */ + for_each_cpu(cpu, mask) { + list_for_each_entry(t, &timers, link) { + if (!cpumask_test_cpu(cpu, t->cpumask)) + continue; + +#ifdef CONFIG_GENERIC_CLOCKEVENTS + evtdev = t->host_timer; + if (evtdev && clockevent_state_shutdown(evtdev)) + continue; +#endif /* CONFIG_GENERIC_CLOCKEVENTS */ + goto found; + } + + printk("I-pipe: could not find timer for cpu #%d\n", + cpu); + goto err_remove_all; +found: + install_pcpu_timer(cpu, hrclock_freq, t); + } + + /* + * Second, check if we need to fix up any CPUs not supported + * by ipipe (but by Linux) whose interrupt may need to be + * forwarded because they have the same IRQ as an ipipe-enabled + * timer. + */ + cpumask_andnot(&fixup, cpu_online_mask, mask); + + for_each_cpu(cpu, &fixup) { + list_for_each_entry(t, &timers, link) { + if (!cpumask_test_cpu(cpu, t->cpumask)) + continue; + + select_root_only_timer(cpu, hrclock_freq, mask, t); + } + } + + raw_spin_unlock_irqrestore(&lock, flags); + + flags = ipipe_critical_enter(ipipe_timer_request_sync); + ipipe_timer_request_sync(); + ipipe_critical_exit(flags); + + return 0; + +err_remove_all: + raw_spin_unlock_irqrestore(&lock, flags); + + for_each_cpu(cpu, mask) { + per_cpu(ipipe_percpu.hrtimer_irq, cpu) = -1; + per_cpu(percpu_timer, cpu) = NULL; + } + __ipipe_hrtimer_freq = 0; + + return -ENODEV; +} + +static void ipipe_timer_release_sync(void) +{ + struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); + + if (timer) + timer->release(timer); +} + +void ipipe_timers_release(void) +{ + unsigned long flags; + unsigned cpu; + + flags = ipipe_critical_enter(ipipe_timer_release_sync); + ipipe_timer_release_sync(); + ipipe_critical_exit(flags); + + for_each_online_cpu(cpu) { + per_cpu(ipipe_percpu.hrtimer_irq, cpu) = -1; + per_cpu(percpu_timer, cpu) = NULL; + __ipipe_hrtimer_freq = 0; + } +} + +static void __ipipe_ack_hrtimer_irq(struct irq_desc *desc) +{ + struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); + + if (desc) + desc->ipipe_ack(desc); + if (timer->ack) + timer->ack(); + if (desc) + desc->ipipe_end(desc); +} + +static int do_set_oneshot(struct clock_event_device *cdev) +{ + struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); + + timer->mode_handler(CLOCK_EVT_MODE_ONESHOT, cdev); + + return 0; +} + +static int do_set_periodic(struct clock_event_device *cdev) +{ + struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); + + timer->mode_handler(CLOCK_EVT_MODE_PERIODIC, cdev); + + return 0; +} + +static int do_set_shutdown(struct clock_event_device *cdev) +{ + struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); + + timer->mode_handler(CLOCK_EVT_MODE_SHUTDOWN, cdev); + + return 0; +} + +int ipipe_timer_start(void (*tick_handler)(void), + void (*emumode)(enum clock_event_mode mode, + struct clock_event_device *cdev), + int (*emutick)(unsigned long evt, + struct clock_event_device *cdev), + unsigned cpu) +{ + struct clock_event_device *evtdev; + 
struct ipipe_timer *timer; + struct irq_desc *desc; + unsigned long flags; + int steal, ret; + + timer = per_cpu(percpu_timer, cpu); + evtdev = timer->host_timer; + + flags = ipipe_critical_enter(NULL); + + ret = ipipe_request_irq(ipipe_head_domain, timer->irq, + (ipipe_irq_handler_t)tick_handler, + NULL, __ipipe_ack_hrtimer_irq); + if (ret < 0 && ret != -EBUSY) { + ipipe_critical_exit(flags); + return ret; + } + +#ifdef CONFIG_GENERIC_CLOCKEVENTS + steal = evtdev != NULL && !clockevent_state_detached(evtdev); + if (steal && evtdev->ipipe_stolen == 0) { + timer->real_mult = evtdev->mult; + timer->real_shift = evtdev->shift; + timer->orig_set_state_periodic = evtdev->set_state_periodic; + timer->orig_set_state_oneshot = evtdev->set_state_oneshot; + timer->orig_set_state_oneshot_stopped = evtdev->set_state_oneshot_stopped; + timer->orig_set_state_shutdown = evtdev->set_state_shutdown; + timer->orig_set_next_event = evtdev->set_next_event; + timer->mode_handler = emumode; + evtdev->mult = 1; + evtdev->shift = 0; + evtdev->max_delta_ns = UINT_MAX; + evtdev->set_state_periodic = do_set_periodic; + evtdev->set_state_oneshot = do_set_oneshot; + evtdev->set_state_oneshot_stopped = do_set_oneshot; + evtdev->set_state_shutdown = do_set_shutdown; + evtdev->set_next_event = emutick; + evtdev->ipipe_stolen = 1; + } + + ret = get_dev_mode(evtdev); +#else /* CONFIG_GENERIC_CLOCKEVENTS */ + steal = 1; + ret = 0; +#endif /* CONFIG_GENERIC_CLOCKEVENTS */ + + ipipe_critical_exit(flags); + + desc = irq_to_desc(timer->irq); + if (desc && irqd_irq_disabled(&desc->irq_data)) + ipipe_enable_irq(timer->irq); + + return ret; +} + +void ipipe_timer_stop(unsigned cpu) +{ + unsigned long __maybe_unused flags; + struct clock_event_device *evtdev; + struct ipipe_timer *timer; + struct irq_desc *desc; + + timer = per_cpu(percpu_timer, cpu); + evtdev = timer->host_timer; + + desc = irq_to_desc(timer->irq); + if (desc && irqd_irq_disabled(&desc->irq_data)) + ipipe_disable_irq(timer->irq); + +#ifdef CONFIG_GENERIC_CLOCKEVENTS + if (evtdev) { + flags = ipipe_critical_enter(NULL); + + if (evtdev->ipipe_stolen) { + evtdev->mult = timer->real_mult; + evtdev->shift = timer->real_shift; + evtdev->set_state_periodic = timer->orig_set_state_periodic; + evtdev->set_state_oneshot = timer->orig_set_state_oneshot; + evtdev->set_state_oneshot_stopped = timer->orig_set_state_oneshot_stopped; + evtdev->set_state_shutdown = timer->orig_set_state_shutdown; + evtdev->set_next_event = timer->orig_set_next_event; + evtdev->ipipe_stolen = 0; + } + + ipipe_critical_exit(flags); + } +#endif /* CONFIG_GENERIC_CLOCKEVENTS */ + + ipipe_free_irq(ipipe_head_domain, timer->irq); +} + +void ipipe_timer_set(unsigned long cdelay) +{ + unsigned long tdelay; + struct ipipe_timer *t; + + t = __ipipe_raw_cpu_read(percpu_timer); + + /* + * Even though some architectures may use a 64 bits delay + * here, we voluntarily limit to 32 bits, 4 billions ticks + * should be enough for now. Would a timer needs more, an + * extra call to the tick handler would simply occur after 4 + * billions ticks. 
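+	 *
+	 * Aside: the conversion below scales clock ticks to timer
+	 * ticks with the 32.32 fixed-point ratio prepared by
+	 * config_pcpu_timer(), i.e. roughly timer_freq / clock_freq.
+	 * For instance, with a 1 MHz clock feeding a 2.5 MHz timer,
+	 * c2t_integ == 2 and c2t_frac is about half of 2^32, so a
+	 * 1000-tick delay maps to 2000 + 500 = 2500 timer ticks.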
+ */ + if (cdelay > UINT_MAX) + cdelay = UINT_MAX; + + tdelay = cdelay; + if (t->c2t_integ != 1) + tdelay *= t->c2t_integ; + if (t->c2t_frac) + tdelay += ((unsigned long long)cdelay * t->c2t_frac) >> 32; + if (tdelay < t->min_delay_ticks) + tdelay = t->min_delay_ticks; + + if (t->set(tdelay, t->timer_set) < 0) + ipipe_raise_irq(t->irq); +} +EXPORT_SYMBOL_GPL(ipipe_timer_set); + +const char *ipipe_timer_name(void) +{ + return per_cpu(percpu_timer, 0)->name; +} +EXPORT_SYMBOL_GPL(ipipe_timer_name); + +unsigned ipipe_timer_ns2ticks(struct ipipe_timer *timer, unsigned ns) +{ + unsigned long long tmp; + BUG_ON(!timer->freq); + tmp = (unsigned long long)ns * timer->freq; + do_div(tmp, 1000000000); + return tmp; +} + +#ifdef CONFIG_IPIPE_HAVE_HOSTRT +/* + * NOTE: The architecture specific code must only call this function + * when a clocksource suitable for CLOCK_HOST_REALTIME is enabled. + * The event receiver is responsible for providing proper locking. + */ +void ipipe_update_hostrt(struct timekeeper *tk) +{ + struct tk_read_base *tkr = &tk->tkr_mono; + struct clocksource *clock = tkr->clock; + struct ipipe_hostrt_data data; + struct timespec xt; + + xt.tv_sec = tk->xtime_sec; + xt.tv_nsec = (long)(tkr->xtime_nsec >> tkr->shift); + ipipe_root_only(); + data.live = 1; + data.cycle_last = tkr->cycle_last; + data.mask = clock->mask; + data.mult = tkr->mult; + data.shift = tkr->shift; + data.wall_time_sec = xt.tv_sec; + data.wall_time_nsec = xt.tv_nsec; + data.wall_to_monotonic.tv_sec = tk->wall_to_monotonic.tv_sec; + data.wall_to_monotonic.tv_nsec = tk->wall_to_monotonic.tv_nsec; + __ipipe_notify_kevent(IPIPE_KEVT_HOSTRT, &data); +} + +#endif /* CONFIG_IPIPE_HAVE_HOSTRT */ + +int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, + bool force); + +void __ipipe_timer_refresh_freq(unsigned int hrclock_freq) +{ + struct ipipe_timer *t = __ipipe_raw_cpu_read(percpu_timer); + unsigned long flags; + + if (t && t->refresh_freq) { + t->freq = t->refresh_freq(); + flags = hard_local_irq_save(); + config_pcpu_timer(t, hrclock_freq); + hard_local_irq_restore(flags); + clockevents_program_event(t->host_timer, + t->host_timer->next_event, false); + } +} diff --git a/kernel/ipipe/tracer.c b/kernel/ipipe/tracer.c new file mode 100644 index 000000000000..7f4d03ae77b8 --- /dev/null +++ b/kernel/ipipe/tracer.c @@ -0,0 +1,1486 @@ +/* -*- linux-c -*- + * kernel/ipipe/tracer.c + * + * Copyright (C) 2005 Luotao Fu. + * 2005-2008 Jan Kiszka. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define IPIPE_TRACE_PATHS 4 /* Do not lower below 3 */ +#define IPIPE_DEFAULT_ACTIVE 0 +#define IPIPE_DEFAULT_MAX 1 +#define IPIPE_DEFAULT_FROZEN 2 + +#define IPIPE_TRACE_POINTS (1 << CONFIG_IPIPE_TRACE_SHIFT) +#define WRAP_POINT_NO(point) ((point) & (IPIPE_TRACE_POINTS-1)) + +#define IPIPE_DEFAULT_PRE_TRACE 10 +#define IPIPE_DEFAULT_POST_TRACE 10 +#define IPIPE_DEFAULT_BACK_TRACE 100 + +#define IPIPE_DELAY_NOTE 1000 /* in nanoseconds */ +#define IPIPE_DELAY_WARN 10000 /* in nanoseconds */ + +#define IPIPE_TFLG_NMI_LOCK 0x0001 +#define IPIPE_TFLG_NMI_HIT 0x0002 +#define IPIPE_TFLG_NMI_FREEZE_REQ 0x0004 + +#define IPIPE_TFLG_HWIRQ_OFF 0x0100 +#define IPIPE_TFLG_FREEZING 0x0200 +#define IPIPE_TFLG_CURRDOM_SHIFT 10 /* bits 10..11: current domain */ +#define IPIPE_TFLG_CURRDOM_MASK 0x0C00 +#define IPIPE_TFLG_DOMSTATE_SHIFT 12 /* bits 12..15: domain stalled? */ +#define IPIPE_TFLG_DOMSTATE_BITS 1 + +#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \ + (point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT))) +#define IPIPE_TFLG_CURRENT_DOMAIN(point) \ + ((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT) + +struct ipipe_trace_point { + short type; + short flags; + unsigned long eip; + unsigned long parent_eip; + unsigned long v; + unsigned long long timestamp; +}; + +struct ipipe_trace_path { + volatile int flags; + int dump_lock; /* separated from flags due to cross-cpu access */ + int trace_pos; /* next point to fill */ + int begin, end; /* finalised path begin and end */ + int post_trace; /* non-zero when in post-trace phase */ + unsigned long long length; /* max path length in cycles */ + unsigned long nmi_saved_eip; /* for deferred requests from NMIs */ + unsigned long nmi_saved_parent_eip; + unsigned long nmi_saved_v; + struct ipipe_trace_point point[IPIPE_TRACE_POINTS]; +} ____cacheline_aligned_in_smp; + +enum ipipe_trace_type +{ + IPIPE_TRACE_FUNC = 0, + IPIPE_TRACE_BEGIN, + IPIPE_TRACE_END, + IPIPE_TRACE_FREEZE, + IPIPE_TRACE_SPECIAL, + IPIPE_TRACE_PID, + IPIPE_TRACE_EVENT, +}; + +#define IPIPE_TYPE_MASK 0x0007 +#define IPIPE_TYPE_BITS 3 + +#ifdef CONFIG_IPIPE_TRACE_VMALLOC +static DEFINE_PER_CPU(struct ipipe_trace_path *, trace_path); +#else /* !CONFIG_IPIPE_TRACE_VMALLOC */ +static DEFINE_PER_CPU(struct ipipe_trace_path, trace_path[IPIPE_TRACE_PATHS]) = + { [0 ... 
IPIPE_TRACE_PATHS-1] = { .begin = -1, .end = -1 } }; +#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ + +int ipipe_trace_enable = 0; + +static DEFINE_PER_CPU(int, active_path) = { IPIPE_DEFAULT_ACTIVE }; +static DEFINE_PER_CPU(int, max_path) = { IPIPE_DEFAULT_MAX }; +static DEFINE_PER_CPU(int, frozen_path) = { IPIPE_DEFAULT_FROZEN }; +static IPIPE_DEFINE_SPINLOCK(global_path_lock); +static int pre_trace = IPIPE_DEFAULT_PRE_TRACE; +static int post_trace = IPIPE_DEFAULT_POST_TRACE; +static int back_trace = IPIPE_DEFAULT_BACK_TRACE; +static int verbose_trace = 1; +static unsigned long trace_overhead; + +static unsigned long trigger_begin; +static unsigned long trigger_end; + +static DEFINE_MUTEX(out_mutex); +static struct ipipe_trace_path *print_path; +#ifdef CONFIG_IPIPE_TRACE_PANIC +static struct ipipe_trace_path *panic_path; +#endif /* CONFIG_IPIPE_TRACE_PANIC */ +static int print_pre_trace; +static int print_post_trace; + + +static long __ipipe_signed_tsc2us(long long tsc); +static void +__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point); +static void __ipipe_print_symname(struct seq_file *m, unsigned long eip); + +static inline void store_states(struct ipipe_domain *ipd, + struct ipipe_trace_point *point, int pos) +{ + if (test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpu_context(ipd)->status)) + point->flags |= 1 << (pos + IPIPE_TFLG_DOMSTATE_SHIFT); + + if (ipd == __ipipe_current_domain) + point->flags |= pos << IPIPE_TFLG_CURRDOM_SHIFT; +} + +static notrace void +__ipipe_store_domain_states(struct ipipe_trace_point *point) +{ + store_states(ipipe_root_domain, point, 0); + if (ipipe_head_domain != ipipe_root_domain) + store_states(ipipe_head_domain, point, 1); +} + +static notrace int __ipipe_get_free_trace_path(int old, int cpu) +{ + int new_active = old; + struct ipipe_trace_path *tp; + + do { + if (++new_active == IPIPE_TRACE_PATHS) + new_active = 0; + tp = &per_cpu(trace_path, cpu)[new_active]; + } while (new_active == per_cpu(max_path, cpu) || + new_active == per_cpu(frozen_path, cpu) || + tp->dump_lock); + + return new_active; +} + +static notrace void +__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp, + struct ipipe_trace_path *old_tp, int old_pos) +{ + int i; + + new_tp->trace_pos = pre_trace+1; + + for (i = new_tp->trace_pos; i > 0; i--) + memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)], + &old_tp->point[WRAP_POINT_NO(old_pos-i)], + sizeof(struct ipipe_trace_point)); + + /* mark the end (i.e. the point before point[0]) invalid */ + new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0; +} + +static notrace struct ipipe_trace_path * +__ipipe_trace_end(int cpu, struct ipipe_trace_path *tp, int pos) +{ + struct ipipe_trace_path *old_tp = tp; + long active = per_cpu(active_path, cpu); + unsigned long long length; + + /* do we have a new worst case? 
*/ + length = tp->point[tp->end].timestamp - + tp->point[tp->begin].timestamp; + if (length > per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)].length) { + /* we need protection here against other cpus trying + to start a proc dump */ + raw_spin_lock(&global_path_lock); + + /* active path holds new worst case */ + tp->length = length; + per_cpu(max_path, cpu) = active; + + /* find next unused trace path */ + active = __ipipe_get_free_trace_path(active, cpu); + + raw_spin_unlock(&global_path_lock); + + tp = &per_cpu(trace_path, cpu)[active]; + + /* migrate last entries for pre-tracing */ + __ipipe_migrate_pre_trace(tp, old_tp, pos); + } + + return tp; +} + +static notrace struct ipipe_trace_path * +__ipipe_trace_freeze(int cpu, struct ipipe_trace_path *tp, int pos) +{ + struct ipipe_trace_path *old_tp = tp; + long active = per_cpu(active_path, cpu); + int n; + + /* frozen paths have no core (begin=end) */ + tp->begin = tp->end; + + /* we need protection here against other cpus trying + * to set their frozen path or to start a proc dump */ + raw_spin_lock(&global_path_lock); + + per_cpu(frozen_path, cpu) = active; + + /* find next unused trace path */ + active = __ipipe_get_free_trace_path(active, cpu); + + /* check if this is the first frozen path */ + for_each_possible_cpu(n) { + if (n != cpu && + per_cpu(trace_path, n)[per_cpu(frozen_path, n)].end >= 0) + tp->end = -1; + } + + raw_spin_unlock(&global_path_lock); + + tp = &per_cpu(trace_path, cpu)[active]; + + /* migrate last entries for pre-tracing */ + __ipipe_migrate_pre_trace(tp, old_tp, pos); + + return tp; +} + +void notrace +__ipipe_trace(enum ipipe_trace_type type, unsigned long eip, + unsigned long parent_eip, unsigned long v) +{ + struct ipipe_trace_path *tp, *old_tp; + int pos, next_pos, begin; + struct ipipe_trace_point *point; + unsigned long flags; + int cpu; + + flags = hard_local_irq_save_notrace(); + + cpu = ipipe_processor_id(); + restart: + tp = old_tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; + + /* here starts a race window with NMIs - catched below */ + + /* check for NMI recursion */ + if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) { + tp->flags |= IPIPE_TFLG_NMI_HIT; + + /* first freeze request from NMI context? */ + if ((type == IPIPE_TRACE_FREEZE) && + !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) { + /* save arguments and mark deferred freezing */ + tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ; + tp->nmi_saved_eip = eip; + tp->nmi_saved_parent_eip = parent_eip; + tp->nmi_saved_v = v; + } + return; /* no need for restoring flags inside IRQ */ + } + + /* clear NMI events and set lock (atomically per cpu) */ + tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | + IPIPE_TFLG_NMI_FREEZE_REQ)) + | IPIPE_TFLG_NMI_LOCK; + + /* check active_path again - some nasty NMI may have switched + * it meanwhile */ + if (unlikely(tp != + &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)])) { + /* release lock on wrong path and restart */ + tp->flags &= ~IPIPE_TFLG_NMI_LOCK; + + /* there is no chance that the NMI got deferred + * => no need to check for pending freeze requests */ + goto restart; + } + + /* get the point buffer */ + pos = tp->trace_pos; + point = &tp->point[pos]; + + /* store all trace point data */ + point->type = type; + point->flags = hard_irqs_disabled_flags(flags) ? 
IPIPE_TFLG_HWIRQ_OFF : 0; + point->eip = eip; + point->parent_eip = parent_eip; + point->v = v; + ipipe_read_tsc(point->timestamp); + + __ipipe_store_domain_states(point); + + /* forward to next point buffer */ + next_pos = WRAP_POINT_NO(pos+1); + tp->trace_pos = next_pos; + + /* only mark beginning if we haven't started yet */ + begin = tp->begin; + if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0)) + tp->begin = pos; + + /* end of critical path, start post-trace if not already started */ + if (unlikely(type == IPIPE_TRACE_END) && + (begin >= 0) && !tp->post_trace) + tp->post_trace = post_trace + 1; + + /* freeze only if the slot is free and we are not already freezing */ + if ((unlikely(type == IPIPE_TRACE_FREEZE) || + (unlikely(eip >= trigger_begin && eip <= trigger_end) && + type == IPIPE_TRACE_FUNC)) && + per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)].begin < 0 && + !(tp->flags & IPIPE_TFLG_FREEZING)) { + tp->post_trace = post_trace + 1; + tp->flags |= IPIPE_TFLG_FREEZING; + } + + /* enforce end of trace in case of overflow */ + if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) { + tp->end = pos; + goto enforce_end; + } + + /* stop tracing this path if we are in post-trace and + * a) that phase is over now or + * b) a new TRACE_BEGIN came in but we are not freezing this path */ + if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) || + ((type == IPIPE_TRACE_BEGIN) && + !(tp->flags & IPIPE_TFLG_FREEZING))))) { + /* store the path's end (i.e. excluding post-trace) */ + tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace); + + enforce_end: + if (tp->flags & IPIPE_TFLG_FREEZING) + tp = __ipipe_trace_freeze(cpu, tp, pos); + else + tp = __ipipe_trace_end(cpu, tp, pos); + + /* reset the active path, maybe already start a new one */ + tp->begin = (type == IPIPE_TRACE_BEGIN) ? 
+ WRAP_POINT_NO(tp->trace_pos - 1) : -1; + tp->end = -1; + tp->post_trace = 0; + tp->flags = 0; + + /* update active_path not earlier to avoid races with NMIs */ + per_cpu(active_path, cpu) = tp - per_cpu(trace_path, cpu); + } + + /* we still have old_tp and point, + * let's reset NMI lock and check for catches */ + old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK; + if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) { + /* well, this late tagging may not immediately be visible for + * other cpus already dumping this path - a minor issue */ + point->flags |= IPIPE_TFLG_NMI_HIT; + + /* handle deferred freezing from NMI context */ + if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) + __ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip, + old_tp->nmi_saved_parent_eip, + old_tp->nmi_saved_v); + } + + hard_local_irq_restore_notrace(flags); +} + +static unsigned long __ipipe_global_path_lock(void) +{ + unsigned long flags; + int cpu; + struct ipipe_trace_path *tp; + + raw_spin_lock_irqsave(&global_path_lock, flags); + + cpu = ipipe_processor_id(); + restart: + tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; + + /* here is small race window with NMIs - catched below */ + + /* clear NMI events and set lock (atomically per cpu) */ + tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | + IPIPE_TFLG_NMI_FREEZE_REQ)) + | IPIPE_TFLG_NMI_LOCK; + + /* check active_path again - some nasty NMI may have switched + * it meanwhile */ + if (tp != &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]) { + /* release lock on wrong path and restart */ + tp->flags &= ~IPIPE_TFLG_NMI_LOCK; + + /* there is no chance that the NMI got deferred + * => no need to check for pending freeze requests */ + goto restart; + } + + return flags; +} + +static void __ipipe_global_path_unlock(unsigned long flags) +{ + int cpu; + struct ipipe_trace_path *tp; + + /* release spinlock first - it's not involved in the NMI issue */ + __ipipe_spin_unlock_irqbegin(&global_path_lock); + + cpu = ipipe_processor_id(); + tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; + + tp->flags &= ~IPIPE_TFLG_NMI_LOCK; + + /* handle deferred freezing from NMI context */ + if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) + __ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip, + tp->nmi_saved_parent_eip, tp->nmi_saved_v); + + /* See __ipipe_spin_lock_irqsave() and friends. 
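+	 * The unlock is completed in two steps: the raw lock was
+	 * already dropped by __ipipe_spin_unlock_irqbegin() above, so
+	 * that a freeze request deferred by an NMI can be replayed
+	 * without holding global_path_lock (the replay may need to
+	 * take it again), and only now is the interrupt state saved
+	 * by the lock operation restored.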
*/ + __ipipe_spin_unlock_irqcomplete(flags); +} + +void notrace asmlinkage +ipipe_trace_asm(enum ipipe_trace_type type, unsigned long eip, + unsigned long parent_eip, unsigned long v) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(type, eip, parent_eip, v); +} + +void notrace ipipe_trace_begin(unsigned long v) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, v); +} +EXPORT_SYMBOL_GPL(ipipe_trace_begin); + +void notrace ipipe_trace_end(unsigned long v) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, v); +} +EXPORT_SYMBOL_GPL(ipipe_trace_end); + +void notrace ipipe_trace_irqbegin(int irq, struct pt_regs *regs) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_BEGIN, instruction_pointer(regs), + __BUILTIN_RETURN_ADDRESS1, irq); +} +EXPORT_SYMBOL_GPL(ipipe_trace_irqbegin); + +void notrace ipipe_trace_irqend(int irq, struct pt_regs *regs) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_END, instruction_pointer(regs), + __BUILTIN_RETURN_ADDRESS1, irq); +} +EXPORT_SYMBOL_GPL(ipipe_trace_irqend); + +void notrace ipipe_trace_freeze(unsigned long v) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, v); +} +EXPORT_SYMBOL_GPL(ipipe_trace_freeze); + +void notrace ipipe_trace_special(unsigned char id, unsigned long v) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS), + __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, v); +} +EXPORT_SYMBOL_GPL(ipipe_trace_special); + +void notrace ipipe_trace_pid(pid_t pid, short prio) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS), + __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, pid); +} +EXPORT_SYMBOL_GPL(ipipe_trace_pid); + +void notrace ipipe_trace_event(unsigned char id, unsigned long delay_tsc) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_EVENT | (id << IPIPE_TYPE_BITS), + __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, delay_tsc); +} +EXPORT_SYMBOL_GPL(ipipe_trace_event); + +int ipipe_trace_max_reset(void) +{ + int cpu; + unsigned long flags; + struct ipipe_trace_path *path; + int ret = 0; + + flags = __ipipe_global_path_lock(); + + for_each_possible_cpu(cpu) { + path = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; + + if (path->dump_lock) { + ret = -EBUSY; + break; + } + + path->begin = -1; + path->end = -1; + path->trace_pos = 0; + path->length = 0; + } + + __ipipe_global_path_unlock(flags); + + return ret; +} +EXPORT_SYMBOL_GPL(ipipe_trace_max_reset); + +int ipipe_trace_frozen_reset(void) +{ + int cpu; + unsigned long flags; + struct ipipe_trace_path *path; + int ret = 0; + + flags = __ipipe_global_path_lock(); + + for_each_online_cpu(cpu) { + path = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; + + if (path->dump_lock) { + ret = -EBUSY; + break; + } + + path->begin = -1; + path->end = -1; + path->trace_pos = 0; + path->length = 0; + } + + __ipipe_global_path_unlock(flags); + + return ret; +} +EXPORT_SYMBOL_GPL(ipipe_trace_frozen_reset); + +static void +__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point, + int trylock) +{ + struct task_struct *task = NULL; + char buf[8]; + int i; + int locked = 1; + + if (trylock) { + if (!read_trylock(&tasklist_lock)) + locked = 0; + } else + 
read_lock(&tasklist_lock); + + if (locked) + task = find_task_by_pid_ns((pid_t)point->v, &init_pid_ns); + + if (task) + strncpy(task_info, task->comm, 11); + else + strcpy(task_info, "--"); + + if (locked) + read_unlock(&tasklist_lock); + + for (i = strlen(task_info); i < 11; i++) + task_info[i] = ' '; + + sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS); + strcpy(task_info + (11 - strlen(buf)), buf); +} + +static void +__ipipe_get_event_date(char *buf,struct ipipe_trace_path *path, + struct ipipe_trace_point *point) +{ + long time; + int type; + + time = __ipipe_signed_tsc2us(point->timestamp - + path->point[path->begin].timestamp + point->v); + type = point->type >> IPIPE_TYPE_BITS; + + if (type == 0) + /* + * Event type #0 is predefined, stands for the next + * timer tick. + */ + sprintf(buf, "tick@%-6ld", time); + else + sprintf(buf, "%3d@%-7ld", type, time); +} + +#ifdef CONFIG_IPIPE_TRACE_PANIC + +void ipipe_trace_panic_freeze(void) +{ + unsigned long flags; + int cpu; + + if (!ipipe_trace_enable) + return; + + ipipe_trace_enable = 0; + flags = hard_local_irq_save_notrace(); + + cpu = ipipe_processor_id(); + + panic_path = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; + + hard_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(ipipe_trace_panic_freeze); + +void ipipe_trace_panic_dump(void) +{ + int cnt = back_trace; + int start, pos; + char buf[16]; + + if (!panic_path) + return; + + ipipe_context_check_off(); + + printk("I-pipe tracer log (%d points):\n", cnt); + + start = pos = WRAP_POINT_NO(panic_path->trace_pos-1); + + while (cnt-- > 0) { + struct ipipe_trace_point *point = &panic_path->point[pos]; + long time; + char info[16]; + int i; + + printk(" %c", + (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); + + for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) + printk("%c", + (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? + '#' : '+') : + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? + '*' : ' ')); + + if (!point->eip) + printk("--\n"); + else { + __ipipe_trace_point_type(buf, point); + printk("%s", buf); + + switch (point->type & IPIPE_TYPE_MASK) { + case IPIPE_TRACE_FUNC: + printk(" "); + break; + + case IPIPE_TRACE_PID: + __ipipe_get_task_info(info, + point, 1); + printk("%s", info); + break; + + case IPIPE_TRACE_EVENT: + __ipipe_get_event_date(info, + panic_path, point); + printk("%s", info); + break; + + default: + printk("0x%08lx ", point->v); + } + + time = __ipipe_signed_tsc2us(point->timestamp - + panic_path->point[start].timestamp); + printk(" %5ld ", time); + + __ipipe_print_symname(NULL, point->eip); + printk(" ("); + __ipipe_print_symname(NULL, point->parent_eip); + printk(")\n"); + } + pos = WRAP_POINT_NO(pos - 1); + } + + panic_path = NULL; +} +EXPORT_SYMBOL_GPL(ipipe_trace_panic_dump); + +#endif /* CONFIG_IPIPE_TRACE_PANIC */ + + +/* --- /proc output --- */ + +static notrace int __ipipe_in_critical_trpath(long point_no) +{ + return ((WRAP_POINT_NO(point_no-print_path->begin) < + WRAP_POINT_NO(print_path->end-print_path->begin)) || + ((print_path->end == print_path->begin) && + (WRAP_POINT_NO(point_no-print_path->end) > + print_post_trace))); +} + +static long __ipipe_signed_tsc2us(long long tsc) +{ + unsigned long long abs_tsc; + long us; + + if (!__ipipe_hrclock_ok()) + return 0; + + /* ipipe_tsc2us works on unsigned => handle sign separately */ + abs_tsc = (tsc >= 0) ? 
tsc : -tsc; + us = ipipe_tsc2us(abs_tsc); + if (tsc < 0) + return -us; + else + return us; +} + +static void +__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point) +{ + switch (point->type & IPIPE_TYPE_MASK) { + case IPIPE_TRACE_FUNC: + strcpy(buf, "func "); + break; + + case IPIPE_TRACE_BEGIN: + strcpy(buf, "begin "); + break; + + case IPIPE_TRACE_END: + strcpy(buf, "end "); + break; + + case IPIPE_TRACE_FREEZE: + strcpy(buf, "freeze "); + break; + + case IPIPE_TRACE_SPECIAL: + sprintf(buf, "(0x%02x) ", + point->type >> IPIPE_TYPE_BITS); + break; + + case IPIPE_TRACE_PID: + sprintf(buf, "[%5d] ", (pid_t)point->v); + break; + + case IPIPE_TRACE_EVENT: + sprintf(buf, "event "); + break; + } +} + +static void +__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point) +{ + char mark = ' '; + int point_no = point - print_path->point; + int i; + + if (print_path->end == point_no) + mark = '<'; + else if (print_path->begin == point_no) + mark = '>'; + else if (__ipipe_in_critical_trpath(point_no)) + mark = ':'; + seq_printf(m, "%c%c", mark, + (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); + + if (!verbose_trace) + return; + + for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) + seq_printf(m, "%c", + (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? + '#' : '+') : + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' ')); +} + +static void +__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point) +{ + unsigned long delay = 0; + int next; + char *mark = " "; + + next = WRAP_POINT_NO(point+1 - print_path->point); + + if (next != print_path->trace_pos) + delay = ipipe_tsc2ns(print_path->point[next].timestamp - + point->timestamp); + + if (__ipipe_in_critical_trpath(point - print_path->point)) { + if (delay > IPIPE_DELAY_WARN) + mark = "! "; + else if (delay > IPIPE_DELAY_NOTE) + mark = "+ "; + } + seq_puts(m, mark); + + if (verbose_trace) + seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000, + (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' '); + else + seq_puts(m, " "); +} + +static void __ipipe_print_symname(struct seq_file *m, unsigned long eip) +{ + char namebuf[KSYM_NAME_LEN+1]; + unsigned long size, offset; + const char *sym_name; + char *modname; + + sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); + +#ifdef CONFIG_IPIPE_TRACE_PANIC + if (!m) { + /* panic dump */ + if (sym_name) { + printk("%s+0x%lx", sym_name, offset); + if (modname) + printk(" [%s]", modname); + } else + printk("<%08lx>", eip); + } else +#endif /* CONFIG_IPIPE_TRACE_PANIC */ + { + if (sym_name) { + if (verbose_trace) { + seq_printf(m, "%s+0x%lx", sym_name, offset); + if (modname) + seq_printf(m, " [%s]", modname); + } else + seq_puts(m, sym_name); + } else + seq_printf(m, "<%08lx>", eip); + } +} + +static void __ipipe_print_headline(struct seq_file *m) +{ + const char *name[2]; + + seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu " + "us\n\n", trace_overhead/1000, trace_overhead%1000); + + if (verbose_trace) { + name[0] = ipipe_root_domain->name; + if (ipipe_head_domain != ipipe_root_domain) + name[1] = ipipe_head_domain->name; + else + name[1] = ""; + + seq_printf(m, + " +----- Hard IRQs ('|': locked)\n" + " |+-- %s\n" + " ||+- %s%s\n" + " ||| +---------- " + "Delay flag ('+': > %d us, '!': > %d us)\n" + " ||| | +- " + "NMI noise ('N')\n" + " ||| | |\n" + " Type User Val. 
Time Delay Function " + "(Parent)\n", + name[1], name[0], + " ('*': domain stalled, '+': current, " + "'#': current+stalled)", + IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); + } else + seq_printf(m, + " +--------------- Hard IRQs ('|': locked)\n" + " | +- Delay flag " + "('+': > %d us, '!': > %d us)\n" + " | |\n" + " Type Time Function (Parent)\n", + IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); +} + +static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos) +{ + loff_t n = *pos; + + mutex_lock(&out_mutex); + + if (!n) { + struct ipipe_trace_path *tp; + unsigned long length_usecs; + int points, cpu; + unsigned long flags; + + /* protect against max_path/frozen_path updates while we + * haven't locked our target path, also avoid recursively + * taking global_path_lock from NMI context */ + flags = __ipipe_global_path_lock(); + + /* find the longest of all per-cpu paths */ + print_path = NULL; + for_each_online_cpu(cpu) { + tp = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; + if ((print_path == NULL) || + (tp->length > print_path->length)) { + print_path = tp; + break; + } + } + print_path->dump_lock = 1; + + __ipipe_global_path_unlock(flags); + + if (!__ipipe_hrclock_ok()) { + seq_printf(m, "No hrclock available, dumping traces disabled\n"); + return NULL; + } + + /* does this path actually contain data? */ + if (print_path->end == print_path->begin) + return NULL; + + /* number of points inside the critical path */ + points = WRAP_POINT_NO(print_path->end-print_path->begin+1); + + /* pre- and post-tracing length, post-trace length was frozen + in __ipipe_trace, pre-trace may have to be reduced due to + buffer overrun */ + print_pre_trace = pre_trace; + print_post_trace = WRAP_POINT_NO(print_path->trace_pos - + print_path->end - 1); + if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) + print_pre_trace = IPIPE_TRACE_POINTS - 1 - points - + print_post_trace; + + length_usecs = ipipe_tsc2us(print_path->length); + seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe release #%d\n" + "-------------------------------------------------------------\n", + UTS_RELEASE, IPIPE_CORE_RELEASE); + seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: " + "%d (-%d/+%d), Length: %lu us\n", + cpu, print_path->point[print_path->begin].timestamp, + points, print_pre_trace, print_post_trace, length_usecs); + __ipipe_print_headline(m); + } + + /* check if we are inside the trace range */ + if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + + print_pre_trace + print_post_trace)) + return NULL; + + /* return the next point to be shown */ + return &print_path->point[WRAP_POINT_NO(print_path->begin - + print_pre_trace + n)]; +} + +static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos) +{ + loff_t n = ++*pos; + + /* check if we are inside the trace range with the next entry */ + if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + + print_pre_trace + print_post_trace)) + return NULL; + + /* return the next point to be shown */ + return &print_path->point[WRAP_POINT_NO(print_path->begin - + print_pre_trace + *pos)]; +} + +static void __ipipe_prtrace_stop(struct seq_file *m, void *p) +{ + if (print_path) + print_path->dump_lock = 0; + mutex_unlock(&out_mutex); +} + +static int __ipipe_prtrace_show(struct seq_file *m, void *p) +{ + long time; + struct ipipe_trace_point *point = p; + char buf[16]; + + if (!point->eip) { + seq_puts(m, "--\n"); + return 0; + } + + __ipipe_print_pathmark(m, point); + __ipipe_trace_point_type(buf, 
point); + seq_puts(m, buf); + if (verbose_trace) + switch (point->type & IPIPE_TYPE_MASK) { + case IPIPE_TRACE_FUNC: + seq_puts(m, " "); + break; + + case IPIPE_TRACE_PID: + __ipipe_get_task_info(buf, point, 0); + seq_puts(m, buf); + break; + + case IPIPE_TRACE_EVENT: + __ipipe_get_event_date(buf, print_path, point); + seq_puts(m, buf); + break; + + default: + seq_printf(m, "0x%08lx ", point->v); + } + + time = __ipipe_signed_tsc2us(point->timestamp - + print_path->point[print_path->begin].timestamp); + seq_printf(m, "%5ld", time); + + __ipipe_print_delay(m, point); + __ipipe_print_symname(m, point->eip); + seq_puts(m, " ("); + __ipipe_print_symname(m, point->parent_eip); + seq_puts(m, ")\n"); + + return 0; +} + +static struct seq_operations __ipipe_max_ptrace_ops = { + .start = __ipipe_max_prtrace_start, + .next = __ipipe_prtrace_next, + .stop = __ipipe_prtrace_stop, + .show = __ipipe_prtrace_show +}; + +static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &__ipipe_max_ptrace_ops); +} + +static ssize_t +__ipipe_max_reset(struct file *file, const char __user *pbuffer, + size_t count, loff_t *data) +{ + mutex_lock(&out_mutex); + ipipe_trace_max_reset(); + mutex_unlock(&out_mutex); + + return count; +} + +static const struct file_operations __ipipe_max_prtrace_fops = { + .open = __ipipe_max_prtrace_open, + .read = seq_read, + .write = __ipipe_max_reset, + .llseek = seq_lseek, + .release = seq_release, +}; + +static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos) +{ + loff_t n = *pos; + + mutex_lock(&out_mutex); + + if (!n) { + struct ipipe_trace_path *tp; + int cpu; + unsigned long flags; + + /* protect against max_path/frozen_path updates while we + * haven't locked our target path, also avoid recursively + * taking global_path_lock from NMI context */ + flags = __ipipe_global_path_lock(); + + /* find the first of all per-cpu frozen paths */ + print_path = NULL; + for_each_online_cpu(cpu) { + tp = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; + if (tp->end >= 0) { + print_path = tp; + break; + } + } + if (print_path) + print_path->dump_lock = 1; + + __ipipe_global_path_unlock(flags); + + if (!print_path) + return NULL; + + if (!__ipipe_hrclock_ok()) { + seq_printf(m, "No hrclock available, dumping traces disabled\n"); + return NULL; + } + + /* back- and post-tracing length, post-trace length was frozen + in __ipipe_trace, back-trace may have to be reduced due to + buffer overrun */ + print_pre_trace = back_trace-1; /* substract freeze point */ + print_post_trace = WRAP_POINT_NO(print_path->trace_pos - + print_path->end - 1); + if (1+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) + print_pre_trace = IPIPE_TRACE_POINTS - 2 - + print_post_trace; + + seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe release #%d\n" + "------------------------------------------------------------\n", + UTS_RELEASE, IPIPE_CORE_RELEASE); + seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n", + cpu, print_path->point[print_path->begin].timestamp, + print_pre_trace+1, print_post_trace); + __ipipe_print_headline(m); + } + + /* check if we are inside the trace range */ + if (n >= print_pre_trace + 1 + print_post_trace) + return NULL; + + /* return the next point to be shown */ + return &print_path->point[WRAP_POINT_NO(print_path->begin- + print_pre_trace+n)]; +} + +static struct seq_operations __ipipe_frozen_ptrace_ops = { + .start = __ipipe_frozen_prtrace_start, + .next = __ipipe_prtrace_next, + .stop = 
__ipipe_prtrace_stop, + .show = __ipipe_prtrace_show +}; + +static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &__ipipe_frozen_ptrace_ops); +} + +static ssize_t +__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer, + size_t count, loff_t *data) +{ + char *end, buf[16]; + int val; + int n; + + n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count; + + if (copy_from_user(buf, pbuffer, n)) + return -EFAULT; + + buf[n] = '\0'; + val = simple_strtol(buf, &end, 0); + + if (((*end != '\0') && !isspace(*end)) || (val < 0)) + return -EINVAL; + + mutex_lock(&out_mutex); + ipipe_trace_frozen_reset(); + if (val > 0) + ipipe_trace_freeze(-1); + mutex_unlock(&out_mutex); + + return count; +} + +static const struct file_operations __ipipe_frozen_prtrace_fops = { + .open = __ipipe_frozen_prtrace_open, + .read = seq_read, + .write = __ipipe_frozen_ctrl, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __ipipe_rd_proc_val(struct seq_file *p, void *data) +{ + seq_printf(p, "%u\n", *(int *)p->private); + return 0; +} + +static ssize_t +__ipipe_wr_proc_val(struct file *file, const char __user *buffer, + size_t count, loff_t *data) +{ + struct seq_file *p = file->private_data; + char *end, buf[16]; + int val; + int n; + + n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count; + + if (copy_from_user(buf, buffer, n)) + return -EFAULT; + + buf[n] = '\0'; + val = simple_strtol(buf, &end, 0); + + if (((*end != '\0') && !isspace(*end)) || (val < 0)) + return -EINVAL; + + mutex_lock(&out_mutex); + *(int *)p->private = val; + mutex_unlock(&out_mutex); + + return count; +} + +static int __ipipe_rw_proc_val_open(struct inode *inode, struct file *file) +{ + return single_open(file, __ipipe_rd_proc_val, PDE_DATA(inode)); +} + +static const struct file_operations __ipipe_rw_proc_val_ops = { + .open = __ipipe_rw_proc_val_open, + .read = seq_read, + .write = __ipipe_wr_proc_val, + .llseek = seq_lseek, + .release = single_release, +}; + +static void __init +__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir, + const char *name, int *value_ptr) +{ + proc_create_data(name, 0644, trace_dir, &__ipipe_rw_proc_val_ops, + value_ptr); +} + +static int __ipipe_rd_trigger(struct seq_file *p, void *data) +{ + char str[KSYM_SYMBOL_LEN]; + + if (trigger_begin) { + sprint_symbol(str, trigger_begin); + seq_printf(p, "%s\n", str); + } + return 0; +} + +static ssize_t +__ipipe_wr_trigger(struct file *file, const char __user *buffer, + size_t count, loff_t *data) +{ + char buf[KSYM_SYMBOL_LEN]; + unsigned long begin, end; + + if (count > sizeof(buf) - 1) + count = sizeof(buf) - 1; + if (copy_from_user(buf, buffer, count)) + return -EFAULT; + buf[count] = 0; + if (buf[count-1] == '\n') + buf[count-1] = 0; + + begin = kallsyms_lookup_name(buf); + if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL)) + return -ENOENT; + end += begin - 1; + + mutex_lock(&out_mutex); + /* invalidate the current range before setting a new one */ + trigger_end = 0; + wmb(); + ipipe_trace_frozen_reset(); + + /* set new range */ + trigger_begin = begin; + wmb(); + trigger_end = end; + mutex_unlock(&out_mutex); + + return count; +} + +static int __ipipe_rw_trigger_open(struct inode *inode, struct file *file) +{ + return single_open(file, __ipipe_rd_trigger, NULL); +} + +static const struct file_operations __ipipe_rw_trigger_ops = { + .open = __ipipe_rw_trigger_open, + .read = seq_read, + .write = __ipipe_wr_trigger, + .llseek = seq_lseek, + .release = 
single_release, +}; + + +#ifdef CONFIG_IPIPE_TRACE_MCOUNT +static void notrace +ipipe_trace_function(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_FUNC, ip, parent_ip, 0); +} + +static struct ftrace_ops ipipe_trace_ops = { + .func = ipipe_trace_function, + .flags = FTRACE_OPS_FL_IPIPE_EXCLUSIVE, +}; + +static ssize_t __ipipe_wr_enable(struct file *file, const char __user *buffer, + size_t count, loff_t *data) +{ + char *end, buf[16]; + int val; + int n; + + n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count; + + if (copy_from_user(buf, buffer, n)) + return -EFAULT; + + buf[n] = '\0'; + val = simple_strtol(buf, &end, 0); + + if (((*end != '\0') && !isspace(*end)) || (val < 0)) + return -EINVAL; + + mutex_lock(&out_mutex); + + if (ipipe_trace_enable) { + if (!val) + unregister_ftrace_function(&ipipe_trace_ops); + } else if (val) + register_ftrace_function(&ipipe_trace_ops); + + ipipe_trace_enable = val; + + mutex_unlock(&out_mutex); + + return count; +} + +static const struct file_operations __ipipe_rw_enable_ops = { + .open = __ipipe_rw_proc_val_open, + .read = seq_read, + .write = __ipipe_wr_enable, + .llseek = seq_lseek, + .release = single_release, +}; +#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ + +extern struct proc_dir_entry *ipipe_proc_root; + +void __init __ipipe_tracer_hrclock_initialized(void) +{ + unsigned long long start, end, min = ULLONG_MAX; + int i; + +#ifdef CONFIG_IPIPE_TRACE_VMALLOC + if (!per_cpu(trace_path, 0)) + return; +#endif + /* Calculate minimum overhead of __ipipe_trace() */ + hard_local_irq_disable(); + for (i = 0; i < 100; i++) { + ipipe_read_tsc(start); + __ipipe_trace(IPIPE_TRACE_FUNC, __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, 0); + ipipe_read_tsc(end); + + end -= start; + if (end < min) + min = end; + } + hard_local_irq_enable(); + trace_overhead = ipipe_tsc2ns(min); +} + +void __init __ipipe_init_tracer(void) +{ + struct proc_dir_entry *trace_dir; +#ifdef CONFIG_IPIPE_TRACE_VMALLOC + int cpu, path; +#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ + +#ifdef CONFIG_IPIPE_TRACE_VMALLOC + for_each_possible_cpu(cpu) { + struct ipipe_trace_path *tp_buf; + + tp_buf = vmalloc_node(sizeof(struct ipipe_trace_path) * + IPIPE_TRACE_PATHS, cpu_to_node(cpu)); + if (!tp_buf) { + pr_err("I-pipe: " + "insufficient memory for trace buffer.\n"); + return; + } + memset(tp_buf, 0, + sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS); + for (path = 0; path < IPIPE_TRACE_PATHS; path++) { + tp_buf[path].begin = -1; + tp_buf[path].end = -1; + } + per_cpu(trace_path, cpu) = tp_buf; + } +#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ + + if (__ipipe_hrclock_ok() && !trace_overhead) + __ipipe_tracer_hrclock_initialized(); + +#ifdef CONFIG_IPIPE_TRACE_ENABLE + ipipe_trace_enable = 1; +#ifdef CONFIG_IPIPE_TRACE_MCOUNT + ftrace_enabled = 1; + register_ftrace_function(&ipipe_trace_ops); +#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ +#endif /* CONFIG_IPIPE_TRACE_ENABLE */ + + trace_dir = proc_mkdir("trace", ipipe_proc_root); + + proc_create("max", 0644, trace_dir, &__ipipe_max_prtrace_fops); + proc_create("frozen", 0644, trace_dir, &__ipipe_frozen_prtrace_fops); + + proc_create("trigger", 0644, trace_dir, &__ipipe_rw_trigger_ops); + + __ipipe_create_trace_proc_val(trace_dir, "pre_trace_points", + &pre_trace); + __ipipe_create_trace_proc_val(trace_dir, "post_trace_points", + &post_trace); + __ipipe_create_trace_proc_val(trace_dir, "back_trace_points", + &back_trace); + 
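+	/*
+	 * Illustrative aside, not part of this patch: assuming the
+	 * I-pipe proc root shows up as /proc/ipipe, a typical tracing
+	 * session with the entries created here would look like:
+	 *
+	 *	echo 1 > /proc/ipipe/trace/enable
+	 *	echo <symbol> > /proc/ipipe/trace/trigger
+	 *	cat /proc/ipipe/trace/frozen	(frozen back-trace)
+	 *	cat /proc/ipipe/trace/max	(worst-case path)
+	 *	echo 0 > /proc/ipipe/trace/max	(reset the worst case)
+	 */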
__ipipe_create_trace_proc_val(trace_dir, "verbose", + &verbose_trace); +#ifdef CONFIG_IPIPE_TRACE_MCOUNT + proc_create_data("enable", 0644, trace_dir, &__ipipe_rw_enable_ops, + &ipipe_trace_enable); +#else /* !CONFIG_IPIPE_TRACE_MCOUNT */ + __ipipe_create_trace_proc_val(trace_dir, "enable", + &ipipe_trace_enable); +#endif /* !CONFIG_IPIPE_TRACE_MCOUNT */ +} diff --git a/kernel/panic.c b/kernel/panic.c index bdd18afa19a4..8555ebf1b7c4 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -19,8 +19,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -471,6 +473,8 @@ void oops_enter(void) { tracing_off(); /* can't trust the integrity of the kernel anymore: */ + ipipe_trace_panic_freeze(); + ipipe_disable_context_check(); debug_locks_off(); do_oops_enter_exit(); } diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug index 0ec7d1d33a14..2034d02e8ee3 100644 --- a/kernel/rcu/Kconfig.debug +++ b/kernel/rcu/Kconfig.debug @@ -5,7 +5,7 @@ menu "RCU Debugging" config PROVE_RCU - def_bool PROVE_LOCKING + def_bool PROVE_LOCKING && !IPIPE config TORTURE_TEST tristate diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 55062461b2fd..2119b43b92cc 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1780,7 +1780,9 @@ void scheduler_ipi(void) * however a fair share of IPIs are still resched only so this would * somewhat pessimize the simple resched case. */ +#ifndef IPIPE_ARCH_HAVE_VIRQ_IPI irq_enter(); +#endif sched_ttwu_pending(); /* @@ -1790,7 +1792,9 @@ void scheduler_ipi(void) this_rq()->idle_balance = 1; raise_softirq_irqoff(SCHED_SOFTIRQ); } +#ifndef IPIPE_ARCH_HAVE_VIRQ_IPI irq_exit(); +#endif } static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 257f4f0b4532..3e23da3bd6aa 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -84,6 +84,51 @@ void __weak arch_cpu_idle(void) local_irq_enable(); } +#ifdef CONFIG_IPIPE + +bool __weak ipipe_enter_idle_hook(void) +{ + /* + * By default, we may enter the idle state if no co-kernel is + * present. + */ + return ipipe_root_domain == ipipe_head_domain; +} + +void __weak ipipe_exit_idle_hook(void) { } + +static bool pipeline_idle_enter(void) +{ + struct ipipe_percpu_domain_data *p; + + /* + * We may go idle if no interrupt is waiting delivery from the + * root stage, or a co-kernel denies such transition. + */ + hard_local_irq_disable(); + p = ipipe_this_cpu_root_context(); + + return !__ipipe_ipending_p(p) && ipipe_enter_idle_hook(); +} + +static inline void pipeline_idle_exit(void) +{ + ipipe_exit_idle_hook(); + /* unstall and re-enable hw IRQs too. */ + local_irq_enable(); +} + +#else + +static inline bool pipeline_idle_enter(void) +{ + return true; +} + +static inline void pipeline_idle_exit(void) { } + +#endif /* !CONFIG_IPIPE */ + /** * default_idle_call - Default CPU idle routine. * @@ -91,11 +136,12 @@ void __weak arch_cpu_idle(void) */ void __cpuidle default_idle_call(void) { - if (current_clr_polling_and_test()) { + if (current_clr_polling_and_test() || !pipeline_idle_enter()) { local_irq_enable(); } else { stop_critical_timings(); arch_cpu_idle(); + pipeline_idle_exit(); start_critical_timings(); } } @@ -103,11 +149,13 @@ void __cpuidle default_idle_call(void) static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, int next_state) { + int ret; + /* * The idle task must be scheduled, it is pointless to go to idle, just * update no idle residency and return. 
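 	 *
 	 * With CONFIG_IPIPE the same bail-out is also taken when
 	 * pipeline_idle_enter() vetoes the transition, i.e. when the
 	 * root stage still has interrupts pending, or when a co-kernel
 	 * overrides the weak ipipe_enter_idle_hook() and denies idling,
 	 * e.g. (hypothetical client code):
 	 *
 	 *	bool ipipe_enter_idle_hook(void)
 	 *	{
 	 *		return !cokernel_busy();
 	 *	}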
*/ - if (current_clr_polling_and_test()) { + if (current_clr_polling_and_test() || !pipeline_idle_enter()) { dev->last_residency = 0; local_irq_enable(); return -EBUSY; @@ -118,7 +166,10 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, * This function will block until an interrupt occurs and will take * care of re-enabling the local interrupts */ - return cpuidle_enter(drv, dev, next_state); + ret = cpuidle_enter(drv, dev, next_state); + pipeline_idle_exit(); + + return ret; } /** @@ -157,6 +208,10 @@ static void cpuidle_idle_call(void) goto exit_idle; } + if (!pipeline_idle_enter()) { + local_irq_enable(); + goto exit_idle; + } /* * Suspend-to-idle ("s2idle") is a system state in which all user space * has been frozen, all I/O devices have been suspended and the only @@ -178,12 +233,14 @@ static void cpuidle_idle_call(void) next_state = cpuidle_find_deepest_state(drv, dev); call_cpuidle(drv, dev, next_state); + pipeline_idle_exit(); } else { /* * Ask the cpuidle framework to choose a convenient idle state. */ next_state = cpuidle_select(drv, dev); entered_state = call_cpuidle(drv, dev, next_state); + pipeline_idle_exit(); /* * Give the governor an opportunity to reflect on the outcome */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ff21b4dbb392..42c9175e6581 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -412,6 +412,7 @@ config MAGIC_SYSRQ keys are documented in . Don't say Y unless you really know what this hack does. + config MAGIC_SYSRQ_DEFAULT_ENABLE hex "Enable magic SysRq key functions by default" depends on MAGIC_SYSRQ @@ -431,6 +432,8 @@ config MAGIC_SYSRQ_SERIAL This option allows you to decide whether you want to enable the magic SysRq key. +source "kernel/ipipe/Kconfig.debug" + config DEBUG_KERNEL bool "Kernel debugging" help