Merge branches 'core-fixes-for-linus', 'x86-fixes-for-linus', 'timers-fixes-for-linus' and 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 15 Jan 2011 20:45:00 +0000 (12:45 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 15 Jan 2011 20:45:00 +0000 (12:45 -0800)
* 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  rcu: avoid pointless blocked-task warnings
  rcu: demote SRCU_SYNCHRONIZE_DELAY from kernel-parameter status
  rtmutex: Fix comment about why new_owner can be NULL in wake_futex_pi()

* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, olpc: Add missing Kconfig dependencies
  x86, mrst: Set correct APB timer IRQ affinity for secondary cpu
  x86: tsc: Fix calibration refinement conditionals to avoid divide by zero
  x86, ia64, acpi: Clean up x86-ism in drivers/acpi/numa.c

* 'timers-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  timekeeping: Make local variables static
  time: Rename misnamed minsec argument of clocks_calc_mult_shift()

* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  tracing: Remove syscall_exit_fields
  tracing: Only process module tracepoints once
  perf record: Add "nodelay" mode, disabled by default
  perf sched: Fix list of events, dropping unsupported ':r' modifier
  Revert "perf tools: Emit clearer message for sys_perf_event_open ENOENT return"
  perf top: Fix annotate segv
  perf evsel: Fix order of event list deletion

18 files changed:
arch/x86/Kconfig
arch/x86/kernel/apb_timer.c
arch/x86/kernel/tsc.c
drivers/acpi/numa.c
include/trace/events/module.h
init/Kconfig
kernel/futex.c
kernel/rcutiny.c
kernel/srcu.c
kernel/time/clocksource.c
kernel/time/timekeeping.c
kernel/trace/trace_syscalls.c
tools/perf/Documentation/perf-record.txt
tools/perf/builtin-record.c
tools/perf/builtin-sched.c
tools/perf/builtin-stat.c
tools/perf/builtin-top.c
tools/perf/perf.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 47ae4a7..3ed5ad9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2068,6 +2068,7 @@ config OLPC
        bool "One Laptop Per Child support"
        select GPIOLIB
        select OLPC_OPENFIRMWARE
+       depends on !X86_64 && !X86_PAE
        ---help---
          Add support for detecting the unique features of the OLPC
          XO hardware.
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 7c9ab59..51ef31a 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -313,14 +313,16 @@ static void apbt_setup_irq(struct apbt_dev *adev)
        if (adev->irq == 0)
                return;
 
+       irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
+       irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
+       /* APB timer irqs are set up as mp_irqs, timer is edge type */
+       __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
+
        if (system_state == SYSTEM_BOOTING) {
-               irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
-               irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
-               /* APB timer irqs are set up as mp_irqs, timer is edge type */
-               __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
                if (request_irq(adev->irq, apbt_interrupt_handler,
-                               IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
-                               adev->name, adev)) {
+                                       IRQF_TIMER | IRQF_DISABLED |
+                                       IRQF_NOBALANCING,
+                                       adev->name, adev)) {
                        printk(KERN_ERR "Failed request IRQ for APBT%d\n",
                               adev->num);
                }
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 823f79a..ffe5755 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -464,7 +464,7 @@ unsigned long native_calibrate_tsc(void)
                tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
 
                /* hpet or pmtimer available ? */
-               if (!hpet && !ref1 && !ref2)
+               if (ref1 == ref2)
                        continue;
 
                /* Check, whether the sampling was disturbed by an SMI */
@@ -935,7 +935,7 @@ static void tsc_refine_calibration_work(struct work_struct *work)
        tsc_stop = tsc_read_refs(&ref_stop, hpet);
 
        /* hpet or pmtimer available ? */
-       if (!hpet && !ref_start && !ref_stop)
+       if (ref_start == ref_stop)
                goto out;
 
        /* Check, whether the sampling was disturbed by an SMI */
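Both hunks tighten the "reference timer present?" test for the same reason: the refined frequency is later computed from the delta of the two reference reads, so identical reads must bail out before that delta becomes a divisor. A simplified sketch of the shape of the problem (names and scaling are illustrative, not the kernel's exact code):

#include <stdint.h>

/* Why `ref1 == ref2` is the right guard: the old test missed the case
 * where both reads returned the same nonzero value, leaving a zero
 * reference delta to divide by. */
static uint64_t refine_khz(uint64_t tsc_delta, uint64_t ref1, uint64_t ref2)
{
	if (ref1 == ref2)		/* no usable reference delta */
		return 0;		/* caller keeps the PIT-based value */
	return tsc_delta / (ref2 - ref1);	/* scale factor omitted */
}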
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index d9926af..5eb25eb 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -275,23 +275,19 @@ acpi_table_parse_srat(enum acpi_srat_type id,
 int __init acpi_numa_init(void)
 {
        int ret = 0;
-       int nr_cpu_entries = nr_cpu_ids;
 
-#ifdef CONFIG_X86
        /*
         * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
         * SRAT cpu entries could have different order with that in MADT.
         * So go over all cpu entries in SRAT to get apicid to node mapping.
         */
-       nr_cpu_entries = MAX_LOCAL_APIC;
-#endif
 
        /* SRAT: Static Resource Affinity Table */
        if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
                acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
-                                    acpi_parse_x2apic_affinity, nr_cpu_entries);
+                                    acpi_parse_x2apic_affinity, 0);
                acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
-                                    acpi_parse_processor_affinity, nr_cpu_entries);
+                                    acpi_parse_processor_affinity, 0);
                ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
                                            acpi_parse_memory_affinity,
                                            NR_NODE_MEMBLKS);
diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index c7bb2f0..c6bae36 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h
@@ -1,5 +1,15 @@
+/*
+ * Because linux/module.h has tracepoints in the header, and ftrace.h
+ * eventually includes this file, define_trace.h includes linux/module.h
+ * But we do not want the module.h to override the TRACE_SYSTEM macro
+ * variable that define_trace.h is processing, so we only set it
+ * when module events are being processed, which would happen when
+ * CREATE_TRACE_POINTS is defined.
+ */
+#ifdef CREATE_TRACE_POINTS
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM module
+#endif
 
 #if !defined(_TRACE_MODULE_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_MODULE_H
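The new guard only takes effect at the instantiation site, where CREATE_TRACE_POINTS is defined before the header is pulled in. A sketch of that usual idiom (illustrative; the real instantiation lives in the module core, not shown in this diff):

/* Defining CREATE_TRACE_POINTS before the include is what makes the
 * guarded TRACE_SYSTEM override fire exactly once, while every other
 * inclusion path leaves TRACE_SYSTEM alone. */
#define CREATE_TRACE_POINTS
#include <trace/events/module.h>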
diff --git a/init/Kconfig b/init/Kconfig
index 4f6cdbf..4e33790 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -515,21 +515,6 @@ config RCU_BOOST_DELAY
 
          Accept the default if unsure.
 
-config SRCU_SYNCHRONIZE_DELAY
-       int "Microseconds to delay before waiting for readers"
-       range 0 20
-       default 10
-       help
-         This option controls how long SRCU delays before entering its
-         loop waiting on SRCU readers.  The purpose of this loop is
-         to avoid the unconditional context-switch penalty that would
-         otherwise be incurred if there was an active SRCU reader,
-         in a manner similar to adaptive locking schemes.  This should
-         be set to be a bit longer than the common-case SRCU read-side
-         critical-section overhead.
-
-         Accept the default if unsure.
-
 endmenu # "RCU Subsystem"
 
 config IKCONFIG
diff --git a/kernel/futex.c b/kernel/futex.c
index 5207563..b766d28 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -826,10 +826,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
        new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
        /*
-        * This happens when we have stolen the lock and the original
-        * pending owner did not enqueue itself back on the rt_mutex.
-        * Thats not a tragedy. We know that way, that a lock waiter
-        * is on the fly. We make the futex_q waiter the pending owner.
+        * It is possible that the next waiter (the one that brought
+        * this owner to the kernel) timed out and is no longer
+        * waiting on the lock.
         */
        if (!new_owner)
                new_owner = this->task;
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 0344937..0c343b9 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -189,7 +189,8 @@ static int rcu_kthread(void *arg)
        unsigned long flags;
 
        for (;;) {
-               wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0);
+               wait_event_interruptible(rcu_kthread_wq,
+                                        have_rcu_kthread_work != 0);
                morework = rcu_boost();
                local_irq_save(flags);
                work = have_rcu_kthread_work;
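The "pointless blocked-task warnings" are khungtaskd's "task blocked for more than 120 seconds" reports, which count only uninterruptible sleepers; switching to an interruptible wait is harmless here because the loop re-checks its condition anyway. A minimal sketch of the pattern, assuming a made-up wait queue:

#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
static int demo_work;

/* wait_event() sleeps in TASK_UNINTERRUPTIBLE, which the hung-task
 * detector flags; wait_event_interruptible() sleeps in
 * TASK_INTERRUPTIBLE, which it ignores.  Discarding the return value
 * is fine for a kthread that simply re-tests its condition. */
static void demo_wait(void)
{
	wait_event_interruptible(demo_wq, demo_work != 0);
}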
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 98d8c1e..73ce23f 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -156,6 +156,16 @@ void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 
 /*
+ * We use an adaptive strategy for synchronize_srcu() and especially for
+ * synchronize_srcu_expedited().  We spin for a fixed time period
+ * (defined below) to allow SRCU readers to exit their read-side critical
+ * sections.  If there are still some readers after 10 microseconds,
+ * we repeatedly block for 1-millisecond time periods.  This approach
+ * has done well in testing, so there is no need for a config parameter.
+ */
+#define SYNCHRONIZE_SRCU_READER_DELAY 10
+
+/*
  * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
  */
 static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
@@ -207,11 +217,12 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
         * will have finished executing.  We initially give readers
         * an arbitrarily chosen 10 microseconds to get out of their
         * SRCU read-side critical sections, then loop waiting 1/HZ
-        * seconds per iteration.
+        * seconds per iteration.  The 10-microsecond value has done
+        * very well in testing.
         */
 
        if (srcu_readers_active_idx(sp, idx))
-               udelay(CONFIG_SRCU_SYNCHRONIZE_DELAY);
+               udelay(SYNCHRONIZE_SRCU_READER_DELAY);
        while (srcu_readers_active_idx(sp, idx))
                schedule_timeout_interruptible(1);
 
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index c50a034..6519cf6 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -113,7 +113,7 @@ EXPORT_SYMBOL_GPL(timecounter_cyc2time);
  * @shift:     pointer to shift variable
  * @from:      frequency to convert from
  * @to:                frequency to convert to
- * @minsec:    guaranteed runtime conversion range in seconds
+ * @maxsec:    guaranteed runtime conversion range in seconds
  *
  * The function evaluates the shift/mult pair for the scaled math
  * operations of clocksources and clockevents.
@@ -122,7 +122,7 @@ EXPORT_SYMBOL_GPL(timecounter_cyc2time);
  * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
  * event @to is the counter frequency and @from is NSEC_PER_SEC.
  *
- * The @minsec conversion range argument controls the time frame in
+ * The @maxsec conversion range argument controls the time frame in
  * seconds which must be covered by the runtime conversion with the
  * calculated mult and shift factors. This guarantees that no 64bit
  * overflow happens when the input value of the conversion is
@@ -131,7 +131,7 @@ EXPORT_SYMBOL_GPL(timecounter_cyc2time);
  * factors.
  */
 void
-clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
+clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
 {
        u64 tmp;
        u32 sft, sftacc= 32;
@@ -140,7 +140,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
         * Calculate the shift factor which is limiting the conversion
         * range:
         */
-       tmp = ((u64)minsec * from) >> 32;
+       tmp = ((u64)maxsec * from) >> 32;
        while (tmp) {
                tmp >>=1;
                sftacc--;
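For context on what the renamed @maxsec bounds: the runtime conversion is just a multiply and a shift, and clocks_calc_mult_shift() picks the largest shift (best precision) whose products still fit in 64 bits across a @maxsec-second delta. A sketch of the consumer side (helper name and values hypothetical):

#include <stdint.h>

/* For from = counter frequency and to = NSEC_PER_SEC, the computed pair
 * satisfies ns == (cycles * mult) >> shift, overflow-free for deltas of
 * up to @maxsec seconds. */
static inline uint64_t cyc_to_ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (cycles * mult) >> shift;
}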
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5536aaf..d27c756 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -49,7 +49,7 @@ struct timekeeper {
        u32     mult;
 };
 
-struct timekeeper timekeeper;
+static struct timekeeper timekeeper;
 
 /**
  * timekeeper_setup_internals - Set up internals to use clocksource clock.
@@ -164,7 +164,7 @@ static struct timespec total_sleep_time;
 /*
  * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock.
  */
-struct timespec raw_time;
+static struct timespec raw_time;
 
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index bac752f..b706529 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -23,9 +23,6 @@ static int syscall_exit_register(struct ftrace_event_call *event,
 static int syscall_enter_define_fields(struct ftrace_event_call *call);
 static int syscall_exit_define_fields(struct ftrace_event_call *call);
 
-/* All syscall exit events have the same fields */
-static LIST_HEAD(syscall_exit_fields);
-
 static struct list_head *
 syscall_get_enter_fields(struct ftrace_event_call *call)
 {
@@ -34,34 +31,28 @@ syscall_get_enter_fields(struct ftrace_event_call *call)
        return &entry->enter_fields;
 }
 
-static struct list_head *
-syscall_get_exit_fields(struct ftrace_event_call *call)
-{
-       return &syscall_exit_fields;
-}
-
 struct trace_event_functions enter_syscall_print_funcs = {
-       .trace                  = print_syscall_enter,
+       .trace          = print_syscall_enter,
 };
 
 struct trace_event_functions exit_syscall_print_funcs = {
-       .trace                  = print_syscall_exit,
+       .trace          = print_syscall_exit,
 };
 
 struct ftrace_event_class event_class_syscall_enter = {
-       .system                 = "syscalls",
-       .reg                    = syscall_enter_register,
-       .define_fields          = syscall_enter_define_fields,
-       .get_fields             = syscall_get_enter_fields,
-       .raw_init               = init_syscall_trace,
+       .system         = "syscalls",
+       .reg            = syscall_enter_register,
+       .define_fields  = syscall_enter_define_fields,
+       .get_fields     = syscall_get_enter_fields,
+       .raw_init       = init_syscall_trace,
 };
 
 struct ftrace_event_class event_class_syscall_exit = {
-       .system                 = "syscalls",
-       .reg                    = syscall_exit_register,
-       .define_fields          = syscall_exit_define_fields,
-       .get_fields             = syscall_get_exit_fields,
-       .raw_init               = init_syscall_trace,
+       .system         = "syscalls",
+       .reg            = syscall_exit_register,
+       .define_fields  = syscall_exit_define_fields,
+       .fields         = LIST_HEAD_INIT(event_class_syscall_exit.fields),
+       .raw_init       = init_syscall_trace,
 };
 
 extern unsigned long __start_syscalls_metadata[];
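What replaces the dropped syscall_get_exit_fields() callback is compile-time list initialization: LIST_HEAD_INIT points the embedded list_head at itself, so every syscall-exit event shares one valid, initially empty fields list with no accessor. The trick in miniature (struct and names are illustrative):

#include <linux/list.h>

struct demo_class {
	struct list_head fields;
};

/* LIST_HEAD_INIT makes .fields a self-pointing (empty) list at compile
 * time -- the same technique event_class_syscall_exit now uses. */
static struct demo_class demo = {
	.fields = LIST_HEAD_INIT(demo.fields),
};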
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 52462ae..e032716 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -61,6 +61,9 @@ OPTIONS
 -r::
 --realtime=::
        Collect data with this RT SCHED_FIFO priority.
+-D::
+--no-delay::
+       Collect data without buffering.
 -A::
 --append::
        Append to the output file to do incremental profiling.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7069bd3..df6064a 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -49,6 +49,7 @@ static int                    pipe_output                     =      0;
 static const char              *output_name                    = "perf.data";
 static int                     group                           =      0;
 static int                     realtime_prio                   =      0;
+static bool                    nodelay                         =  false;
 static bool                    raw_samples                     =  false;
 static bool                    sample_id_all_avail             =   true;
 static bool                    system_wide                     =  false;
@@ -307,6 +308,11 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
                attr->sample_type       |= PERF_SAMPLE_CPU;
        }
 
+       if (nodelay) {
+               attr->watermark = 0;
+               attr->wakeup_events = 1;
+       }
+
        attr->mmap              = track;
        attr->comm              = track;
        attr->inherit           = !no_inherit;
@@ -331,9 +337,6 @@ try_again:
                        else if (err ==  ENODEV && cpu_list) {
                                die("No such device - did you specify"
                                        " an out-of-range profile CPU?\n");
-                       } else if (err == ENOENT) {
-                               die("%s event is not supported. ",
-                                    event_name(evsel));
                        } else if (err == EINVAL && sample_id_all_avail) {
                                /*
                                 * Old kernel, no attr->sample_id_type_all field
@@ -480,6 +483,7 @@ static void atexit_header(void)
                        process_buildids();
                perf_header__write(&session->header, output, true);
                perf_session__delete(session);
+               perf_evsel_list__delete();
                symbol__exit();
        }
 }
@@ -845,6 +849,8 @@ const struct option record_options[] = {
                    "record events on existing thread id"),
        OPT_INTEGER('r', "realtime", &realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
+       OPT_BOOLEAN('D', "no-delay", &nodelay,
+                   "collect data without buffering"),
        OPT_BOOLEAN('R', "raw-samples", &raw_samples,
                    "collect raw sample records from all opened counters"),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index abd4b84..29e7ffd 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1843,15 +1843,15 @@ static const char *record_args[] = {
        "-f",
        "-m", "1024",
        "-c", "1",
-       "-e", "sched:sched_switch:r",
-       "-e", "sched:sched_stat_wait:r",
-       "-e", "sched:sched_stat_sleep:r",
-       "-e", "sched:sched_stat_iowait:r",
-       "-e", "sched:sched_stat_runtime:r",
-       "-e", "sched:sched_process_exit:r",
-       "-e", "sched:sched_process_fork:r",
-       "-e", "sched:sched_wakeup:r",
-       "-e", "sched:sched_migrate_task:r",
+       "-e", "sched:sched_switch",
+       "-e", "sched:sched_stat_wait",
+       "-e", "sched:sched_stat_sleep",
+       "-e", "sched:sched_stat_iowait",
+       "-e", "sched:sched_stat_runtime",
+       "-e", "sched:sched_process_exit",
+       "-e", "sched:sched_process_fork",
+       "-e", "sched:sched_wakeup",
+       "-e", "sched:sched_migrate_task",
 };
 
 static int __cmd_record(int argc, const char **argv)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c385a63..0ff11d9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -743,6 +743,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 out_free_fd:
        list_for_each_entry(pos, &evsel_list, node)
                perf_evsel__free_stat_priv(pos);
+       perf_evsel_list__delete();
 out:
        thread_map__delete(threads);
        threads = NULL;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 6ce4042..05344c6 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1247,8 +1247,6 @@ try_again:
                                die("Permission error - are you root?\n"
                                        "\t Consider tweaking"
                                        " /proc/sys/kernel/perf_event_paranoid.\n");
-                       if (err == ENOENT)
-                               die("%s event is not supported. ", event_name(evsel));
                        /*
                         * If it's cycles then fall back to hrtimer
                         * based cpu-clock-tick sw counter, which
@@ -1473,6 +1471,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
                pos->attr.sample_period = default_interval;
        }
 
+       sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node);
+
        symbol_conf.priv_size = (sizeof(struct sym_entry) +
                                 (nr_counters + 1) * sizeof(unsigned long));
 
@@ -1490,6 +1490,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
 out_free_fd:
        list_for_each_entry(pos, &evsel_list, node)
                perf_evsel__free_mmap(pos);
+       perf_evsel_list__delete();
 
        return status;
 }
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 5b1ecd6..595d0f4 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -286,8 +286,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
        status = p->fn(argc, argv, prefix);
        exit_browser(status);
 
-       perf_evsel_list__delete();
-
        if (status)
                return status & 0xff;