AVR32 AVR32 architecture is enabled.
AX25 Appropriate AX.25 support is enabled.
BLACKFIN Blackfin architecture is enabled.
+ CLK Common clock infrastructure is enabled.
DRM Direct Rendering Management support is enabled.
DYNAMIC_DEBUG Build in debug messages and enable them at runtime
EDD BIOS Enhanced Disk Drive Services (EDD) is enabled
on: enable for both 32- and 64-bit processes
off: disable for both 32- and 64-bit processes
+ alloc_snapshot [FTRACE]
+ Allocate the ftrace snapshot buffer on boot up when the
+ main buffer is allocated. This is handy if debugging
+ and you need to use tracing_snapshot() on boot up, and
+ do not want to use tracing_snapshot_alloc() as it needs
+ to be done where GFP_KERNEL allocations are allowed.
+
amd_iommu= [HW,X86-64]
Pass parameters to the AMD IOMMU driver in the system.
Possible values are:
cio_ignore= [S390]
See Documentation/s390/CommonIO for details.
+ clk_ignore_unused
+ [CLK]
+ Keep all clocks already enabled by bootloader on,
+ even if no driver has claimed them. This is useful
+ for debug and development, but should not be
+ needed on a platform with proper driver support.
+ For more information, see Documentation/clk.txt.
clock= [BUGS=X86-32, HW] gettimeofday clocksource override.
[Deprecated]
is selected automatically. Check
Documentation/kdump/kdump.txt for further details.
- crashkernel_low=size[KMG]
- [KNL, x86] parts under 4G.
-
crashkernel=range1:size1[,range2:size2,...][@offset]
[KNL] Same as above, but depends on the memory
in the running system. The syntax of range is
a memory unit (amount[KMG]). See also
Documentation/kdump/kdump.txt for an example.
+ crashkernel=size[KMG],high
+ [KNL, x86_64] range could be above 4G. Allow kernel
+ to allocate physical memory region from top, so could
+ be above 4G if system have more than 4G ram installed.
+ Otherwise memory region will be allocated below 4G, if
+ available.
+ It will be ignored if crashkernel=X is specified.
+ crashkernel=size[KMG],low
+ [KNL, x86_64] range under 4G. When crashkernel=X,high
+ is passed, kernel could allocate physical memory region
+ above 4G, that cause second kernel crash on system
+ that require some amount of low memory, e.g. swiotlb
+ requires at least 64M+32K low memory. Kernel would
+ try to allocate 72M below 4G automatically.
+ This one let user to specify own low range under 4G
+ for second kernel instead.
+ 0: to disable low allocation.
+ It will be ignored when crashkernel=X,high is not used
+ or memory reserved is below 4G.
+
cs89x0_dma= [HW,NET]
Format: <dma>
edd= [EDD]
Format: {"off" | "on" | "skip[mbr]"}
+ efi_no_storage_paranoia [EFI; X86]
+ Using this parameter you can use more than 50% of
+ your efi variable storage. Use this parameter only if
+ you are really sure that your UEFI does sane gc and
+ fulfills the spec otherwise your board may brick.
+
eisa_irq_edge= [PARISC,HW]
See header of drivers/parisc/eisa.c.
In kernels built with CONFIG_RCU_NOCB_CPU=y, set
the specified list of CPUs to be no-callback CPUs.
Invocation of these CPUs' RCU callbacks will
- be offloaded to "rcuoN" kthreads created for
- that purpose. This reduces OS jitter on the
+ be offloaded to "rcuox/N" kthreads created for
+ that purpose, where "x" is "b" for RCU-bh, "p"
+ for RCU-preempt, and "s" for RCU-sched, and "N"
+ is the CPU number. This reduces OS jitter on the
offloaded CPUs, which can be useful for HPC and
+
real-time workloads. It can also improve energy
efficiency for asymmetric multiprocessors.
leaf rcu_node structure. Useful for very large
systems.
+ rcutree.jiffies_till_first_fqs= [KNL,BOOT]
+ Set delay from grace-period initialization to
+ first attempt to force quiescent states.
+ Units are jiffies, minimum value is zero,
+ and maximum value is HZ.
+
+ rcutree.jiffies_till_next_fqs= [KNL,BOOT]
+ Set delay between subsequent attempts to force
+ quiescent states. Units are jiffies, minimum
+ value is one, and maximum value is HZ.
+
rcutree.qhimark= [KNL,BOOT]
Set threshold of queued
RCU callbacks over which batch limiting is disabled.
rcutree.rcu_cpu_stall_timeout= [KNL,BOOT]
Set timeout for RCU CPU stall warning messages.
- rcutree.jiffies_till_first_fqs= [KNL,BOOT]
- Set delay from grace-period initialization to
- first attempt to force quiescent states.
- Units are jiffies, minimum value is zero,
- and maximum value is HZ.
+ rcutree.rcu_idle_gp_delay= [KNL,BOOT]
+ Set wakeup interval for idle CPUs that have
+ RCU callbacks (RCU_FAST_NO_HZ=y).
- rcutree.jiffies_till_next_fqs= [KNL,BOOT]
- Set delay between subsequent attempts to force
- quiescent states. Units are jiffies, minimum
- value is one, and maximum value is HZ.
+ rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT]
+ Set wakeup interval for idle CPUs that have
+ only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y).
+ Lazy RCU callbacks are those which RCU can
+ prove do nothing more than free memory.
rcutorture.fqs_duration= [KNL,BOOT]
Set duration of force_quiescent_state bursts.
or other driver-specific files in the
Documentation/watchdog/ directory.
+ workqueue.disable_numa
+ By default, all work items queued to unbound
+ workqueues are affine to the NUMA nodes they're
+ issued on, which results in better behavior in
+ general. If NUMA affinity needs to be disabled for
+ whatever reason, this option can be used. Note
+ that this also can be controlled per-workqueue for
+ workqueues visible under /sys/bus/workqueue/.
+
x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
default x2apic cluster mode on platforms
supporting x2apic.
static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
- #define RCU_STATE_INITIALIZER(sname, cr) { \
+ #define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \
.level = { &sname##_state.node[0] }, \
.call = cr, \
.fqs_state = RCU_GP_IDLE, \
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
.onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
.name = #sname, \
+ .abbr = sabbr, \
}
struct rcu_state rcu_sched_state =
- RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
+ RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
- struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
+ struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
static struct rcu_state *rcu_state;
module_param(jiffies_till_first_fqs, ulong, 0644);
module_param(jiffies_till_next_fqs, ulong, 0644);
+ static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
+ struct rcu_data *rdp);
static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
static void force_quiescent_state(struct rcu_state *rsp);
static int rcu_pending(int cpu);
if (rcu_gp_in_progress(rsp))
return 0; /* No, a grace period is already in progress. */
+ if (rcu_nocb_needs_gp(rsp))
+ return 1; /* Yes, a no-CBs CPU needs one. */
if (!rdp->nxttail[RCU_NEXT_TAIL])
return 0; /* No, this is a no-CBs (or offline) CPU. */
if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
{
int i;
+ if (init_nocb_callback_list(rdp))
+ return;
rdp->nxtlist = NULL;
for (i = 0; i < RCU_NEXT_SIZE; i++)
rdp->nxttail[i] = &rdp->nxtlist;
- init_nocb_callback_list(rdp);
}
/*
}
/*
+ * Trace-event helper function for rcu_start_future_gp() and
+ * rcu_nocb_wait_gp().
+ */
+ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+ unsigned long c, char *s)
+ {
+ trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
+ rnp->completed, c, rnp->level,
+ rnp->grplo, rnp->grphi, s);
+ }
+
+ /*
+ * Start some future grace period, as needed to handle newly arrived
+ * callbacks. The required future grace periods are recorded in each
+ * rcu_node structure's ->need_future_gp field.
+ *
+ * The caller must hold the specified rcu_node structure's ->lock.
+ */
+ static unsigned long __maybe_unused
+ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
+ {
+ unsigned long c;
+ int i;
+ struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+
+ /*
+ * Pick up grace-period number for new callbacks. If this
+ * grace period is already marked as needed, return to the caller.
+ */
+ c = rcu_cbs_completed(rdp->rsp, rnp);
+ trace_rcu_future_gp(rnp, rdp, c, "Startleaf");
+ if (rnp->need_future_gp[c & 0x1]) {
+ trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf");
+ return c;
+ }
+
+ /*
+ * If either this rcu_node structure or the root rcu_node structure
+ * believe that a grace period is in progress, then we must wait
+ * for the one following, which is in "c". Because our request
+ * will be noticed at the end of the current grace period, we don't
+ * need to explicitly start one.
+ */
+ if (rnp->gpnum != rnp->completed ||
+ ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
+ rnp->need_future_gp[c & 0x1]++;
+ trace_rcu_future_gp(rnp, rdp, c, "Startedleaf");
+ return c;
+ }
+
+ /*
+ * There might be no grace period in progress. If we don't already
+ * hold it, acquire the root rcu_node structure's lock in order to
+ * start one (if needed).
+ */
+ if (rnp != rnp_root)
+ raw_spin_lock(&rnp_root->lock);
+
+ /*
+ * Get a new grace-period number. If there really is no grace
+ * period in progress, it will be smaller than the one we obtained
+ * earlier. Adjust callbacks as needed. Note that even no-CBs
+ * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
+ */
+ c = rcu_cbs_completed(rdp->rsp, rnp_root);
+ for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
+ if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
+ rdp->nxtcompleted[i] = c;
+
+ /*
+ * If the needed for the required grace period is already
+ * recorded, trace and leave.
+ */
+ if (rnp_root->need_future_gp[c & 0x1]) {
+ trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot");
+ goto unlock_out;
+ }
+
+ /* Record the need for the future grace period. */
+ rnp_root->need_future_gp[c & 0x1]++;
+
+ /* If a grace period is not already in progress, start one. */
+ if (rnp_root->gpnum != rnp_root->completed) {
+ trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot");
+ } else {
+ trace_rcu_future_gp(rnp, rdp, c, "Startedroot");
+ rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
+ }
+ unlock_out:
+ if (rnp != rnp_root)
+ raw_spin_unlock(&rnp_root->lock);
+ return c;
+ }
+
+ /*
+ * Clean up any old requests for the just-ended grace period. Also return
+ * whether any additional grace periods have been requested. Also invoke
+ * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads
+ * waiting for this grace period to complete.
+ */
+ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+ {
+ int c = rnp->completed;
+ int needmore;
+ struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+
+ rcu_nocb_gp_cleanup(rsp, rnp);
+ rnp->need_future_gp[c & 0x1] = 0;
+ needmore = rnp->need_future_gp[(c + 1) & 0x1];
+ trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup");
+ return needmore;
+ }
+
+ /*
* If there is room, assign a ->completed number to any callbacks on
* this CPU that have not already been assigned. Also accelerate any
* callbacks that were previously assigned a ->completed number that has
rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
rdp->nxtcompleted[i] = c;
}
+ /* Record any needed additional grace periods. */
+ rcu_start_future_gp(rnp, rdp);
/* Trace depending on how much we were able to accelerate. */
if (!*rdp->nxttail[RCU_WAIT_TAIL])
rdp = this_cpu_ptr(rsp->rda);
rcu_preempt_check_blocked_tasks(rnp);
rnp->qsmask = rnp->qsmaskinit;
- rnp->gpnum = rsp->gpnum;
+ ACCESS_ONCE(rnp->gpnum) = rsp->gpnum;
WARN_ON_ONCE(rnp->completed != rsp->completed);
- rnp->completed = rsp->completed;
+ ACCESS_ONCE(rnp->completed) = rsp->completed;
if (rnp == rdp->mynode)
rcu_start_gp_per_cpu(rsp, rnp, rdp);
rcu_preempt_boost_start_gp(rnp);
rnp->grphi, rnp->qsmask);
raw_spin_unlock_irq(&rnp->lock);
#ifdef CONFIG_PROVE_RCU_DELAY
- if ((prandom_u32() % (rcu_num_nodes * 8)) == 0)
- if ((random32() % (rcu_num_nodes * 8)) == 0 &&
++ if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 &&
+ system_state == SYSTEM_RUNNING)
schedule_timeout_uninterruptible(2);
#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
cond_resched();
static void rcu_gp_cleanup(struct rcu_state *rsp)
{
unsigned long gp_duration;
+ int nocb = 0;
struct rcu_data *rdp;
struct rcu_node *rnp = rcu_get_root(rsp);
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
raw_spin_lock_irq(&rnp->lock);
- rnp->completed = rsp->gpnum;
+ ACCESS_ONCE(rnp->completed) = rsp->gpnum;
+ rdp = this_cpu_ptr(rsp->rda);
+ if (rnp == rdp->mynode)
+ __rcu_process_gp_end(rsp, rnp, rdp);
+ nocb += rcu_future_gp_cleanup(rsp, rnp);
raw_spin_unlock_irq(&rnp->lock);
cond_resched();
}
rnp = rcu_get_root(rsp);
raw_spin_lock_irq(&rnp->lock);
+ rcu_nocb_gp_set(rnp, nocb);
rsp->completed = rsp->gpnum; /* Declare grace period done. */
trace_rcu_grace_period(rsp->name, rsp->completed, "end");
rsp->fqs_state = RCU_GP_IDLE;
rdp = this_cpu_ptr(rsp->rda);
+ rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */
if (cpu_needs_another_gp(rsp, rdp))
rsp->gp_flags = 1;
raw_spin_unlock_irq(&rnp->lock);
/*
* Start a new RCU grace period if warranted, re-initializing the hierarchy
* in preparation for detecting the next grace period. The caller must hold
- * the root node's ->lock, which is released before return. Hard irqs must
- * be disabled.
+ * the root node's ->lock and hard irqs must be disabled.
*
* Note that it is legal for a dying CPU (which is marked as offline) to
* invoke this function. This can happen when the dying CPU reports its
* quiescent state.
*/
static void
- rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
- __releases(rcu_get_root(rsp)->lock)
+ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
+ struct rcu_data *rdp)
{
- struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
- struct rcu_node *rnp = rcu_get_root(rsp);
-
- if (!rsp->gp_kthread ||
- !cpu_needs_another_gp(rsp, rdp)) {
+ if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
/*
* Either we have not yet spawned the grace-period
* task, this CPU does not need another grace period,
* or a grace period is already in progress.
* Either way, don't start a new grace period.
*/
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
-
- /*
- * Because there is no grace period in progress right now,
- * any callbacks we have up to this point will be satisfied
- * by the next grace period. So this is a good place to
- * assign a grace period number to recently posted callbacks.
- */
- rcu_accelerate_cbs(rsp, rnp, rdp);
-
rsp->gp_flags = RCU_GP_FLAG_INIT;
- raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
-
- /* Ensure that CPU is aware of completion of last grace period. */
- rcu_process_gp_end(rsp, rdp);
- local_irq_restore(flags);
/* Wake up rcu_gp_kthread() to start the grace period. */
wake_up(&rsp->gp_wq);
}
/*
+ * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
+ * callbacks. Note that rcu_start_gp_advanced() cannot do this because it
+ * is invoked indirectly from rcu_advance_cbs(), which would result in
+ * endless recursion -- or would do so if it wasn't for the self-deadlock
+ * that is encountered beforehand.
+ */
+ static void
+ rcu_start_gp(struct rcu_state *rsp)
+ {
+ struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+ struct rcu_node *rnp = rcu_get_root(rsp);
+
+ /*
+ * If there is no grace period in progress right now, any
+ * callbacks we have up to this point will be satisfied by the
+ * next grace period. Also, advancing the callbacks reduces the
+ * probability of false positives from cpu_needs_another_gp()
+ * resulting in pointless grace periods. So, advance callbacks
+ * then start the grace period!
+ */
+ rcu_advance_cbs(rsp, rnp, rdp);
+ rcu_start_gp_advanced(rsp, rnp, rdp);
+ }
+
+ /*
* Report a full set of quiescent states to the specified rcu_state
* data structure. This involves cleaning up after the prior grace
* period and letting rcu_start_gp() start up the next grace period
- * if one is needed. Note that the caller must hold rnp->lock, as
- * required by rcu_start_gp(), which will release it.
+ * if one is needed. Note that the caller must hold rnp->lock, which
+ * is released before return.
*/
static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
__releases(rcu_get_root(rsp)->lock)
local_irq_save(flags);
if (cpu_needs_another_gp(rsp, rdp)) {
raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
- rcu_start_gp(rsp, flags); /* releases above lock */
+ rcu_start_gp(rsp);
+ raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
} else {
local_irq_restore(flags);
}
static void invoke_rcu_core(void)
{
- raise_softirq(RCU_SOFTIRQ);
+ if (cpu_online(smp_processor_id()))
+ raise_softirq(RCU_SOFTIRQ);
}
/*
/* Start a new grace period if one not already started. */
if (!rcu_gp_in_progress(rsp)) {
- unsigned long nestflag;
struct rcu_node *rnp_root = rcu_get_root(rsp);
- raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
- rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */
+ raw_spin_lock(&rnp_root->lock);
+ rcu_start_gp(rsp);
+ raw_spin_unlock(&rnp_root->lock);
} else {
/* Give the grace period a kick. */
rdp->blimit = LONG_MAX;
}
/*
- * Check to see if any future RCU-related work will need to be done
- * by the current CPU, even if none need be done immediately, returning
- * 1 if so.
+ * Return true if the specified CPU has any callback. If all_lazy is
+ * non-NULL, store an indication of whether all callbacks are lazy.
+ * (If there are no callbacks, all of them are deemed to be lazy.)
*/
- static int rcu_cpu_has_callbacks(int cpu)
+ static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
{
+ bool al = true;
+ bool hc = false;
+ struct rcu_data *rdp;
struct rcu_state *rsp;
- /* RCU callbacks either ready or pending? */
- for_each_rcu_flavor(rsp)
- if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
- return 1;
- return 0;
+ for_each_rcu_flavor(rsp) {
+ rdp = per_cpu_ptr(rsp->rda, cpu);
+ if (rdp->qlen != rdp->qlen_lazy)
+ al = false;
+ if (rdp->nxtlist)
+ hc = true;
+ }
+ if (all_lazy)
+ *all_lazy = al;
+ return hc;
}
/*
rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
atomic_set(&rdp->dynticks->dynticks,
(atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
- rcu_prepare_for_idle_init(cpu);
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
/* Add CPU to rcu_node bitmasks. */
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
struct rcu_node *rnp = rdp->mynode;
struct rcu_state *rsp;
- int ret = NOTIFY_OK;
trace_rcu_utilization("Start CPU hotplug");
switch (action) {
rcu_boost_kthread_setaffinity(rnp, -1);
break;
case CPU_DOWN_PREPARE:
- if (nocb_cpu_expendable(cpu))
- rcu_boost_kthread_setaffinity(rnp, cpu);
- else
- ret = NOTIFY_BAD;
+ rcu_boost_kthread_setaffinity(rnp, cpu);
break;
case CPU_DYING:
case CPU_DYING_FROZEN:
- /*
- * The whole machine is "stopped" except this CPU, so we can
- * touch any data without introducing corruption. We send the
- * dying CPU's callbacks to an arbitrarily chosen online CPU.
- */
for_each_rcu_flavor(rsp)
rcu_cleanup_dying_cpu(rsp);
- rcu_cleanup_after_idle(cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
break;
}
trace_rcu_utilization("End CPU hotplug");
- return ret;
+ return NOTIFY_OK;
}
/*
}
rnp->level = i;
INIT_LIST_HEAD(&rnp->blkd_tasks);
+ rcu_init_one_nocb(rnp);
}
}
rcu_init_one(&rcu_sched_state, &rcu_sched_data);
rcu_init_one(&rcu_bh_state, &rcu_bh_data);
__rcu_init_preempt();
- rcu_init_nocb();
- open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+ open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
/*
* We don't need protection against CPU-hotplug here because