it can be updated at runtime by writing to the
corresponding sysfs file.
+ workqueue.cpu_intensive_thresh_us=
+ Per-cpu work items which run for longer than this
+ threshold are automatically considered CPU intensive
+ and excluded from concurrency management to prevent
+ them from noticeably delaying other per-cpu work
+ items. Default is 10000 (10ms).
+
workqueue.disable_numa
By default, all work items queued to unbound
workqueues are affine to the NUMA nodes they're
Use tools/workqueue/wq_monitor.py to monitor workqueue operations: ::
$ tools/workqueue/wq_monitor.py events
- total infl CMwake mayday rescued
- events 18545 0 5 - -
- events_highpri 8 0 0 - -
- events_long 3 0 0 - -
- events_unbound 38306 0 - - -
- events_freezable 0 0 0 - -
- events_power_efficient 29598 0 0 - -
- events_freezable_power_ 10 0 0 - -
- sock_diag_events 0 0 0 - -
-
- total infl CMwake mayday rescued
- events 18548 0 5 - -
- events_highpri 8 0 0 - -
- events_long 3 0 0 - -
- events_unbound 38322 0 - - -
- events_freezable 0 0 0 - -
- events_power_efficient 29603 0 0 - -
- events_freezable_power_ 10 0 0 - -
- sock_diag_events 0 0 0 - -
+ total infl CPUitsv CMwake mayday rescued
+ events 18545 0 0 5 - -
+ events_highpri 8 0 0 0 - -
+ events_long 3 0 0 0 - -
+ events_unbound 38306 0 - - - -
+ events_freezable 0 0 0 0 - -
+ events_power_efficient 29598 0 0 0 - -
+ events_freezable_power_ 10 0 0 0 - -
+ sock_diag_events 0 0 0 0 - -
+
+ total infl CPUitsv CMwake mayday rescued
+ events 18548 0 0 5 - -
+ events_highpri 8 0 0 0 - -
+ events_long 3 0 0 0 - -
+ events_unbound 38322 0 - - - -
+ events_freezable 0 0 0 0 - -
+ events_power_efficient 29603 0 0 0 - -
+ events_freezable_power_ 10 0 0 0 - -
+ sock_diag_events 0 0 0 0 - -
...
perf_event_task_tick();
+ if (curr->flags & PF_WQ_WORKER)
+ wq_worker_tick(curr);
+
#ifdef CONFIG_SMP
rq->idle_balance = idle_cpu(cpu);
trigger_load_balance(rq);
enum pool_workqueue_stats {
PWQ_STAT_STARTED, /* work items started execution */
PWQ_STAT_COMPLETED, /* work items completed execution */
+ PWQ_STAT_CPU_INTENSIVE, /* wq_cpu_intensive_thresh_us violations */
PWQ_STAT_CM_WAKEUP, /* concurrency-management worker wakeups */
PWQ_STAT_MAYDAY, /* maydays to rescuer */
PWQ_STAT_RESCUED, /* linked work items executed by rescuer */
static cpumask_var_t *wq_numa_possible_cpumask;
/* possible CPUs of each node */
+/*
+ * Per-cpu work items which run for longer than the following threshold are
+ * automatically considered CPU intensive and excluded from concurrency
+ * management to prevent them from noticeably delaying other per-cpu work items.
+ */
+static unsigned long wq_cpu_intensive_thresh_us = 10000;
+module_param_named(cpu_intensive_thresh_us, wq_cpu_intensive_thresh_us, ulong, 0644);
+
static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);
if (!(worker->flags & WORKER_NOT_RUNNING))
worker->pool->nr_running++;
preempt_enable();
+
+ /*
+ * CPU intensive auto-detection cares about how long a work item hogged
+ * CPU without sleeping. Reset the starting timestamp on wakeup.
+ */
+ worker->current_at = worker->task->se.sum_exec_runtime;
+
worker->sleeping = 0;
}
raw_spin_unlock_irq(&pool->lock);
}
+/**
+ * wq_worker_tick - a scheduler tick occurred while a kworker is running
+ * @task: task currently running
+ *
+ * Called from scheduler_tick(). We're in the IRQ context and the current
+ * worker's fields which follow the 'K' locking rule can be accessed safely.
+ */
+void wq_worker_tick(struct task_struct *task)
+{
+ struct worker *worker = kthread_data(task);
+ struct pool_workqueue *pwq = worker->current_pwq;
+ struct worker_pool *pool = worker->pool;
+
+ if (!pwq)
+ return;
+
+ /*
+ * If the current worker is concurrency managed and hogged the CPU for
+ * longer than wq_cpu_intensive_thresh_us, it's automatically marked
+ * CPU_INTENSIVE to avoid stalling other concurrency-managed work items.
+ */
+ if ((worker->flags & WORKER_NOT_RUNNING) ||
+ worker->task->se.sum_exec_runtime - worker->current_at <
+ wq_cpu_intensive_thresh_us * NSEC_PER_USEC)
+ return;
+
+ raw_spin_lock(&pool->lock);
+
+ worker_set_flags(worker, WORKER_CPU_INTENSIVE);
+ pwq->stats[PWQ_STAT_CPU_INTENSIVE]++;
+
+ if (need_more_worker(pool)) {
+ pwq->stats[PWQ_STAT_CM_WAKEUP]++;
+ wake_up_worker(pool);
+ }
+
+ raw_spin_unlock(&pool->lock);
+}
+
/**
* wq_worker_last_func - retrieve worker's last work function
* @task: Task to retrieve last work function of.
{
struct pool_workqueue *pwq = get_work_pwq(work);
struct worker_pool *pool = worker->pool;
- bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
unsigned long work_data;
struct worker *collision;
#ifdef CONFIG_LOCKDEP
worker->current_work = work;
worker->current_func = work->func;
worker->current_pwq = pwq;
+ worker->current_at = worker->task->se.sum_exec_runtime;
work_data = *work_data_bits(work);
worker->current_color = get_work_color(work_data);
* of concurrency management and the next code block will chain
* execution of the pending work items.
*/
- if (unlikely(cpu_intensive))
+ if (unlikely(pwq->wq->flags & WQ_CPU_INTENSIVE))
worker_set_flags(worker, WORKER_CPU_INTENSIVE);
/*
raw_spin_lock_irq(&pool->lock);
- /* clear cpu intensive status */
- if (unlikely(cpu_intensive))
- worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
+ /*
+ * In addition to %WQ_CPU_INTENSIVE, @worker may also have been marked
+ * CPU intensive by wq_worker_tick() if @work hogged CPU longer than
+ * wq_cpu_intensive_thresh_us. Clear it.
+ */
+ worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
/* tag the worker for identification in schedule() */
worker->last_func = worker->current_func;
struct work_struct *current_work; /* K: work being processed and its */
work_func_t current_func; /* K: function */
struct pool_workqueue *current_pwq; /* K: pwq */
+ u64 current_at; /* K: runtime at start or last wakeup */
unsigned int current_color; /* K: color */
int sleeping; /* S: is worker sleeping? */
*/
void wq_worker_running(struct task_struct *task);
void wq_worker_sleeping(struct task_struct *task);
+void wq_worker_tick(struct task_struct *task);
work_func_t wq_worker_last_func(struct task_struct *task);
#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
infl The number of currently in-flight work items.
+ CPUitsv The number of times a concurrency-managed work item hogged CPU
+ longer than the threshold (workqueue.cpu_intensive_thresh_us)
+ and got excluded from concurrency management to avoid stalling
+ other work items.
+
CMwake The number of concurrency-management wake-ups while executing a
work item of the workqueue.
PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED'] # work items started execution
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED'] # work items completed execution
+PWQ_STAT_CPU_INTENSIVE = prog['PWQ_STAT_CPU_INTENSIVE'] # wq_cpu_intensive_thresh_us violations
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP'] # concurrency-management worker wakeups
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY'] # maydays to rescuer
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED'] # linked work items executed by rescuer
'mem_reclaim' : self.mem_reclaim,
'started' : self.stats[PWQ_STAT_STARTED],
'completed' : self.stats[PWQ_STAT_COMPLETED],
+ 'cpu_intensive' : self.stats[PWQ_STAT_CPU_INTENSIVE],
'cm_wakeup' : self.stats[PWQ_STAT_CM_WAKEUP],
'mayday' : self.stats[PWQ_STAT_MAYDAY],
'rescued' : self.stats[PWQ_STAT_RESCUED], }
def table_header_str():
- return f'{"":>24} {"total":>8} {"infl":>5} {"CMwake":>7} {"mayday":>7} {"rescued":>7}'
+ return f'{"":>24} {"total":>8} {"infl":>5} '\
+ f'{"CPUitsv":>7} {"CMwake":>7} {"mayday":>7} {"rescued":>7}'
def table_row_str(self):
+ cpu_intensive = '-'
cm_wakeup = '-'
mayday = '-'
rescued = '-'
if not self.unbound:
+ cpu_intensive = str(self.stats[PWQ_STAT_CPU_INTENSIVE])
cm_wakeup = str(self.stats[PWQ_STAT_CM_WAKEUP])
if self.mem_reclaim:
out = f'{self.name[-24:]:24} ' \
f'{self.stats[PWQ_STAT_STARTED]:8} ' \
f'{max(self.stats[PWQ_STAT_STARTED] - self.stats[PWQ_STAT_COMPLETED], 0):5} ' \
+ f'{cpu_intensive:>7} ' \
f'{cm_wakeup:>7} ' \
f'{mayday:>7} ' \
f'{rescued:>7} '