* elapsed.
*/
if (bfqq == bfqd->in_service_queue &&
- (bfqd->rq_in_driver == 0 ||
+ (bfqd->tot_rq_in_driver == 0 ||
(bfqq->last_serv_time_ns > 0 &&
- bfqd->rqs_injected && bfqd->rq_in_driver > 0)) &&
+ bfqd->rqs_injected && bfqd->tot_rq_in_driver > 0)) &&
time_is_before_eq_jiffies(bfqq->decrease_time_jif +
msecs_to_jiffies(10))) {
bfqd->last_empty_occupied_ns = ktime_get_ns();
* will be set in case injection is performed
* on bfqq before rq is completed).
*/
- if (bfqd->rq_in_driver == 0)
+ if (bfqd->tot_rq_in_driver == 0)
bfqd->rqs_injected = false;
}
}
static void bfq_end_wr(struct bfq_data *bfqd)
{
struct bfq_queue *bfqq;
+ int i;
spin_lock_irq(&bfqd->lock);
- list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list)
- bfq_bfqq_end_wr(bfqq);
+ for (i = 0; i < bfqd->num_actuators; i++) {
+ list_for_each_entry(bfqq, &bfqd->active_list[i], bfqq_list)
+ bfq_bfqq_end_wr(bfqq);
+ }
list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list)
bfq_bfqq_end_wr(bfqq);
bfq_end_wr_async(bfqd);
* - start a new observation interval with this dispatch
*/
if (now_ns - bfqd->last_dispatch > 100*NSEC_PER_MSEC &&
- bfqd->rq_in_driver == 0)
+ bfqd->tot_rq_in_driver == 0)
goto update_rate_and_reset;
/* Update sampling information */
bfqd->peak_rate_samples++;
- if ((bfqd->rq_in_driver > 0 ||
+ if ((bfqd->tot_rq_in_driver > 0 ||
now_ns - bfqd->last_completion < BFQ_MIN_TT)
&& !BFQ_RQ_SEEKY(bfqd, bfqd->last_position, rq))
bfqd->sequential_samples++;
return false;
return (bfqq->wr_coeff > 1 &&
- (bfqd->wr_busy_queues <
- tot_busy_queues ||
- bfqd->rq_in_driver >=
- bfqq->dispatched + 4)) ||
+ (bfqd->wr_busy_queues < tot_busy_queues ||
+ bfqd->tot_rq_in_driver >= bfqq->dispatched + 4)) ||
bfq_asymmetric_scenario(bfqd, bfqq) ||
tot_busy_queues == 1;
}
{
struct bfq_queue *bfqq, *in_serv_bfqq = bfqd->in_service_queue;
unsigned int limit = in_serv_bfqq->inject_limit;
+ int i;
+
/*
* If
* - bfqq is not weight-raised and therefore does not carry
)
limit = 1;
- if (bfqd->rq_in_driver >= limit)
+ if (bfqd->tot_rq_in_driver >= limit)
return NULL;
/*
* (and re-added only if it gets new requests, but then it
* is assigned again enough budget for its new backlog).
*/
- list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list)
- if (!RB_EMPTY_ROOT(&bfqq->sort_list) &&
- (in_serv_always_inject || bfqq->wr_coeff > 1) &&
- bfq_serv_to_charge(bfqq->next_rq, bfqq) <=
- bfq_bfqq_budget_left(bfqq)) {
+ for (i = 0; i < bfqd->num_actuators; i++) {
+ list_for_each_entry(bfqq, &bfqd->active_list[i], bfqq_list)
+ if (!RB_EMPTY_ROOT(&bfqq->sort_list) &&
+ (in_serv_always_inject || bfqq->wr_coeff > 1) &&
+ bfq_serv_to_charge(bfqq->next_rq, bfqq) <=
+ bfq_bfqq_budget_left(bfqq)) {
/*
* Allow for only one large in-flight request
* on non-rotational devices, for the
else
limit = in_serv_bfqq->inject_limit;
- if (bfqd->rq_in_driver < limit) {
+ if (bfqd->tot_rq_in_driver < limit) {
bfqd->rqs_injected = true;
return bfqq;
}
}
+ }
+
+ return NULL;
+}
+
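+/*
+ * Return the in-service queue if it is bound to actuator idx;
+ * otherwise return the first queue on the active list of actuator
+ * idx that has pending I/O and enough budget left to serve its
+ * next request, or NULL if there is no such queue.
+ */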
+static struct bfq_queue *
+bfq_find_active_bfqq_for_actuator(struct bfq_data *bfqd, int idx)
+{
+ struct bfq_queue *bfqq;
+
+ if (bfqd->in_service_queue &&
+ bfqd->in_service_queue->actuator_idx == idx)
+ return bfqd->in_service_queue;
+
+ list_for_each_entry(bfqq, &bfqd->active_list[idx], bfqq_list) {
+ if (!RB_EMPTY_ROOT(&bfqq->sort_list) &&
+ bfq_serv_to_charge(bfqq->next_rq, bfqq) <=
+ bfq_bfqq_budget_left(bfqq)) {
+ return bfqq;
+ }
+ }
return NULL;
}
+/*
+ * Perform a linear scan across the actuators, until an actuator is found
+ * for which the following two conditions hold: the load of the
+ * actuator is below the threshold (see comments on actuator_load_threshold
+ * for details), and there is a queue that contains I/O for that
+ * actuator. On success, return that queue.
+ */
+static struct bfq_queue *
+bfq_find_bfqq_for_underused_actuator(struct bfq_data *bfqd)
+{
+ int i;
+
+ for (i = 0; i < bfqd->num_actuators; i++) {
+ if (bfqd->rq_in_driver[i] < bfqd->actuator_load_threshold) {
+ struct bfq_queue *bfqq =
+ bfq_find_active_bfqq_for_actuator(bfqd, i);
+
+ if (bfqq)
+ return bfqq;
+ }
+ }
+
+ return NULL;
+}
+
/*
* Select a queue for service. If we have a current queue in service,
* check whether to continue servicing it, or retrieve and set a new one.
*/
static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
{
- struct bfq_queue *bfqq;
+ struct bfq_queue *bfqq, *inject_bfqq;
struct request *next_rq;
enum bfqq_expiration reason = BFQQE_BUDGET_TIMEOUT;
goto expire;
check_queue:
+ /*
+ * If some actuator is underutilized, but the in-service
+ * queue does not contain I/O for that actuator, then try to
+ * inject I/O for that actuator.
+ */
+ inject_bfqq = bfq_find_bfqq_for_underused_actuator(bfqd);
+ if (inject_bfqq && inject_bfqq != bfqq)
+ return inject_bfqq;
+
/*
* This loop is rarely executed more than once. Even when it
* happens, it is much more convenient to re-execute this loop
/*
* We exploit the bfq_finish_requeue_request hook to
- * decrement rq_in_driver, but
+ * decrement tot_rq_in_driver, but
* bfq_finish_requeue_request will not be invoked on
* this request. So, to avoid unbalance, just start
- * this request, without incrementing rq_in_driver. As
- * a negative consequence, rq_in_driver is deceptively
+ * this request, without incrementing tot_rq_in_driver. As
+ * a negative consequence, tot_rq_in_driver is deceptively
* lower than it should be while this request is in
* service. This may cause bfq_schedule_dispatch to be
* invoked uselessly.
* bfq_finish_requeue_request hook, if defined, is
* probably invoked also on this request. So, by
* exploiting this hook, we could 1) increment
- * rq_in_driver here, and 2) decrement it in
+ * tot_rq_in_driver here, and 2) decrement it in
* bfq_finish_requeue_request. Such a solution would
* let the value of the counter be always accurate,
* but it would entail using an extra interface
* Of course, serving one request at a time may cause loss of
* throughput.
*/
- if (bfqd->strict_guarantees && bfqd->rq_in_driver > 0)
+ if (bfqd->strict_guarantees && bfqd->tot_rq_in_driver > 0)
goto exit;
bfqq = bfq_select_queue(bfqd);
if (rq) {
inc_in_driver_start_rq:
- bfqd->rq_in_driver++;
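+ /* account the dispatch on both the per-actuator and total counters */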
+ bfqd->rq_in_driver[bfqq->actuator_idx]++;
+ bfqd->tot_rq_in_driver++;
start_rq:
rq->rq_flags |= RQF_STARTED;
}
struct bfq_queue *bfqq = bfqd->in_service_queue;
bfqd->max_rq_in_driver = max_t(int, bfqd->max_rq_in_driver,
- bfqd->rq_in_driver);
+ bfqd->tot_rq_in_driver);
if (bfqd->hw_tag == 1)
return;
* sum is not exact, as it's not taking into account deactivated
* requests.
*/
- if (bfqd->rq_in_driver + bfqd->queued <= BFQ_HW_QUEUE_THRESHOLD)
+ if (bfqd->tot_rq_in_driver + bfqd->queued <= BFQ_HW_QUEUE_THRESHOLD)
return;
/*
if (bfqq && bfq_bfqq_has_short_ttime(bfqq) &&
bfqq->dispatched + bfqq->queued[0] + bfqq->queued[1] <
BFQ_HW_QUEUE_THRESHOLD &&
- bfqd->rq_in_driver < BFQ_HW_QUEUE_THRESHOLD)
+ bfqd->tot_rq_in_driver < BFQ_HW_QUEUE_THRESHOLD)
return;
if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES)
bfq_update_hw_tag(bfqd);
- bfqd->rq_in_driver--;
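+ /* completion accounting mirrors the dispatch-side updates */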
+ bfqd->rq_in_driver[bfqq->actuator_idx]--;
+ bfqd->tot_rq_in_driver--;
bfqq->dispatched--;
if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) {
BFQQE_NO_MORE_REQUESTS);
}
- if (!bfqd->rq_in_driver)
+ if (!bfqd->tot_rq_in_driver)
bfq_schedule_dispatch(bfqd);
}
* conditions to do it, or we can lower the last base value
* computed.
*
- * NOTE: (bfqd->rq_in_driver == 1) means that there is no I/O
- * request in flight, because this function is in the code
+ * NOTE: (bfqd->tot_rq_in_driver == 1) means that no I/O
+ * request beyond the one being completed is in flight,
+ * because this function is in the code
* path that handles the completion of a request of bfqq, and,
* in particular, this function is executed before
- * bfqd->rq_in_driver is decremented in such a code path.
+ * bfqd->tot_rq_in_driver is decremented in such a code path.
*/
- if ((bfqq->last_serv_time_ns == 0 && bfqd->rq_in_driver == 1) ||
+ if ((bfqq->last_serv_time_ns == 0 && bfqd->tot_rq_in_driver == 1) ||
tot_time_ns < bfqq->last_serv_time_ns) {
if (bfqq->last_serv_time_ns == 0) {
/*
bfqq->inject_limit = max_t(unsigned int, 1, old_limit);
}
bfqq->last_serv_time_ns = tot_time_ns;
- } else if (!bfqd->rqs_injected && bfqd->rq_in_driver == 1)
+ } else if (!bfqd->rqs_injected && bfqd->tot_rq_in_driver == 1)
/*
* No I/O injected and no request still in service in
* the drive: these are the exact conditions for
bfqd->num_groups_with_pending_reqs = 0;
#endif
- INIT_LIST_HEAD(&bfqd->active_list);
+ for (i = 0; i < BFQ_MAX_ACTUATORS; i++)
+ INIT_LIST_HEAD(&bfqd->active_list[i]);
INIT_LIST_HEAD(&bfqd->idle_list);
INIT_HLIST_HEAD(&bfqd->burst_list);
ref_wr_duration[blk_queue_nonrot(bfqd->queue)];
bfqd->peak_rate = ref_rate[blk_queue_nonrot(bfqd->queue)] * 2 / 3;
+ /* see comments on the definition of actuator_load_threshold in bfq_data */
+ bfqd->actuator_load_threshold = 4;
+
spin_lock_init(&bfqd->lock);
/*
/* number of queued requests */
int queued;
/* number of requests dispatched and waiting for completion */
- int rq_in_driver;
+ int tot_rq_in_driver;
+ /*
+ * number of requests dispatched and waiting for completion
+ * for each actuator
+ */
+ int rq_in_driver[BFQ_MAX_ACTUATORS];
/* true if the device is non rotational and performs queueing */
bool nonrot_with_queueing;
/* maximum budget allotted to a bfq_queue before rescheduling */
int bfq_max_budget;
- /* list of all the bfq_queues active on the device */
- struct list_head active_list;
+ /*
+ * List of all the bfq_queues active for a specific actuator
+ * on the device. Keeping active queues separate on a
+ * per-actuator basis helps implementing per-actuator
+ * injection more efficiently.
+ */
+ struct list_head active_list[BFQ_MAX_ACTUATORS];
/* list of all the bfq_queues idle on the device */
struct list_head idle_list;
sector_t sector[BFQ_MAX_ACTUATORS];
sector_t nr_sectors[BFQ_MAX_ACTUATORS];
struct blk_independent_access_range ia_ranges[BFQ_MAX_ACTUATORS];
+
+ /*
+ * If the number of I/O requests queued in the device for a
+ * given actuator is below the following threshold, then the
+ * actuator is deemed underutilized. If this condition is found to
+ * hold for some actuator upon a dispatch, but (i) the
+ * in-service queue does not contain I/O for that actuator,
+ * while (ii) some other queue does contain I/O for that
+ * actuator, then the head I/O request of the latter queue is
+ * returned (injected), instead of the head request of the
+ * currently in-service queue.
+ *
+ * We set the threshold, empirically, to the minimum possible
+ * value for which an actuator is fully utilized, or close to
+ * being fully utilized. By doing so, injected I/O 'steals' as
+ * few drive-queue slots as possible from the in-service
+ * queue. This reduces as much as possible the probability
+ * that the service of I/O from the in-service bfq_queue gets
+ * delayed because of slot exhaustion, i.e., because all the
+ * slots of the drive queue are filled with I/O injected from
+ * other queues (NCQ provides for 32 slots).
+ */
+ unsigned int actuator_load_threshold;
};
enum bfqq_state_flags {