Metadata I/Os are scheduled to minimize their impact on user data I/Os.
When there are enough LUNs instantiated (i.e., enough bandwidth), it is
easy to interleave metadata and data one after the other so that
metadata I/Os are the ones being blocked and not vice-versa.
We do this by calculating the distance between the I/Os in terms of the
LUNs that are not in used, and selecting a free LUN that satisfies a
the simple heuristic that metadata is scheduled behind. The per-LUN
semaphores guarantee consistency. This works fine on >1 LUN
configuration. However, when a single LUN is instantiated, this design
leads to a deadlock, where metadata waits to be scheduled on a free LUN.
This patch implements the 1 LUN case by simply scheduling the metadada
I/O after the data I/O. In the process, we refactor the way a line is
replaced to ensure that metadata writes are submitted after data writes
in order to guarantee block sequentiality. Note that, since there is
only one LUN, both I/Os will block each other by design. However, such
configuration only pursues tight read latencies, not write bandwidth.
Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
spin_unlock(&l_mg->free_lock);
}
-void pblk_line_replace_data(struct pblk *pblk)
+struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *cur, *new;
+ struct pblk_line *cur, *new = NULL;
unsigned int left_seblks;
int is_next = 0;
cur = l_mg->data_line;
new = l_mg->data_next;
if (!new)
- return;
+ goto out;
l_mg->data_line = new;
spin_lock(&l_mg->free_lock);
l_mg->data_line = NULL;
l_mg->data_next = NULL;
spin_unlock(&l_mg->free_lock);
- return;
+ goto out;
}
pblk_line_setup_metadata(new, l_mg, &pblk->lm);
/* If line is not fully erased, erase it */
if (atomic_read(&new->left_eblks)) {
if (pblk_line_erase(pblk, new))
- return;
+ goto out;
} else {
io_schedule();
}
if (!pblk_line_init_metadata(pblk, new, cur)) {
new = pblk_line_retry(pblk, new);
if (!new)
- return;
+ goto out;
goto retry_setup;
}
if (!pblk_line_init_bb(pblk, new, 1)) {
new = pblk_line_retry(pblk, new);
if (!new)
- return;
+ goto out;
goto retry_setup;
}
if (is_next)
pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
+
+out:
+ return new;
}
void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
}
lm->emeta_bb = geo->nr_luns - i;
- lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
- geo->sec_per_blk);
+
+ lm->min_blk_line = 1;
+ if (geo->nr_luns > 1)
+ lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec +
+ lm->emeta_sec[0], geo->sec_per_blk);
+
if (lm->min_blk_line > lm->blk_per_line) {
pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
lm->blk_per_line);
unsigned int valid_secs)
{
struct pblk_line *line = pblk_line_get_data(pblk);
- struct pblk_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta;
struct pblk_w_ctx *w_ctx;
- __le64 *lba_list = emeta_to_lbas(pblk, emeta->buf);
+ __le64 *lba_list;
u64 paddr;
int nr_secs = pblk->min_write_pgs;
int i;
+ if (pblk_line_is_full(line)) {
+ struct pblk_line *prev_line = line;
+
+ line = pblk_line_replace_data(pblk);
+ pblk_line_close_meta(pblk, prev_line);
+ }
+
+ emeta = line->emeta;
+ lba_list = emeta_to_lbas(pblk, emeta->buf);
+
paddr = pblk_alloc_page(pblk, line, nr_secs);
for (i = 0; i < nr_secs; i++, paddr++) {
}
}
- if (pblk_line_is_full(line)) {
- struct pblk_line *prev_line = line;
-
- pblk_line_replace_data(pblk);
- pblk_line_close_meta(pblk, prev_line);
- }
-
pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
}
int alloc_type, gfp_t gfp_mask);
struct pblk_line *pblk_line_get(struct pblk *pblk);
struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
-void pblk_line_replace_data(struct pblk *pblk);
+struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
struct pblk_line *pblk_line_get_data(struct pblk *pblk);