2 * Zoned block device handling
4 * Copyright (c) 2015, Hannes Reinecke
5 * Copyright (c) 2015, SUSE Linux GmbH
7 * Copyright (c) 2016, Damien Le Moal
8 * Copyright (c) 2016, Western Digital
11 #include <linux/kernel.h>
12 #include <linux/module.h>
13 #include <linux/rbtree.h>
14 #include <linux/blkdev.h>
/*
 * Round a sector down to a zone boundary by clearing the low bits selected
 * by (blk_queue_zone_sectors(q) - 1).
 * NOTE(review): the mask arithmetic presumes the zone size is a power of
 * two -- confirm against blk_queue_zone_sectors().
 * NOTE(review): the rest of the parameter list and the function braces are
 * not visible in this excerpt.
 */
16 static inline sector_t blk_zone_start(struct request_queue *q,
19 sector_t zone_mask = blk_queue_zone_sectors(q) - 1;
21 return sector & ~zone_mask;
25 * Return true if a request is a write request that needs zone write locking.
27 bool blk_req_needs_zone_write_lock(struct request *rq)
/* First check: does the queue have a seq_zones_wlock bitmap at all? */
29 if (!rq->q->seq_zones_wlock)
/* Second check: is this a passthrough request? */
32 if (blk_rq_is_passthrough(rq))
/*
 * NOTE(review): the outcome of the two checks above and the switch header
 * are not visible in this excerpt.
 */
36 case REQ_OP_WRITE_ZEROES:
37 case REQ_OP_WRITE_SAME:
/*
 * For the operations listed above the result is whatever
 * blk_rq_zone_is_seq() reports (presumably: the request targets a
 * sequential zone -- confirm against its definition).
 */
39 return blk_rq_zone_is_seq(rq);
44 EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);
/*
 * Mark the zone targeted by @rq as write-locked: set the bit indexed by
 * blk_rq_zone_no(rq) in rq->q->seq_zones_wlock and flag the request with
 * RQF_ZONE_WRITE_LOCKED.
 */
46 void __blk_req_zone_write_lock(struct request *rq)
/* Warn (once) if the zone's bit was already set. */
48 if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
49 rq->q->seq_zones_wlock)))
/* NOTE(review): the statement guarded by the check above is not visible here. */
/* Warn (once) if the request already carries the lock flag, then set it. */
52 WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
53 rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
55 EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);
/*
 * Undo __blk_req_zone_write_lock(): clear RQF_ZONE_WRITE_LOCKED on @rq and,
 * when the queue has a seq_zones_wlock bitmap, clear the bit indexed by
 * blk_rq_zone_no(rq).
 */
57 void __blk_req_zone_write_unlock(struct request *rq)
/* The request flag is cleared unconditionally, before the bitmap is touched. */
59 rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
/* Warn (once) if the zone's bit was not set at the time it is cleared. */
60 if (rq->q->seq_zones_wlock)
61 WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
62 rq->q->seq_zones_wlock));
64 EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
67 * Check that a zone report belongs to the partition.
68 * If yes, fix its start sector and write pointer, copy it in the
69 * zone information array and return true. Return false otherwise.
71 static bool blkdev_report_zone(struct block_device *bdev,
73 struct blk_zone *zone)
/* Start sector of @bdev as returned by get_start_sect(). */
75 sector_t offset = get_start_sect(bdev);
/* Test: the reported zone starts before that offset. */
77 if (rep->start < offset)
/*
 * Test: the end of the zone lies beyond the size of bdev->bd_part.
 * NOTE(review): whether rep->start has been adjusted by 'offset' before
 * this comparison is not visible in this excerpt -- confirm.
 */
81 if (rep->start + rep->len > bdev->bd_part->nr_sects)
/* Conventional zones get their write pointer set to the end of the zone. */
84 if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
85 rep->wp = rep->start + rep->len;
/* Copy the report entry into the caller-provided zone structure. */
88 memcpy(zone, rep, sizeof(struct blk_zone));
94 * blkdev_report_zones - Get zones information
95 * @bdev: Target block device
96 * @sector: Sector from which to report zones
97 * @zones: Array of zone structures where to return the zones information
98 * @nr_zones: Number of zone structures in the zone array
99 * @gfp_mask: Memory allocation flags (for bio_alloc)
102 * Get zone information starting from the zone containing @sector.
103 * The number of zones reported may be less than the number
104 * requested by @nr_zones. The number of zones actually reported is
105 * returned in @nr_zones.
107 int blkdev_report_zones(struct block_device *bdev,
109 struct blk_zone *zones,
110 unsigned int *nr_zones,
113 struct request_queue *q = bdev_get_queue(bdev);
114 struct blk_zone_report_hdr *hdr;
/* nrz: the value of *nr_zones on entry, i.e. the capacity of @zones. */
115 unsigned int nrz = *nr_zones;
119 unsigned int nr_pages;
122 unsigned int i, n, nz;
/* NOTE(review): the outcome of the two checks below is not visible in this excerpt. */
130 if (!blk_queue_is_zoned(q))
/*
 * NOTE(review): the comparison is '>', so sector == nr_sects passes this
 * test -- confirm that is intended.
 */
136 if (sector > bdev->bd_part->nr_sects) {
142 * The zone report has a header. So make room for it in the
143 * payload. Also make sure that the report fits in a single BIO
144 * that will not be split down the stack.
/* Payload size: one header plus nrz zone descriptors ... */
146 rep_bytes = sizeof(struct blk_zone_report_hdr) +
147 sizeof(struct blk_zone) * nrz;
/* ... rounded up to a whole number of pages ... */
148 rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
/* ... and capped at queue_max_sectors(q) << 9. */
149 if (rep_bytes > (queue_max_sectors(q) << 9))
150 rep_bytes = queue_max_sectors(q) << 9;
/* Page count, bounded by BIO_MAX_PAGES and by queue_max_segments(q). */
152 nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
153 rep_bytes >> PAGE_SHIFT);
154 nr_pages = min_t(unsigned int, nr_pages,
155 queue_max_segments(q));
157 bio = bio_alloc(gfp_mask, nr_pages);
161 bio_set_dev(bio, bdev);
/* The report starts at the beginning of the zone containing @sector. */
162 bio->bi_iter.bi_sector = blk_zone_start(q, sector);
163 bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);
/* Attach one newly allocated page per payload page to the BIO. */
165 for (i = 0; i < nr_pages; i++) {
166 page = alloc_page(gfp_mask);
171 if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
/* Submit the report BIO and wait for its completion. */
180 ret = submit_bio_wait(bio);
185 * Process the report result: skip the header and go through the
186 * reported zones to fixup and fixup the zone information for
187 * partitions. At the same time, return the zone information into
193 bio_for_each_segment_all(bv, bio, i) {
/*
 * Map the segment's page to access the report data.
 * NOTE(review): the matching unmap is not visible in this excerpt.
 */
198 addr = kmap_atomic(bv->bv_page);
200 /* Get header in the first page */
203 hdr = (struct blk_zone_report_hdr *) addr;
/* nr_rep: zone count taken from the report header. */
204 nr_rep = hdr->nr_zones;
205 ofst = sizeof(struct blk_zone_report_hdr);
208 /* Fixup and report zones */
/*
 * Walk the descriptors in this segment while data remains in the
 * segment, fewer than nr_rep entries have been seen and fewer than
 * nrz entries have been stored in @zones.
 */
209 while (ofst < bv->bv_len &&
210 n < nr_rep && nz < nrz) {
211 if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
213 ofst += sizeof(struct blk_zone);
/* Test: all reported zones were seen, or @zones is full. */
219 if (n >= nr_rep || nz >= nrz)
/* Free the pages attached to the BIO. */
226 bio_for_each_segment_all(bv, bio, i)
227 __free_page(bv->bv_page);
232 EXPORT_SYMBOL_GPL(blkdev_report_zones);
235 * blkdev_reset_zones - Reset zones write pointer
236 * @bdev: Target block device
237 * @sector: Start sector of the first zone to reset
238 * @nr_sectors: Number of sectors, at least the length of one zone
239 * @gfp_mask: Memory allocation flags (for bio_alloc)
242 * Reset the write pointer of the zones contained in the range
243 * @sector..@sector+@nr_sectors. Specifying the entire disk sector range
244 * is valid, but the specified range should not contain conventional zones.
246 int blkdev_reset_zones(struct block_device *bdev,
247 sector_t sector, sector_t nr_sectors,
250 struct request_queue *q = bdev_get_queue(bdev);
251 sector_t zone_sectors;
/*
 * NOTE(review): no wrap-around check on sector + nr_sectors is visible in
 * this excerpt.
 */
252 sector_t end_sector = sector + nr_sectors;
/* NOTE(review): the outcome of the validation checks below is not visible here. */
259 if (!blk_queue_is_zoned(q))
/* Test: the range extends past the size of bdev->bd_part. */
262 if (end_sector > bdev->bd_part->nr_sects)
266 /* Check alignment (handle eventual smaller last zone) */
267 zone_sectors = blk_queue_zone_sectors(q);
/* Test: @sector is not aligned to the zone size. */
268 if (sector & (zone_sectors - 1))
/*
 * Test: @nr_sectors is not a multiple of the zone size and the range does
 * not end exactly at bd_part->nr_sects.
 */
271 if ((nr_sectors & (zone_sectors - 1)) &&
272 end_sector != bdev->bd_part->nr_sects)
/* One REQ_OP_ZONE_RESET BIO per zone, stepping by zone_sectors. */
275 while (sector < end_sector) {
/*
 * NOTE(review): the bio_alloc() result is dereferenced on the next visible
 * line; no NULL check is visible in this excerpt.
 */
277 bio = bio_alloc(gfp_mask, 0);
278 bio->bi_iter.bi_sector = sector;
279 bio_set_dev(bio, bdev);
280 bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
/* Submit the reset and wait for it to complete before moving on. */
282 ret = submit_bio_wait(bio);
288 sector += zone_sectors;
290 /* This may take a while, so be nice to others */
297 EXPORT_SYMBOL_GPL(blkdev_reset_zones);
300 * BLKREPORTZONE ioctl processing.
301 * Called from blkdev_ioctl.
303 int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
304 unsigned int cmd, unsigned long arg)
/* @arg is the user-space address of the request buffer. */
306 void __user *argp = (void __user *)arg;
307 struct request_queue *q;
308 struct blk_zone_report rep;
309 struct blk_zone *zones;
315 q = bdev_get_queue(bdev);
/* NOTE(review): the outcome of the checks below is not visible in this excerpt. */
319 if (!blk_queue_is_zoned(q))
/* Test: the caller lacks CAP_SYS_ADMIN. */
322 if (!capable(CAP_SYS_ADMIN))
/* Fetch the struct blk_zone_report header from user space. */
325 if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
/* Bound the user-supplied zone count before it sizes the allocation. */
331 if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone))
/* Zero-filled (__GFP_ZERO) array of rep.nr_zones zone descriptors. */
334 zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
335 GFP_KERNEL | __GFP_ZERO);
/*
 * rep.nr_zones is passed by address, so blkdev_report_zones() can update
 * it with the number of zones it reported.
 */
339 ret = blkdev_report_zones(bdev, rep.sector,
340 zones, &rep.nr_zones,
/* Copy the header back to user space ... */
345 if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
/* ... followed by rep.nr_zones descriptors placed right after the header. */
351 if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
352 sizeof(struct blk_zone) * rep.nr_zones))
/* NOTE(review): the release of 'zones' is not visible in this excerpt. */
363 * BLKRESETZONE ioctl processing.
364 * Called from blkdev_ioctl.
366 int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
367 unsigned int cmd, unsigned long arg)
369 void __user *argp = (void __user *)arg;
370 struct request_queue *q;
371 struct blk_zone_range zrange;
376 q = bdev_get_queue(bdev);
380 if (!blk_queue_is_zoned(q))
383 if (!capable(CAP_SYS_ADMIN))
386 if (!(mode & FMODE_WRITE))
389 if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
392 return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,