From 5cf00fcd3c98d2eafb58ac7a649bbdb9dbc4902b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 21 May 2012 09:28:20 +1000 Subject: [PATCH] md/raid10: collect some geometry fields into a dedicated structure. We will shortly be adding reshape support for RAID10 which will require it having 2 concurrent geometries (before and after). To make that easier, collect most geometry fields into 'struct geom' and access them from there. Then we will more easily be able to add a second set of fields. Note that 'copies' is not in this struct and so cannot be changed. There is little need to change this number and doing so is a lot more difficult as it requires reallocating more things. So leave it out for now. Signed-off-by: NeilBrown --- drivers/md/raid10.c | 200 +++++++++++++++++++++++++++------------------------- drivers/md/raid10.h | 23 +++--- 2 files changed, 115 insertions(+), 108 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 832fb4d..36f445f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -511,42 +511,43 @@ static void raid10_find_phys(struct r10conf *conf, struct r10bio *r10bio) sector_t chunk; sector_t stripe; int dev; + struct geom *geo = &conf->geo; int slot = 0; /* now calculate first sector/dev */ - chunk = r10bio->sector >> conf->chunk_shift; - sector = r10bio->sector & conf->chunk_mask; + chunk = r10bio->sector >> geo->chunk_shift; + sector = r10bio->sector & geo->chunk_mask; - chunk *= conf->near_copies; + chunk *= geo->near_copies; stripe = chunk; - dev = sector_div(stripe, conf->raid_disks); - if (conf->far_offset) - stripe *= conf->far_copies; + dev = sector_div(stripe, geo->raid_disks); + if (geo->far_offset) + stripe *= geo->far_copies; - sector += stripe << conf->chunk_shift; + sector += stripe << geo->chunk_shift; /* and calculate all the others */ - for (n=0; n < conf->near_copies; n++) { + for (n = 0; n < geo->near_copies; n++) { int d = dev; sector_t s = sector; r10bio->devs[slot].addr = sector; 
r10bio->devs[slot].devnum = d; slot++; - for (f = 1; f < conf->far_copies; f++) { - d += conf->near_copies; - if (d >= conf->raid_disks) - d -= conf->raid_disks; - s += conf->stride; + for (f = 1; f < geo->far_copies; f++) { + d += geo->near_copies; + if (d >= geo->raid_disks) + d -= geo->raid_disks; + s += geo->stride; r10bio->devs[slot].devnum = d; r10bio->devs[slot].addr = s; slot++; } dev++; - if (dev >= conf->raid_disks) { + if (dev >= geo->raid_disks) { dev = 0; - sector += (conf->chunk_mask + 1); + sector += (geo->chunk_mask + 1); } } BUG_ON(slot != conf->copies); @@ -555,28 +556,29 @@ static void raid10_find_phys(struct r10conf *conf, struct r10bio *r10bio) static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) { sector_t offset, chunk, vchunk; + struct geom *geo = &conf->geo; - offset = sector & conf->chunk_mask; - if (conf->far_offset) { + offset = sector & geo->chunk_mask; + if (geo->far_offset) { int fc; - chunk = sector >> conf->chunk_shift; - fc = sector_div(chunk, conf->far_copies); - dev -= fc * conf->near_copies; + chunk = sector >> geo->chunk_shift; + fc = sector_div(chunk, geo->far_copies); + dev -= fc * geo->near_copies; if (dev < 0) - dev += conf->raid_disks; + dev += geo->raid_disks; } else { - while (sector >= conf->stride) { - sector -= conf->stride; - if (dev < conf->near_copies) - dev += conf->raid_disks - conf->near_copies; + while (sector >= geo->stride) { + sector -= geo->stride; + if (dev < geo->near_copies) + dev += geo->raid_disks - geo->near_copies; else - dev -= conf->near_copies; + dev -= geo->near_copies; } - chunk = sector >> conf->chunk_shift; + chunk = sector >> geo->chunk_shift; } - vchunk = chunk * conf->raid_disks + dev; - sector_div(vchunk, conf->near_copies); - return (vchunk << conf->chunk_shift) + offset; + vchunk = chunk * geo->raid_disks + dev; + sector_div(vchunk, geo->near_copies); + return (vchunk << geo->chunk_shift) + offset; } /** @@ -599,8 +601,9 @@ static int 
raid10_mergeable_bvec(struct request_queue *q, int max; unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; + struct geom *geo = &conf->geo; - if (conf->near_copies < conf->raid_disks) { + if (geo->near_copies < geo->raid_disks) { max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; if (max < 0) @@ -681,6 +684,7 @@ static struct md_rdev *read_balance(struct r10conf *conf, struct md_rdev *rdev, *best_rdev; int do_balance; int best_slot; + struct geom *geo = &conf->geo; raid10_find_phys(conf, r10_bio); rcu_read_lock(); @@ -761,11 +765,11 @@ retry: * sequential read speed for 'far copies' arrays. So only * keep it for 'near' arrays, and review those later. */ - if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending)) + if (geo->near_copies > 1 && !atomic_read(&rdev->nr_pending)) break; /* for far > 1 always use the lowest address */ - if (conf->far_copies > 1) + if (geo->far_copies > 1) new_distance = r10_bio->devs[slot].addr; else new_distance = abs(r10_bio->devs[slot].addr - @@ -812,7 +816,7 @@ static int raid10_congested(void *data, int bits) if (mddev_congested(mddev, bits)) return 1; rcu_read_lock(); - for (i = 0; i < conf->raid_disks && ret == 0; i++) { + for (i = 0; i < conf->geo.raid_disks && ret == 0; i++) { struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); if (rdev && !test_bit(Faulty, &rdev->flags)) { struct request_queue *q = bdev_get_queue(rdev->bdev); @@ -979,7 +983,8 @@ static void make_request(struct mddev *mddev, struct bio * bio) struct r10bio *r10_bio; struct bio *read_bio; int i; - int chunk_sects = conf->chunk_mask + 1; + sector_t chunk_mask = conf->geo.chunk_mask; + int chunk_sects = chunk_mask + 1; const int rw = bio_data_dir(bio); const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); const unsigned long do_fua = (bio->bi_rw & REQ_FUA); @@ -997,9 +1002,9 @@ static void make_request(struct mddev *mddev, struct bio * bio) /* If this request crosses a chunk 
boundary, we need to * split it. This will only happen for 1 PAGE (or less) requests. */ - if (unlikely( (bio->bi_sector & conf->chunk_mask) + (bio->bi_size >> 9) - > chunk_sects && - conf->near_copies < conf->raid_disks)) { + if (unlikely((bio->bi_sector & chunk_mask) + (bio->bi_size >> 9) + > chunk_sects + && conf->geo.near_copies < conf->geo.raid_disks)) { struct bio_pair *bp; /* Sanity check -- queue functions should prevent this happening */ if (bio->bi_vcnt != 1 || @@ -1368,19 +1373,19 @@ static void status(struct seq_file *seq, struct mddev *mddev) struct r10conf *conf = mddev->private; int i; - if (conf->near_copies < conf->raid_disks) + if (conf->geo.near_copies < conf->geo.raid_disks) seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2); - if (conf->near_copies > 1) - seq_printf(seq, " %d near-copies", conf->near_copies); - if (conf->far_copies > 1) { - if (conf->far_offset) - seq_printf(seq, " %d offset-copies", conf->far_copies); + if (conf->geo.near_copies > 1) + seq_printf(seq, " %d near-copies", conf->geo.near_copies); + if (conf->geo.far_copies > 1) { + if (conf->geo.far_offset) + seq_printf(seq, " %d offset-copies", conf->geo.far_copies); else - seq_printf(seq, " %d far-copies", conf->far_copies); + seq_printf(seq, " %d far-copies", conf->geo.far_copies); } - seq_printf(seq, " [%d/%d] [", conf->raid_disks, - conf->raid_disks - mddev->degraded); - for (i = 0; i < conf->raid_disks; i++) + seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks, + conf->geo.raid_disks - mddev->degraded); + for (i = 0; i < conf->geo.raid_disks; i++) seq_printf(seq, "%s", conf->mirrors[i].rdev && test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? 
"U" : "_"); @@ -1403,7 +1408,7 @@ static int enough(struct r10conf *conf, int ignore) if (conf->mirrors[first].rdev && first != ignore) cnt++; - first = (first+1) % conf->raid_disks; + first = (first+1) % conf->geo.raid_disks; } if (cnt == 0) return 0; @@ -1445,7 +1450,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) "md/raid10:%s: Disk failure on %s, disabling device.\n" "md/raid10:%s: Operation continuing on %d devices.\n", mdname(mddev), bdevname(rdev->bdev, b), - mdname(mddev), conf->raid_disks - mddev->degraded); + mdname(mddev), conf->geo.raid_disks - mddev->degraded); } static void print_conf(struct r10conf *conf) @@ -1458,10 +1463,10 @@ static void print_conf(struct r10conf *conf) printk(KERN_DEBUG "(!conf)\n"); return; } - printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, - conf->raid_disks); + printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->geo.raid_disks - conf->mddev->degraded, + conf->geo.raid_disks); - for (i = 0; i < conf->raid_disks; i++) { + for (i = 0; i < conf->geo.raid_disks; i++) { char b[BDEVNAME_SIZE]; tmp = conf->mirrors + i; if (tmp->rdev) @@ -1493,7 +1498,7 @@ static int raid10_spare_active(struct mddev *mddev) * Find all non-in_sync disks within the RAID10 configuration * and mark them in_sync */ - for (i = 0; i < conf->raid_disks; i++) { + for (i = 0; i < conf->geo.raid_disks; i++) { tmp = conf->mirrors + i; if (tmp->replacement && tmp->replacement->recovery_offset == MaxSector @@ -1535,7 +1540,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) int err = -EEXIST; int mirror; int first = 0; - int last = conf->raid_disks - 1; + int last = conf->geo.raid_disks - 1; struct request_queue *q = bdev_get_queue(rdev->bdev); if (mddev->recovery_cp < MaxSector) @@ -2603,7 +2608,7 @@ static int init_resync(struct r10conf *conf) buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE; BUG_ON(conf->r10buf_pool); conf->have_replacement = 0; - for (i = 0; i < conf->raid_disks; i++) + for (i = 0; i 
< conf->geo.raid_disks; i++) if (conf->mirrors[i].replacement) conf->have_replacement = 1; conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf); @@ -2657,6 +2662,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, sector_t sync_blocks; sector_t sectors_skipped = 0; int chunks_skipped = 0; + sector_t chunk_mask = conf->geo.chunk_mask; if (!conf->r10buf_pool) if (init_resync(conf)) @@ -2680,7 +2686,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) bitmap_end_sync(mddev->bitmap, mddev->curr_resync, &sync_blocks, 1); - else for (i=0; i<conf->raid_disks; i++) { + else for (i = 0; i < conf->geo.raid_disks; i++) { sector_t sect = raid10_find_virt(conf, mddev->curr_resync, i); bitmap_end_sync(mddev->bitmap, sect, @@ -2694,7 +2700,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, /* Completed a full sync so the replacements * are now fully recovered. */ - for (i = 0; i < conf->raid_disks; i++) + for (i = 0; i < conf->geo.raid_disks; i++) if (conf->mirrors[i].replacement) conf->mirrors[i].replacement ->recovery_offset @@ -2707,7 +2713,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, *skipped = 1; return sectors_skipped; } - if (chunks_skipped >= conf->raid_disks) { + if (chunks_skipped >= conf->geo.raid_disks) { /* if there has been nothing to do on any drive, * then there is nothing to do at all.. 
*/ @@ -2721,9 +2727,9 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, /* make sure whole request will fit in a chunk - if chunks * are meaningful */ - if (conf->near_copies < conf->raid_disks && - max_sector > (sector_nr | conf->chunk_mask)) - max_sector = (sector_nr | conf->chunk_mask) + 1; + if (conf->geo.near_copies < conf->geo.raid_disks && + max_sector > (sector_nr | chunk_mask)) + max_sector = (sector_nr | chunk_mask) + 1; /* * If there is non-resync activity waiting for us then * put in a delay to throttle resync. @@ -2752,7 +2758,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int j; r10_bio = NULL; - for (i=0 ; i<conf->raid_disks; i++) { + for (i = 0 ; i < conf->geo.raid_disks; i++) { int still_degraded; struct r10bio *rb2; sector_t sect; @@ -2806,7 +2812,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, /* Need to check if the array will still be * degraded */ - for (j=0; j<conf->raid_disks; j++) + for (j = 0; j < conf->geo.raid_disks; j++) if (conf->mirrors[j].rdev == NULL || test_bit(Faulty, &conf->mirrors[j].rdev->flags)) { still_degraded = 1; @@ -2984,9 +2990,9 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, r10_bio->sector = sector_nr; set_bit(R10BIO_IsSync, &r10_bio->state); raid10_find_phys(conf, r10_bio); - r10_bio->sectors = (sector_nr | conf->chunk_mask) - sector_nr +1; + r10_bio->sectors = (sector_nr | chunk_mask) - sector_nr + 1; - for (i=0; i<conf->copies; i++) { + for (i = 0; i < conf->copies; i++) { int d = r10_bio->devs[i].devnum; sector_t first_bad, sector; int bad_sectors; @@ -3152,16 +3158,16 @@ raid10_size(struct mddev *mddev, sector_t sectors, int raid_disks) struct r10conf *conf = mddev->private; if (!raid_disks) - raid_disks = conf->raid_disks; + raid_disks = conf->geo.raid_disks; if (!sectors) sectors = conf->dev_sectors; - size = sectors >> conf->chunk_shift; - sector_div(size, conf->far_copies); + size = sectors >> conf->geo.chunk_shift; + 
sector_div(size, conf->geo.far_copies); size = size * raid_disks; - sector_div(size, conf->near_copies); + sector_div(size, conf->geo.near_copies); - return size << conf->chunk_shift; + return size << conf->geo.chunk_shift; } static void calc_sectors(struct r10conf *conf, sector_t size) @@ -3171,10 +3177,10 @@ static void calc_sectors(struct r10conf *conf, sector_t size) * conf->stride */ - size = size >> conf->chunk_shift; - sector_div(size, conf->far_copies); - size = size * conf->raid_disks; - sector_div(size, conf->near_copies); + size = size >> conf->geo.chunk_shift; + sector_div(size, conf->geo.far_copies); + size = size * conf->geo.raid_disks; + sector_div(size, conf->geo.near_copies); /* 'size' is now the number of chunks in the array */ /* calculate "used chunks per device" */ size = size * conf->copies; @@ -3182,15 +3188,15 @@ static void calc_sectors(struct r10conf *conf, sector_t size) /* We need to round up when dividing by raid_disks to * get the stride size. */ - size = DIV_ROUND_UP_SECTOR_T(size, conf->raid_disks); + size = DIV_ROUND_UP_SECTOR_T(size, conf->geo.raid_disks); - conf->dev_sectors = size << conf->chunk_shift; + conf->dev_sectors = size << conf->geo.chunk_shift; - if (conf->far_offset) - conf->stride = 1 << conf->chunk_shift; + if (conf->geo.far_offset) + conf->geo.stride = 1 << conf->geo.chunk_shift; else { - sector_div(size, conf->far_copies); - conf->stride = size << conf->chunk_shift; + sector_div(size, conf->geo.far_copies); + conf->geo.stride = size << conf->geo.chunk_shift; } } @@ -3234,13 +3240,13 @@ static struct r10conf *setup_conf(struct mddev *mddev) goto out; - conf->raid_disks = mddev->raid_disks; - conf->near_copies = nc; - conf->far_copies = fc; + conf->geo.raid_disks = mddev->raid_disks; + conf->geo.near_copies = nc; + conf->geo.far_copies = fc; conf->copies = nc*fc; - conf->far_offset = fo; - conf->chunk_mask = mddev->new_chunk_sectors - 1; - conf->chunk_shift = ffz(~mddev->new_chunk_sectors); + conf->geo.far_offset = 
fo; + conf->geo.chunk_mask = mddev->new_chunk_sectors - 1; + conf->geo.chunk_shift = ffz(~mddev->new_chunk_sectors); conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, r10bio_pool_free, conf); @@ -3304,16 +3310,16 @@ static int run(struct mddev *mddev) chunk_size = mddev->chunk_sectors << 9; blk_queue_io_min(mddev->queue, chunk_size); - if (conf->raid_disks % conf->near_copies) - blk_queue_io_opt(mddev->queue, chunk_size * conf->raid_disks); + if (conf->geo.raid_disks % conf->geo.near_copies) + blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); else blk_queue_io_opt(mddev->queue, chunk_size * - (conf->raid_disks / conf->near_copies)); + (conf->geo.raid_disks / conf->geo.near_copies)); rdev_for_each(rdev, mddev) { disk_idx = rdev->raid_disk; - if (disk_idx >= conf->raid_disks + if (disk_idx >= conf->geo.raid_disks || disk_idx < 0) continue; disk = conf->mirrors + disk_idx; @@ -3341,7 +3347,7 @@ static int run(struct mddev *mddev) } mddev->degraded = 0; - for (i = 0; i < conf->raid_disks; i++) { + for (i = 0; i < conf->geo.raid_disks; i++) { disk = conf->mirrors + i; @@ -3368,8 +3374,8 @@ static int run(struct mddev *mddev) mdname(mddev)); printk(KERN_INFO "md/raid10:%s: active with %d out of %d devices\n", - mdname(mddev), conf->raid_disks - mddev->degraded, - conf->raid_disks); + mdname(mddev), conf->geo.raid_disks - mddev->degraded, + conf->geo.raid_disks); /* * Ok, everything is just fine now */ @@ -3386,9 +3392,9 @@ static int run(struct mddev *mddev) * maybe... 
*/ { - int stripe = conf->raid_disks * + int stripe = conf->geo.raid_disks * ((mddev->chunk_sectors << 9) / PAGE_SIZE); - stripe /= conf->near_copies; + stripe /= conf->geo.near_copies; if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) mddev->queue->backing_dev_info.ra_pages = 2* stripe; } @@ -3460,7 +3466,7 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors) struct r10conf *conf = mddev->private; sector_t oldsize, size; - if (conf->far_copies > 1 && !conf->far_offset) + if (conf->geo.far_copies > 1 && !conf->geo.far_offset) return -EINVAL; oldsize = raid10_size(mddev, 0, 0); diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 7c615613..4c4942a 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -14,33 +14,34 @@ struct mirror_info { struct r10conf { struct mddev *mddev; struct mirror_info *mirrors; - int raid_disks; spinlock_t device_lock; /* geometry */ - int near_copies; /* number of copies laid out + struct geom { + int raid_disks; + int near_copies; /* number of copies laid out * raid0 style */ - int far_copies; /* number of copies laid out + int far_copies; /* number of copies laid out * at large strides across drives */ - int far_offset; /* far_copies are offset by 1 + int far_offset; /* far_copies are offset by 1 * stripe instead of many */ - int copies; /* near_copies * far_copies. - * must be <= raid_disks - */ - sector_t stride; /* distance between far copies. + sector_t stride; /* distance between far copies. * This is size / far_copies unless * far_offset, in which case it is * 1 stripe. */ + int chunk_shift; /* shift from chunks to sectors */ + sector_t chunk_mask; + } geo; + int copies; /* near_copies * far_copies. + * must be <= raid_disks + */ sector_t dev_sectors; /* temp copy of * mddev->dev_sectors */ - int chunk_shift; /* shift from chunks to sectors */ - sector_t chunk_mask; - struct list_head retry_list; /* queue pending writes and submit them on unplug */ struct bio_list pending_bio_list; -- 2.7.4