drivers/md/dm-kcopyd.c

   1 /*
   2  * Copyright (C) 2002 Sistina Software (UK) Limited.
   3  * Copyright (C) 2006 Red Hat GmbH
   4  *
   5  * This file is released under the GPL.
   6  *
   7  * Kcopyd provides a simple interface for copying an area of one
   8  * block-device to one or more other block-devices, with an asynchronous
   9  * completion notification.
  10  */
  11
  12 #include <linux/types.h>
  13 #include <linux/atomic.h>
  14 #include <linux/blkdev.h>
  15 #include <linux/fs.h>
  16 #include <linux/init.h>
  17 #include <linux/list.h>
  18 #include <linux/mempool.h>
  19 #include <linux/module.h>
  20 #include <linux/pagemap.h>
  21 #include <linux/slab.h>
  22 #include <linux/vmalloc.h>
  23 #include <linux/workqueue.h>
  24 #include <linux/mutex.h>
  25 #include <linux/device-mapper.h>
  26 #include <linux/dm-kcopyd.h>
  27
  28 #include "dm.h"
  29
/* Largest number of sectors handed to a single (sub) job. */
#define SUB_JOB_SIZE    128
/* Number of sub jobs a large copy is split across. */
#define SPLIT_COUNT     8
/* Minimum number of elements kept in each client's job mempool. */
#define MIN_JOBS        8
/* Pages needed to hold SUB_JOB_SIZE sectors; reserved for every client. */
#define RESERVE_PAGES   (DIV_ROUND_UP(SUB_JOB_SIZE << SECTOR_SHIFT, PAGE_SIZE))
  34
/*-----------------------------------------------------------------
 * Each kcopyd client has its own little pool of preallocated
 * pages for kcopyd io.
 *---------------------------------------------------------------*/
struct dm_kcopyd_client {
	/* Singly linked list of reserved pages that are currently unused. */
	struct page_list *pages;
	/* Number of pages this client keeps in reserve. */
	unsigned nr_reserved_pages;
	/* Number of entries currently on the 'pages' list. */
	unsigned nr_free_pages;

	/* dm-io client handle placed in every io request of this client. */
	struct dm_io_client *io_client;

	/* Woken when nr_jobs reaches zero; client destruction waits on it. */
	wait_queue_head_t destroyq;
	/*
	 * Jobs in flight: incremented when a job is dispatched or split,
	 * decremented when a job's completion has been run.
	 */
	atomic_t nr_jobs;

	/* Pool that the kcopyd_job allocations are drawn from. */
	mempool_t *job_pool;

	/* Workqueue and work item used to run do_work() for this client. */
	struct workqueue_struct *kcopyd_wq;
	struct work_struct kcopyd_work;

/*
 * We maintain three lists of jobs:
 *
 * i)   jobs waiting for pages
 * ii)  jobs that have pages, and are waiting for the io to be issued.
 * iii) jobs that have completed.
 *
 * All three of these are protected by job_lock.
 */
	spinlock_t job_lock;
	struct list_head complete_jobs;
	struct list_head io_jobs;
	struct list_head pages_jobs;
};
  68
  69 static void wake(struct dm_kcopyd_client *kc)
  70 {
  71         queue_work(kc->kcopyd_wq, &kc->kcopyd_work);
  72 }
  73
  74 /*
  75  * Obtain one page for the use of kcopyd.
  76  */
  77 static struct page_list *alloc_pl(gfp_t gfp)
  78 {
  79         struct page_list *pl;
  80
  81         pl = kmalloc(sizeof(*pl), gfp);
  82         if (!pl)
  83                 return NULL;
  84
  85         pl->page = alloc_page(gfp);
  86         if (!pl->page) {
  87                 kfree(pl);
  88                 return NULL;
  89         }
  90
  91         return pl;
  92 }
  93
  94 static void free_pl(struct page_list *pl)
  95 {
  96         __free_page(pl->page);
  97         kfree(pl);
  98 }
  99
 100 /*
 101  * Add the provided pages to a client's free page list, releasing
 102  * back to the system any beyond the reserved_pages limit.
 103  */
 104 static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl)
 105 {
 106         struct page_list *next;
 107
 108         do {
 109                 next = pl->next;
 110
 111                 if (kc->nr_free_pages >= kc->nr_reserved_pages)
 112                         free_pl(pl);
 113                 else {
 114                         pl->next = kc->pages;
 115                         kc->pages = pl;
 116                         kc->nr_free_pages++;
 117                 }
 118
 119                 pl = next;
 120         } while (pl);
 121 }
 122
 123 static int kcopyd_get_pages(struct dm_kcopyd_client *kc,
 124                             unsigned int nr, struct page_list **pages)
 125 {
 126         struct page_list *pl;
 127
 128         *pages = NULL;
 129
 130         do {
 131                 pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY);
 132                 if (unlikely(!pl)) {
 133                         /* Use reserved pages */
 134                         pl = kc->pages;
 135                         if (unlikely(!pl))
 136                                 goto out_of_memory;
 137                         kc->pages = pl->next;
 138                         kc->nr_free_pages--;
 139                 }
 140                 pl->next = *pages;
 141                 *pages = pl;
 142         } while (--nr);
 143
 144         return 0;
 145
 146 out_of_memory:
 147         if (*pages)
 148                 kcopyd_put_pages(kc, *pages);
 149         return -ENOMEM;
 150 }
 151
 152 /*
 153  * These three functions resize the page pool.
 154  */
 155 static void drop_pages(struct page_list *pl)
 156 {
 157         struct page_list *next;
 158
 159         while (pl) {
 160                 next = pl->next;
 161                 free_pl(pl);
 162                 pl = next;
 163         }
 164 }
 165
 166 /*
 167  * Allocate and reserve nr_pages for the use of a specific client.
 168  */
 169 static int client_reserve_pages(struct dm_kcopyd_client *kc, unsigned nr_pages)
 170 {
 171         unsigned i;
 172         struct page_list *pl = NULL, *next;
 173
 174         for (i = 0; i < nr_pages; i++) {
 175                 next = alloc_pl(GFP_KERNEL);
 176                 if (!next) {
 177                         if (pl)
 178                                 drop_pages(pl);
 179                         return -ENOMEM;
 180                 }
 181                 next->next = pl;
 182                 pl = next;
 183         }
 184
 185         kc->nr_reserved_pages += nr_pages;
 186         kcopyd_put_pages(kc, pl);
 187
 188         return 0;
 189 }
 190
 191 static void client_free_pages(struct dm_kcopyd_client *kc)
 192 {
 193         BUG_ON(kc->nr_free_pages != kc->nr_reserved_pages);
 194         drop_pages(kc->pages);
 195         kc->pages = NULL;
 196         kc->nr_free_pages = kc->nr_reserved_pages = 0;
 197 }
 198
/*-----------------------------------------------------------------
 * kcopyd_jobs need to be allocated by the *clients* of kcopyd,
 * for this reason we use a mempool to prevent the client from
 * ever having to do io (which could cause a deadlock).
 *---------------------------------------------------------------*/
struct kcopyd_job {
	/* Client this job belongs to. */
	struct dm_kcopyd_client *kc;
	/* Links the job into one of the client's three job lists. */
	struct list_head list;
	/* Caller supplied flag bits; DM_KCOPYD_IGNORE_ERROR is tested here. */
	unsigned long flags;

	/*
	 * Error state of the job.
	 * read_err is set to 1 on a read failure; write_err accumulates
	 * (ORs together) the error values reported for writes.
	 */
	int read_err;
	unsigned long write_err;

	/*
	 * Either READ or WRITE
	 * A job starts as READ and is switched to WRITE once the read
	 * has completed.
	 */
	int rw;
	struct dm_io_region source;

	/*
	 * The destinations for the transfer.
	 * Only the first num_dests entries of dests[] are valid.
	 */
	unsigned int num_dests;
	struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS];

	/* Buffer pages for the copy and how many of them were requested. */
	unsigned int nr_pages;
	struct page_list *pages;

	/*
	 * Set this to ensure you are notified when the job has
	 * completed.  'context' is for callback to use.
	 */
	dm_kcopyd_notify_fn fn;
	void *context;

	/*
	 * These fields are only used if the job has been split
	 * into more manageable parts.
	 *
	 * lock guards progress and the error fields of the master job,
	 * sub_jobs counts the sub jobs that have not finished yet and
	 * progress is the number of sectors already handed out.
	 */
	struct mutex lock;
	atomic_t sub_jobs;
	sector_t progress;

	/* Points at the job itself unless this is a sub job of a split copy. */
	struct kcopyd_job *master_job;
};
 247
 248 static struct kmem_cache *_job_cache;
 249
 250 int __init dm_kcopyd_init(void)
 251 {
 252         _job_cache = kmem_cache_create("kcopyd_job",
 253                                 sizeof(struct kcopyd_job) * (SPLIT_COUNT + 1),
 254                                 __alignof__(struct kcopyd_job), 0, NULL);
 255         if (!_job_cache)
 256                 return -ENOMEM;
 257
 258         return 0;
 259 }
 260
 261 void dm_kcopyd_exit(void)
 262 {
 263         kmem_cache_destroy(_job_cache);
 264         _job_cache = NULL;
 265 }
 266
/*
 * Helpers to add and remove jobs on a given job list: pop() takes
 * the job at the head, push() appends to the tail and push_head()
 * inserts at the head.
 */
 271 static struct kcopyd_job *pop(struct list_head *jobs,
 272                               struct dm_kcopyd_client *kc)
 273 {
 274         struct kcopyd_job *job = NULL;
 275         unsigned long flags;
 276
 277         spin_lock_irqsave(&kc->job_lock, flags);
 278
 279         if (!list_empty(jobs)) {
 280                 job = list_entry(jobs->next, struct kcopyd_job, list);
 281                 list_del(&job->list);
 282         }
 283         spin_unlock_irqrestore(&kc->job_lock, flags);
 284
 285         return job;
 286 }
 287
 288 static void push(struct list_head *jobs, struct kcopyd_job *job)
 289 {
 290         unsigned long flags;
 291         struct dm_kcopyd_client *kc = job->kc;
 292
 293         spin_lock_irqsave(&kc->job_lock, flags);
 294         list_add_tail(&job->list, jobs);
 295         spin_unlock_irqrestore(&kc->job_lock, flags);
 296 }
 297
 298
 299 static void push_head(struct list_head *jobs, struct kcopyd_job *job)
 300 {
 301         unsigned long flags;
 302         struct dm_kcopyd_client *kc = job->kc;
 303
 304         spin_lock_irqsave(&kc->job_lock, flags);
 305         list_add(&job->list, jobs);
 306         spin_unlock_irqrestore(&kc->job_lock, flags);
 307 }
 308
 309 /*
 310  * These three functions process 1 item from the corresponding
 311  * job list.
 312  *
 313  * They return:
 314  * < 0: error
 315  *   0: success
 316  * > 0: can't process yet.
 317  */
 318 static int run_complete_job(struct kcopyd_job *job)
 319 {
 320         void *context = job->context;
 321         int read_err = job->read_err;
 322         unsigned long write_err = job->write_err;
 323         dm_kcopyd_notify_fn fn = job->fn;
 324         struct dm_kcopyd_client *kc = job->kc;
 325
 326         if (job->pages)
 327                 kcopyd_put_pages(kc, job->pages);
 328         /*
 329          * If this is the master job, the sub jobs have already
 330          * completed so we can free everything.
 331          */
 332         if (job->master_job == job)
 333                 mempool_free(job, kc->job_pool);
 334         fn(read_err, write_err, context);
 335
 336         if (atomic_dec_and_test(&kc->nr_jobs))
 337                 wake_up(&kc->destroyq);
 338
 339         return 0;
 340 }
 341
 342 static void complete_io(unsigned long error, void *context)
 343 {
 344         struct kcopyd_job *job = (struct kcopyd_job *) context;
 345         struct dm_kcopyd_client *kc = job->kc;
 346
 347         if (error) {
 348                 if (job->rw == WRITE)
 349                         job->write_err |= error;
 350                 else
 351                         job->read_err = 1;
 352
 353                 if (!test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) {
 354                         push(&kc->complete_jobs, job);
 355                         wake(kc);
 356                         return;
 357                 }
 358         }
 359
 360         if (job->rw == WRITE)
 361                 push(&kc->complete_jobs, job);
 362
 363         else {
 364                 job->rw = WRITE;
 365                 push(&kc->io_jobs, job);
 366         }
 367
 368         wake(kc);
 369 }
 370
 371 /*
 372  * Request io on as many buffer heads as we can currently get for
 373  * a particular job.
 374  */
 375 static int run_io_job(struct kcopyd_job *job)
 376 {
 377         int r;
 378         struct dm_io_request io_req = {
 379                 .bi_rw = job->rw,
 380                 .mem.type = DM_IO_PAGE_LIST,
 381                 .mem.ptr.pl = job->pages,
 382                 .mem.offset = 0,
 383                 .notify.fn = complete_io,
 384                 .notify.context = job,
 385                 .client = job->kc->io_client,
 386         };
 387
 388         if (job->rw == READ)
 389                 r = dm_io(&io_req, 1, &job->source, NULL);
 390         else
 391                 r = dm_io(&io_req, job->num_dests, job->dests, NULL);
 392
 393         return r;
 394 }
 395
 396 static int run_pages_job(struct kcopyd_job *job)
 397 {
 398         int r;
 399
 400         job->nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9);
 401         r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages);
 402         if (!r) {
 403                 /* this job is ready for io */
 404                 push(&job->kc->io_jobs, job);
 405                 return 0;
 406         }
 407
 408         if (r == -ENOMEM)
 409                 /* can't complete now */
 410                 return 1;
 411
 412         return r;
 413 }
 414
 415 /*
 416  * Run through a list for as long as possible.  Returns the count
 417  * of successful jobs.
 418  */
 419 static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
 420                         int (*fn) (struct kcopyd_job *))
 421 {
 422         struct kcopyd_job *job;
 423         int r, count = 0;
 424
 425         while ((job = pop(jobs, kc))) {
 426
 427                 r = fn(job);
 428
 429                 if (r < 0) {
 430                         /* error this rogue job */
 431                         if (job->rw == WRITE)
 432                                 job->write_err = (unsigned long) -1L;
 433                         else
 434                                 job->read_err = 1;
 435                         push(&kc->complete_jobs, job);
 436                         break;
 437                 }
 438
 439                 if (r > 0) {
 440                         /*
 441                          * We couldn't service this job ATM, so
 442                          * push this job back onto the list.
 443                          */
 444                         push_head(jobs, job);
 445                         break;
 446                 }
 447
 448                 count++;
 449         }
 450
 451         return count;
 452 }
 453
/*
 * kcopyd does this every time it's woken up.
 *
 * Work handler for a client's kcopyd_work item: makes one pass over
 * each of the client's three job lists, with a block plug held for
 * the duration of the pass.
 */
static void do_work(struct work_struct *work)
{
	/* Recover the owning client from the work item embedded in it. */
	struct dm_kcopyd_client *kc = container_of(work,
					struct dm_kcopyd_client, kcopyd_work);
	struct blk_plug plug;

	/*
	 * The order that these are called is *very* important.
	 * complete jobs can free some pages for pages jobs.
	 * Pages jobs when successful will jump onto the io jobs
	 * list.  io jobs call wake when they complete and it all
	 * starts again.
	 */
	blk_start_plug(&plug);
	process_jobs(&kc->complete_jobs, kc, run_complete_job);
	process_jobs(&kc->pages_jobs, kc, run_pages_job);
	process_jobs(&kc->io_jobs, kc, run_io_job);
	blk_finish_plug(&plug);
}
 476
 477 /*
 478  * If we are copying a small region we just dispatch a single job
 479  * to do the copy, otherwise the io has to be split up into many
 480  * jobs.
 481  */
 482 static void dispatch_job(struct kcopyd_job *job)
 483 {
 484         struct dm_kcopyd_client *kc = job->kc;
 485         atomic_inc(&kc->nr_jobs);
 486         if (unlikely(!job->source.count))
 487                 push(&kc->complete_jobs, job);
 488         else
 489                 push(&kc->pages_jobs, job);
 490         wake(kc);
 491 }
 492
 493 static void segment_complete(int read_err, unsigned long write_err,
 494                              void *context)
 495 {
 496         /* FIXME: tidy this function */
 497         sector_t progress = 0;
 498         sector_t count = 0;
 499         struct kcopyd_job *sub_job = (struct kcopyd_job *) context;
 500         struct kcopyd_job *job = sub_job->master_job;
 501         struct dm_kcopyd_client *kc = job->kc;
 502
 503         mutex_lock(&job->lock);
 504
 505         /* update the error */
 506         if (read_err)
 507                 job->read_err = 1;
 508
 509         if (write_err)
 510                 job->write_err |= write_err;
 511
 512         /*
 513          * Only dispatch more work if there hasn't been an error.
 514          */
 515         if ((!job->read_err && !job->write_err) ||
 516             test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) {
 517                 /* get the next chunk of work */
 518                 progress = job->progress;
 519                 count = job->source.count - progress;
 520                 if (count) {
 521                         if (count > SUB_JOB_SIZE)
 522                                 count = SUB_JOB_SIZE;
 523
 524                         job->progress += count;
 525                 }
 526         }
 527         mutex_unlock(&job->lock);
 528
 529         if (count) {
 530                 int i;
 531
 532                 *sub_job = *job;
 533                 sub_job->source.sector += progress;
 534                 sub_job->source.count = count;
 535
 536                 for (i = 0; i < job->num_dests; i++) {
 537                         sub_job->dests[i].sector += progress;
 538                         sub_job->dests[i].count = count;
 539                 }
 540
 541                 sub_job->fn = segment_complete;
 542                 sub_job->context = sub_job;
 543                 dispatch_job(sub_job);
 544
 545         } else if (atomic_dec_and_test(&job->sub_jobs)) {
 546
 547                 /*
 548                  * Queue the completion callback to the kcopyd thread.
 549                  *
 550                  * Some callers assume that all the completions are called
 551                  * from a single thread and don't race with each other.
 552                  *
 553                  * We must not call the callback directly here because this
 554                  * code may not be executing in the thread.
 555                  */
 556                 push(&kc->complete_jobs, job);
 557                 wake(kc);
 558         }
 559 }
 560
 561 /*
 562  * Create some sub jobs to share the work between them.
 563  */
 564 static void split_job(struct kcopyd_job *master_job)
 565 {
 566         int i;
 567
 568         atomic_inc(&master_job->kc->nr_jobs);
 569
 570         atomic_set(&master_job->sub_jobs, SPLIT_COUNT);
 571         for (i = 0; i < SPLIT_COUNT; i++) {
 572                 master_job[i + 1].master_job = master_job;
 573                 segment_complete(0, 0u, &master_job[i + 1]);
 574         }
 575 }
 576
 577 int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 578                    unsigned int num_dests, struct dm_io_region *dests,
 579                    unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
 580 {
 581         struct kcopyd_job *job;
 582
 583         /*
 584          * Allocate an array of jobs consisting of one master job
 585          * followed by SPLIT_COUNT sub jobs.
 586          */
 587         job = mempool_alloc(kc->job_pool, GFP_NOIO);
 588
 589         /*
 590          * set up for the read.
 591          */
 592         job->kc = kc;
 593         job->flags = flags;
 594         job->read_err = 0;
 595         job->write_err = 0;
 596         job->rw = READ;
 597
 598         job->source = *from;
 599
 600         job->num_dests = num_dests;
 601         memcpy(&job->dests, dests, sizeof(*dests) * num_dests);
 602
 603         job->nr_pages = 0;
 604         job->pages = NULL;
 605
 606         job->fn = fn;
 607         job->context = context;
 608         job->master_job = job;
 609
 610         if (job->source.count <= SUB_JOB_SIZE)
 611                 dispatch_job(job);
 612         else {
 613                 mutex_init(&job->lock);
 614                 job->progress = 0;
 615                 split_job(job);
 616         }
 617
 618         return 0;
 619 }
 620 EXPORT_SYMBOL(dm_kcopyd_copy);
 621
/*
 * Cancels a kcopyd job, e.g. someone might be deactivating a
 * mirror.
 *
 * Not implemented: the stub below is compiled out by the #if 0 guard
 * and would simply return -1.
 */
#if 0
int kcopyd_cancel(struct kcopyd_job *job, int block)
{
	/* FIXME: finish */
	return -1;
}
#endif  /*  0  */
 633
 634 /*-----------------------------------------------------------------
 635  * Client setup
 636  *---------------------------------------------------------------*/
 637 struct dm_kcopyd_client *dm_kcopyd_client_create(void)
 638 {
 639         int r = -ENOMEM;
 640         struct dm_kcopyd_client *kc;
 641
 642         kc = kmalloc(sizeof(*kc), GFP_KERNEL);
 643         if (!kc)
 644                 return ERR_PTR(-ENOMEM);
 645
 646         spin_lock_init(&kc->job_lock);
 647         INIT_LIST_HEAD(&kc->complete_jobs);
 648         INIT_LIST_HEAD(&kc->io_jobs);
 649         INIT_LIST_HEAD(&kc->pages_jobs);
 650
 651         kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
 652         if (!kc->job_pool)
 653                 goto bad_slab;
 654
 655         INIT_WORK(&kc->kcopyd_work, do_work);
 656         kc->kcopyd_wq = alloc_workqueue("kcopyd",
 657                                         WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
 658         if (!kc->kcopyd_wq)
 659                 goto bad_workqueue;
 660
 661         kc->pages = NULL;
 662         kc->nr_reserved_pages = kc->nr_free_pages = 0;
 663         r = client_reserve_pages(kc, RESERVE_PAGES);
 664         if (r)
 665                 goto bad_client_pages;
 666
 667         kc->io_client = dm_io_client_create();
 668         if (IS_ERR(kc->io_client)) {
 669                 r = PTR_ERR(kc->io_client);
 670                 goto bad_io_client;
 671         }
 672
 673         init_waitqueue_head(&kc->destroyq);
 674         atomic_set(&kc->nr_jobs, 0);
 675
 676         return kc;
 677
 678 bad_io_client:
 679         client_free_pages(kc);
 680 bad_client_pages:
 681         destroy_workqueue(kc->kcopyd_wq);
 682 bad_workqueue:
 683         mempool_destroy(kc->job_pool);
 684 bad_slab:
 685         kfree(kc);
 686
 687         return ERR_PTR(r);
 688 }
 689 EXPORT_SYMBOL(dm_kcopyd_client_create);
 690
 691 void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc)
 692 {
 693         /* Wait for completion of all jobs submitted by this client. */
 694         wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));
 695
 696         BUG_ON(!list_empty(&kc->complete_jobs));
 697         BUG_ON(!list_empty(&kc->io_jobs));
 698         BUG_ON(!list_empty(&kc->pages_jobs));
 699         destroy_workqueue(kc->kcopyd_wq);
 700         dm_io_client_destroy(kc->io_client);
 701         client_free_pages(kc);
 702         mempool_destroy(kc->job_pool);
 703         kfree(kc);
 704 }
 705 EXPORT_SYMBOL(dm_kcopyd_client_destroy);