Merge tag 'for-5.7/block-2020-03-29' of git://git.kernel.dk/linux-block
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 30 Mar 2020 18:20:13 +0000 (11:20 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 30 Mar 2020 18:20:13 +0000 (11:20 -0700)
Pull block updates from Jens Axboe:

 - Online capacity resizing (Balbir)

 - Number of hardware queue change fixes (Bart)

 - null_blk fault injection addition (Bart)

 - Cleanup of queue allocation, unifying the node/no-node API
   (Christoph)

 - Cleanup of genhd, moving code to where it makes sense (Christoph)

 - Cleanup of the partition handling code (Christoph)

 - disk stat fixes/improvements (Konstantin)

 - BFQ improvements (Paolo)

 - Various fixes and improvements

* tag 'for-5.7/block-2020-03-29' of git://git.kernel.dk/linux-block: (72 commits)
  block: return NULL in blk_alloc_queue() on error
  block: move bio_map_* to blk-map.c
  Revert "blkdev: check for valid request queue before issuing flush"
  block: simplify queue allocation
  bcache: pass the make_request methods to blk_queue_make_request
  null_blk: use blk_mq_init_queue_data
  block: add a blk_mq_init_queue_data helper
  block: move the ->devnode callback to struct block_device_operations
  block: move the part_stat* helpers from genhd.h to a new header
  block: move block layer internals out of include/linux/genhd.h
  block: move guard_bio_eod to bio.c
  block: unexport get_gendisk
  block: unexport disk_map_sector_rcu
  block: unexport disk_get_part
  block: mark part_in_flight and part_in_flight_rw static
  block: mark block_depr static
  block: factor out requeue handling from dispatch code
  block/diskstats: replace time_in_queue with sum of request times
  block/diskstats: accumulate all per-cpu counters in one pass
  block/diskstats: more accurate approximation of io_ticks for slow disks
  ...

1  2 
block/blk-iocost.c
block/genhd.c
drivers/block/virtio_blk.c
drivers/scsi/sd.c
include/linux/fs.h
include/linux/genhd.h

diff --combined block/blk-iocost.c
@@@ -46,9 -46,6 +46,6 @@@
   * If needed, tools/cgroup/iocost_coef_gen.py can be used to generate
   * device-specific coefficients.
   *
-  * If needed, tools/cgroup/iocost_coef_gen.py can be used to generate
-  * device-specific coefficients.
-  *
   * 2. Control Strategy
   *
   * The device virtual time (vtime) is used as the primary control metric.
@@@ -1318,7 -1315,7 +1315,7 @@@ static bool iocg_is_idle(struct ioc_gq 
                return false;
  
        /* is something in flight? */
 -      if (atomic64_read(&iocg->done_vtime) < atomic64_read(&iocg->vtime))
 +      if (atomic64_read(&iocg->done_vtime) != atomic64_read(&iocg->vtime))
                return false;
  
        return true;
diff --combined block/genhd.c
@@@ -4,6 -4,7 +4,7 @@@
   */
  
  #include <linux/module.h>
+ #include <linux/ctype.h>
  #include <linux/fs.h>
  #include <linux/genhd.h>
  #include <linux/kdev_t.h>
@@@ -26,7 -27,7 +27,7 @@@
  #include "blk.h"
  
  static DEFINE_MUTEX(block_class_lock);
- struct kobject *block_depr;
+ static struct kobject *block_depr;
  
  /* for extended dynamic devt allocation, currently only one major is used */
  #define NR_EXT_DEVT           (1 << MINORBITS)
@@@ -46,6 -47,78 +47,78 @@@ static void disk_add_events(struct gend
  static void disk_del_events(struct gendisk *disk);
  static void disk_release_events(struct gendisk *disk);
  
+ /*
+  * Set disk capacity and notify if the size is not currently
+  * zero and will not be set to zero
+  */
+ void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size,
+                                       bool revalidate)
+ {
+       sector_t capacity = get_capacity(disk);
+       set_capacity(disk, size);
+       if (revalidate)
+               revalidate_disk(disk);
+       if (capacity != size && capacity != 0 && size != 0) {
+               char *envp[] = { "RESIZE=1", NULL };
+               kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
+       }
+ }
+ EXPORT_SYMBOL_GPL(set_capacity_revalidate_and_notify);
+ /*
+  * Format the device name of the indicated disk into the supplied buffer and
+  * return a pointer to that same buffer for convenience.
+  */
+ char *disk_name(struct gendisk *hd, int partno, char *buf)
+ {
+       if (!partno)
+               snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
+       else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
+               snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
+       else
+               snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
+       return buf;
+ }
+ const char *bdevname(struct block_device *bdev, char *buf)
+ {
+       return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
+ }
+ EXPORT_SYMBOL(bdevname);
+ #ifdef CONFIG_SMP
+ static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
+ {
+       int cpu;
+       memset(stat, 0, sizeof(struct disk_stats));
+       for_each_possible_cpu(cpu) {
+               struct disk_stats *ptr = per_cpu_ptr(part->dkstats, cpu);
+               int group;
+               for (group = 0; group < NR_STAT_GROUPS; group++) {
+                       stat->nsecs[group] += ptr->nsecs[group];
+                       stat->sectors[group] += ptr->sectors[group];
+                       stat->ios[group] += ptr->ios[group];
+                       stat->merges[group] += ptr->merges[group];
+               }
+               stat->io_ticks += ptr->io_ticks;
+       }
+ }
+ #else /* CONFIG_SMP */
+ static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
+ {
+       memcpy(stat, &part->dkstats, sizeof(struct disk_stats));
+ }
+ #endif /* CONFIG_SMP */
  void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
  {
        if (queue_is_mq(q))
@@@ -66,7 -139,8 +139,8 @@@ void part_dec_in_flight(struct request_
                part_stat_local_dec(&part_to_disk(part)->part0, in_flight[rw]);
  }
  
- unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part)
+ static unsigned int part_in_flight(struct request_queue *q,
+               struct hd_struct *part)
  {
        int cpu;
        unsigned int inflight;
        return inflight;
  }
  
- void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
-                      unsigned int inflight[2])
+ static void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+               unsigned int inflight[2])
  {
        int cpu;
  
@@@ -143,7 -217,6 +217,6 @@@ struct hd_struct *disk_get_part(struct 
  
        return part;
  }
- EXPORT_SYMBOL_GPL(disk_get_part);
  
  /**
   * disk_part_iter_init - initialize partition iterator
@@@ -299,44 -372,7 +372,43 @@@ struct hd_struct *disk_map_sector_rcu(s
        }
        return &disk->part0;
  }
- EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
  
 +/**
 + * disk_has_partitions
 + * @disk: gendisk of interest
 + *
 + * Walk through the partition table and check if valid partition exists.
 + *
 + * CONTEXT:
 + * Don't care.
 + *
 + * RETURNS:
 + * True if the gendisk has at least one valid non-zero size partition.
 + * Otherwise false.
 + */
 +bool disk_has_partitions(struct gendisk *disk)
 +{
 +      struct disk_part_tbl *ptbl;
 +      int i;
 +      bool ret = false;
 +
 +      rcu_read_lock();
 +      ptbl = rcu_dereference(disk->part_tbl);
 +
 +      /* Iterate partitions skipping the whole device at index 0 */
 +      for (i = 1; i < ptbl->len; i++) {
 +              if (rcu_dereference(ptbl->part[i])) {
 +                      ret = true;
 +                      break;
 +              }
 +      }
 +
 +      rcu_read_unlock();
 +
 +      return ret;
 +}
 +EXPORT_SYMBOL_GPL(disk_has_partitions);
 +
  /*
   * Can be deleted altogether. Later.
   *
@@@ -944,7 -980,6 +1016,6 @@@ struct gendisk *get_gendisk(dev_t devt
        }
        return disk;
  }
- EXPORT_SYMBOL(get_gendisk);
  
  /**
   * bdget_disk - do bdget() by gendisk and partition number
@@@ -1190,6 -1225,67 +1261,67 @@@ static ssize_t disk_ro_show(struct devi
        return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
  }
  
+ ssize_t part_size_show(struct device *dev,
+                      struct device_attribute *attr, char *buf)
+ {
+       struct hd_struct *p = dev_to_part(dev);
+       return sprintf(buf, "%llu\n",
+               (unsigned long long)part_nr_sects_read(p));
+ }
+ ssize_t part_stat_show(struct device *dev,
+                      struct device_attribute *attr, char *buf)
+ {
+       struct hd_struct *p = dev_to_part(dev);
+       struct request_queue *q = part_to_disk(p)->queue;
+       struct disk_stats stat;
+       unsigned int inflight;
+       part_stat_read_all(p, &stat);
+       inflight = part_in_flight(q, p);
+       return sprintf(buf,
+               "%8lu %8lu %8llu %8u "
+               "%8lu %8lu %8llu %8u "
+               "%8u %8u %8u "
+               "%8lu %8lu %8llu %8u "
+               "%8lu %8u"
+               "\n",
+               stat.ios[STAT_READ],
+               stat.merges[STAT_READ],
+               (unsigned long long)stat.sectors[STAT_READ],
+               (unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
+               stat.ios[STAT_WRITE],
+               stat.merges[STAT_WRITE],
+               (unsigned long long)stat.sectors[STAT_WRITE],
+               (unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
+               inflight,
+               jiffies_to_msecs(stat.io_ticks),
+               (unsigned int)div_u64(stat.nsecs[STAT_READ] +
+                                     stat.nsecs[STAT_WRITE] +
+                                     stat.nsecs[STAT_DISCARD] +
+                                     stat.nsecs[STAT_FLUSH],
+                                               NSEC_PER_MSEC),
+               stat.ios[STAT_DISCARD],
+               stat.merges[STAT_DISCARD],
+               (unsigned long long)stat.sectors[STAT_DISCARD],
+               (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
+               stat.ios[STAT_FLUSH],
+               (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
+ }
+ ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
+                          char *buf)
+ {
+       struct hd_struct *p = dev_to_part(dev);
+       struct request_queue *q = part_to_disk(p)->queue;
+       unsigned int inflight[2];
+       part_in_flight_rw(q, p, inflight);
+       return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
+ }
  static ssize_t disk_capability_show(struct device *dev,
                                    struct device_attribute *attr, char *buf)
  {
@@@ -1228,10 -1324,33 +1360,33 @@@ static DEVICE_ATTR(capability, 0444, di
  static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
  static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
  static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
  #ifdef CONFIG_FAIL_MAKE_REQUEST
+ ssize_t part_fail_show(struct device *dev,
+                      struct device_attribute *attr, char *buf)
+ {
+       struct hd_struct *p = dev_to_part(dev);
+       return sprintf(buf, "%d\n", p->make_it_fail);
+ }
+ ssize_t part_fail_store(struct device *dev,
+                       struct device_attribute *attr,
+                       const char *buf, size_t count)
+ {
+       struct hd_struct *p = dev_to_part(dev);
+       int i;
+       if (count > 0 && sscanf(buf, "%d", &i) > 0)
+               p->make_it_fail = (i == 0) ? 0 : 1;
+       return count;
+ }
  static struct device_attribute dev_attr_fail =
        __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
- #endif
+ #endif /* CONFIG_FAIL_MAKE_REQUEST */
  #ifdef CONFIG_FAIL_IO_TIMEOUT
  static struct device_attribute dev_attr_fail_timeout =
        __ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
@@@ -1378,8 -1497,8 +1533,8 @@@ static char *block_devnode(struct devic
  {
        struct gendisk *disk = dev_to_disk(dev);
  
-       if (disk->devnode)
-               return disk->devnode(disk, mode);
+       if (disk->fops->devnode)
+               return disk->fops->devnode(disk, mode);
        return NULL;
  }
  
@@@ -1405,6 -1524,7 +1560,7 @@@ static int diskstats_show(struct seq_fi
        struct hd_struct *hd;
        char buf[BDEVNAME_SIZE];
        unsigned int inflight;
+       struct disk_stats stat;
  
        /*
        if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
  
        disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
        while ((hd = disk_part_iter_next(&piter))) {
+               part_stat_read_all(hd, &stat);
                inflight = part_in_flight(gp->queue, hd);
                seq_printf(seqf, "%4d %7d %s "
                           "%lu %lu %lu %u "
                           "%lu %lu %lu %u "
                           "\n",
                           MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
                           disk_name(gp, hd->partno, buf),
-                          part_stat_read(hd, ios[STAT_READ]),
-                          part_stat_read(hd, merges[STAT_READ]),
-                          part_stat_read(hd, sectors[STAT_READ]),
-                          (unsigned int)part_stat_read_msecs(hd, STAT_READ),
-                          part_stat_read(hd, ios[STAT_WRITE]),
-                          part_stat_read(hd, merges[STAT_WRITE]),
-                          part_stat_read(hd, sectors[STAT_WRITE]),
-                          (unsigned int)part_stat_read_msecs(hd, STAT_WRITE),
+                          stat.ios[STAT_READ],
+                          stat.merges[STAT_READ],
+                          stat.sectors[STAT_READ],
+                          (unsigned int)div_u64(stat.nsecs[STAT_READ],
+                                                       NSEC_PER_MSEC),
+                          stat.ios[STAT_WRITE],
+                          stat.merges[STAT_WRITE],
+                          stat.sectors[STAT_WRITE],
+                          (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
+                                                       NSEC_PER_MSEC),
                           inflight,
-                          jiffies_to_msecs(part_stat_read(hd, io_ticks)),
-                          jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
-                          part_stat_read(hd, ios[STAT_DISCARD]),
-                          part_stat_read(hd, merges[STAT_DISCARD]),
-                          part_stat_read(hd, sectors[STAT_DISCARD]),
-                          (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD),
-                          part_stat_read(hd, ios[STAT_FLUSH]),
-                          (unsigned int)part_stat_read_msecs(hd, STAT_FLUSH)
+                          jiffies_to_msecs(stat.io_ticks),
+                          (unsigned int)div_u64(stat.nsecs[STAT_READ] +
+                                                stat.nsecs[STAT_WRITE] +
+                                                stat.nsecs[STAT_DISCARD] +
+                                                stat.nsecs[STAT_FLUSH],
+                                                       NSEC_PER_MSEC),
+                          stat.ios[STAT_DISCARD],
+                          stat.merges[STAT_DISCARD],
+                          stat.sectors[STAT_DISCARD],
+                          (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
+                                                NSEC_PER_MSEC),
+                          stat.ios[STAT_FLUSH],
+                          (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
+                                                NSEC_PER_MSEC)
                        );
        }
        disk_part_iter_exit(&piter);
@@@ -1499,7 -1629,6 +1665,6 @@@ dev_t blk_lookup_devt(const char *name
        class_dev_iter_exit(&iter);
        return devt;
  }
- EXPORT_SYMBOL(blk_lookup_devt);
  
  struct gendisk *__alloc_disk_node(int minors, int node_id)
  {
diff --combined drivers/block/virtio_blk.c
@@@ -245,20 -245,13 +245,20 @@@ static blk_status_t virtio_queue_rq(str
        err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
        if (err) {
                virtqueue_kick(vblk->vqs[qid].vq);
 -              blk_mq_stop_hw_queue(hctx);
 +              /* Don't stop the queue if -ENOMEM: we may have failed to
 +               * bounce the buffer due to global resource outage.
 +               */
 +              if (err == -ENOSPC)
 +                      blk_mq_stop_hw_queue(hctx);
                spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 -              /* Out of mem doesn't actually happen, since we fall back
 -               * to direct descriptors */
 -              if (err == -ENOMEM || err == -ENOSPC)
 +              switch (err) {
 +              case -ENOSPC:
                        return BLK_STS_DEV_RESOURCE;
 -              return BLK_STS_IOERR;
 +              case -ENOMEM:
 +                      return BLK_STS_RESOURCE;
 +              default:
 +                      return BLK_STS_IOERR;
 +              }
        }
  
        if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
@@@ -388,18 -381,15 +388,15 @@@ static void virtblk_update_capacity(str
                   cap_str_10,
                   cap_str_2);
  
-       set_capacity(vblk->disk, capacity);
+       set_capacity_revalidate_and_notify(vblk->disk, capacity, true);
  }
  
  static void virtblk_config_changed_work(struct work_struct *work)
  {
        struct virtio_blk *vblk =
                container_of(work, struct virtio_blk, config_work);
-       char *envp[] = { "RESIZE=1", NULL };
  
        virtblk_update_capacity(vblk, true);
-       revalidate_disk(vblk->disk);
-       kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp);
  }
  
  static void virtblk_config_changed(struct virtio_device *vdev)
diff --combined drivers/scsi/sd.c
@@@ -3169,11 -3169,9 +3169,11 @@@ static int sd_revalidate_disk(struct ge
        if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
                q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
                rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
 -      } else
 +      } else {
 +              q->limits.io_opt = 0;
                rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
                                      (sector_t)BLK_DEF_MAX_SECTORS);
 +      }
  
        /* Do not exceed controller limit */
        rw_max = min(rw_max, queue_max_hw_sectors(q));
  
        sdkp->first_scan = 0;
  
-       set_capacity(disk, logical_to_sectors(sdp, sdkp->capacity));
+       set_capacity_revalidate_and_notify(disk,
+               logical_to_sectors(sdp, sdkp->capacity), false);
        sd_config_write_same(sdkp);
        kfree(buffer);
  
diff --combined include/linux/fs.h
@@@ -698,7 -698,6 +698,7 @@@ struct inode 
                struct rcu_head         i_rcu;
        };
        atomic64_t              i_version;
 +      atomic64_t              i_sequence; /* see futex */
        atomic_t                i_count;
        atomic_t                i_dio_count;
        atomic_t                i_writecount;
@@@ -2700,7 -2699,6 +2700,6 @@@ static inline void unregister_chrdev(un
  
  #ifdef CONFIG_BLOCK
  #define BLKDEV_MAJOR_MAX      512
- extern const char *__bdevname(dev_t, char *buffer);
  extern const char *bdevname(struct block_device *bdev, char *buffer);
  extern struct block_device *lookup_bdev(const char *);
  extern void blkdev_show(struct seq_file *,off_t);
diff --combined include/linux/genhd.h
  #define part_to_dev(part)     (&((part)->__dev))
  
  extern struct device_type part_type;
- extern struct kobject *block_depr;
  extern struct class block_class;
  
- enum {
- /* These three have identical behaviour; use the second one if DOS FDISK gets
-    confused about extended/logical partitions starting past cylinder 1023. */
-       DOS_EXTENDED_PARTITION = 5,
-       LINUX_EXTENDED_PARTITION = 0x85,
-       WIN98_EXTENDED_PARTITION = 0x0f,
-       SUN_WHOLE_DISK = DOS_EXTENDED_PARTITION,
-       LINUX_SWAP_PARTITION = 0x82,
-       LINUX_DATA_PARTITION = 0x83,
-       LINUX_LVM_PARTITION = 0x8e,
-       LINUX_RAID_PARTITION = 0xfd,    /* autodetect RAID partition */
-       SOLARIS_X86_PARTITION = LINUX_SWAP_PARTITION,
-       NEW_SOLARIS_X86_PARTITION = 0xbf,
-       DM6_AUX1PARTITION = 0x51,       /* no DDO:  use xlated geom */
-       DM6_AUX3PARTITION = 0x53,       /* no DDO:  use xlated geom */
-       DM6_PARTITION = 0x54,           /* has DDO: use xlated geom & offset */
-       EZD_PARTITION = 0x55,           /* EZ-DRIVE */
-       FREEBSD_PARTITION = 0xa5,       /* FreeBSD Partition ID */
-       OPENBSD_PARTITION = 0xa6,       /* OpenBSD Partition ID */
-       NETBSD_PARTITION = 0xa9,        /* NetBSD Partition ID */
-       BSDI_PARTITION = 0xb7,          /* BSDI Partition ID */
-       MINIX_PARTITION = 0x81,         /* Minix Partition ID */
-       UNIXWARE_PARTITION = 0x63,      /* Same as GNU_HURD and SCO Unix */
- };
  #define DISK_MAX_PARTS                        256
  #define DISK_NAME_LEN                 32
  
  #include <linux/fs.h>
  #include <linux/workqueue.h>
  
- struct partition {
-       unsigned char boot_ind;         /* 0x80 - active */
-       unsigned char head;             /* starting head */
-       unsigned char sector;           /* starting sector */
-       unsigned char cyl;              /* starting cylinder */
-       unsigned char sys_ind;          /* What partition type */
-       unsigned char end_head;         /* end head */
-       unsigned char end_sector;       /* end sector */
-       unsigned char end_cyl;          /* end cylinder */
-       __le32 start_sect;      /* starting sector counting from 0 */
-       __le32 nr_sects;                /* nr of sectors in partition */
- } __attribute__((packed));
  struct disk_stats {
        u64 nsecs[NR_STAT_GROUPS];
        unsigned long sectors[NR_STAT_GROUPS];
        unsigned long ios[NR_STAT_GROUPS];
        unsigned long merges[NR_STAT_GROUPS];
        unsigned long io_ticks;
-       unsigned long time_in_queue;
        local_t in_flight[2];
  };
  
@@@ -133,17 -88,64 +88,64 @@@ struct hd_struct 
        struct rcu_work rcu_work;
  };
  
- #define GENHD_FL_REMOVABLE                    1
- /* 2 is unused */
- #define GENHD_FL_MEDIA_CHANGE_NOTIFY          4
- #define GENHD_FL_CD                           8
- #define GENHD_FL_UP                           16
- #define GENHD_FL_SUPPRESS_PARTITION_INFO      32
- #define GENHD_FL_EXT_DEVT                     64 /* allow extended devt */
- #define GENHD_FL_NATIVE_CAPACITY              128
- #define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE   256
- #define GENHD_FL_NO_PART_SCAN                 512
- #define GENHD_FL_HIDDEN                               1024
+ /**
+  * DOC: genhd capability flags
+  *
+  * ``GENHD_FL_REMOVABLE`` (0x0001): indicates that the block device
+  * gives access to removable media.
+  * When set, the device remains present even when media is not
+  * inserted.
+  * Must not be set for devices which are removed entirely when the
+  * media is removed.
+  *
+  * ``GENHD_FL_CD`` (0x0008): the block device is a CD-ROM-style
+  * device.
+  * Affects responses to the ``CDROM_GET_CAPABILITY`` ioctl.
+  *
+  * ``GENHD_FL_UP`` (0x0010): indicates that the block device is "up",
+  * with a similar meaning to network interfaces.
+  *
+  * ``GENHD_FL_SUPPRESS_PARTITION_INFO`` (0x0020): don't include
+  * partition information in ``/proc/partitions`` or in the output of
+  * printk_all_partitions().
+  * Used for the null block device and some MMC devices.
+  *
+  * ``GENHD_FL_EXT_DEVT`` (0x0040): the driver supports extended
+  * dynamic ``dev_t``, i.e. it wants extended device numbers
+  * (``BLOCK_EXT_MAJOR``).
+  * This affects the maximum number of partitions.
+  *
+  * ``GENHD_FL_NATIVE_CAPACITY`` (0x0080): based on information in the
+  * partition table, the device's capacity has been extended to its
+  * native capacity; i.e. the device has hidden capacity used by one
+  * of the partitions (this is a flag used so that native capacity is
+  * only ever unlocked once).
+  *
+  * ``GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE`` (0x0100): event polling is
+  * blocked whenever a writer holds an exclusive lock.
+  *
+  * ``GENHD_FL_NO_PART_SCAN`` (0x0200): partition scanning is disabled.
+  * Used for loop devices in their default settings and some MMC
+  * devices.
+  *
+  * ``GENHD_FL_HIDDEN`` (0x0400): the block device is hidden; it
+  * doesn't produce events, doesn't appear in sysfs, and doesn't have
+  * an associated ``bdev``.
+  * Implies ``GENHD_FL_SUPPRESS_PARTITION_INFO`` and
+  * ``GENHD_FL_NO_PART_SCAN``.
+  * Used for multipath devices.
+  */
+ #define GENHD_FL_REMOVABLE                    0x0001
+ /* 2 is unused (used to be GENHD_FL_DRIVERFS) */
+ /* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */
+ #define GENHD_FL_CD                           0x0008
+ #define GENHD_FL_UP                           0x0010
+ #define GENHD_FL_SUPPRESS_PARTITION_INFO      0x0020
+ #define GENHD_FL_EXT_DEVT                     0x0040
+ #define GENHD_FL_NATIVE_CAPACITY              0x0080
+ #define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE   0x0100
+ #define GENHD_FL_NO_PART_SCAN                 0x0200
+ #define GENHD_FL_HIDDEN                               0x0400
  
  enum {
        DISK_EVENT_MEDIA_CHANGE                 = 1 << 0, /* media changed */
@@@ -189,7 -191,6 +191,6 @@@ struct gendisk 
                                           * disks that can't be partitioned. */
  
        char disk_name[DISK_NAME_LEN];  /* name of major driver */
-       char *(*devnode)(struct gendisk *gd, umode_t *mode);
  
        unsigned short events;          /* supported events */
        unsigned short event_flags;     /* flags related to event processing */
@@@ -245,6 -246,18 +246,6 @@@ static inline bool disk_part_scan_enabl
                !(disk->flags & GENHD_FL_NO_PART_SCAN);
  }
  
 -static inline bool disk_has_partitions(struct gendisk *disk)
 -{
 -      bool ret = false;
 -
 -      rcu_read_lock();
 -      if (rcu_dereference(disk->part_tbl)->len > 1)
 -              ret = true;
 -      rcu_read_unlock();
 -
 -      return ret;
 -}
 -
  static inline dev_t disk_devt(struct gendisk *disk)
  {
        return MKDEV(disk->major, disk->first_minor);
@@@ -283,144 -296,6 +284,7 @@@ extern void disk_part_iter_init(struct 
                                 struct gendisk *disk, unsigned int flags);
  extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter);
  extern void disk_part_iter_exit(struct disk_part_iter *piter);
- extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk,
-                                            sector_t sector);
- bool disk_has_partitions(struct gendisk *disk);
- /*
-  * Macros to operate on percpu disk statistics:
-  *
-  * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters
-  * and should be called between disk_stat_lock() and
-  * disk_stat_unlock().
-  *
-  * part_stat_read() can be called at any time.
-  *
-  * part_stat_{add|set_all}() and {init|free}_part_stats are for
-  * internal use only.
-  */
- #ifdef        CONFIG_SMP
- #define part_stat_lock()      ({ rcu_read_lock(); get_cpu(); })
- #define part_stat_unlock()    do { put_cpu(); rcu_read_unlock(); } while (0)
- #define part_stat_get_cpu(part, field, cpu)                                   \
-       (per_cpu_ptr((part)->dkstats, (cpu))->field)
- #define part_stat_get(part, field)                                    \
-       part_stat_get_cpu(part, field, smp_processor_id())
- #define part_stat_read(part, field)                                   \
- ({                                                                    \
-       typeof((part)->dkstats->field) res = 0;                         \
-       unsigned int _cpu;                                              \
-       for_each_possible_cpu(_cpu)                                     \
-               res += per_cpu_ptr((part)->dkstats, _cpu)->field;       \
-       res;                                                            \
- })
- static inline void part_stat_set_all(struct hd_struct *part, int value)
- {
-       int i;
-       for_each_possible_cpu(i)
-               memset(per_cpu_ptr(part->dkstats, i), value,
-                               sizeof(struct disk_stats));
- }
- static inline int init_part_stats(struct hd_struct *part)
- {
-       part->dkstats = alloc_percpu(struct disk_stats);
-       if (!part->dkstats)
-               return 0;
-       return 1;
- }
- static inline void free_part_stats(struct hd_struct *part)
- {
-       free_percpu(part->dkstats);
- }
- #else /* !CONFIG_SMP */
- #define part_stat_lock()      ({ rcu_read_lock(); 0; })
- #define part_stat_unlock()    rcu_read_unlock()
- #define part_stat_get(part, field)            ((part)->dkstats.field)
- #define part_stat_get_cpu(part, field, cpu)   part_stat_get(part, field)
- #define part_stat_read(part, field)           part_stat_get(part, field)
- static inline void part_stat_set_all(struct hd_struct *part, int value)
- {
-       memset(&part->dkstats, value, sizeof(struct disk_stats));
- }
- static inline int init_part_stats(struct hd_struct *part)
- {
-       return 1;
- }
- static inline void free_part_stats(struct hd_struct *part)
- {
- }
- #endif /* CONFIG_SMP */
- #define part_stat_read_msecs(part, which)                             \
-       div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC)
- #define part_stat_read_accum(part, field)                             \
-       (part_stat_read(part, field[STAT_READ]) +                       \
-        part_stat_read(part, field[STAT_WRITE]) +                      \
-        part_stat_read(part, field[STAT_DISCARD]))
- #define __part_stat_add(part, field, addnd)                           \
-       (part_stat_get(part, field) += (addnd))
- #define part_stat_add(part, field, addnd)     do {                    \
-       __part_stat_add((part), field, addnd);                          \
-       if ((part)->partno)                                             \
-               __part_stat_add(&part_to_disk((part))->part0,           \
-                               field, addnd);                          \
- } while (0)
- #define part_stat_dec(gendiskp, field)                                        \
-       part_stat_add(gendiskp, field, -1)
- #define part_stat_inc(gendiskp, field)                                        \
-       part_stat_add(gendiskp, field, 1)
- #define part_stat_sub(gendiskp, field, subnd)                         \
-       part_stat_add(gendiskp, field, -subnd)
- #define part_stat_local_dec(gendiskp, field)                          \
-       local_dec(&(part_stat_get(gendiskp, field)))
- #define part_stat_local_inc(gendiskp, field)                          \
-       local_inc(&(part_stat_get(gendiskp, field)))
- #define part_stat_local_read(gendiskp, field)                         \
-       local_read(&(part_stat_get(gendiskp, field)))
- #define part_stat_local_read_cpu(gendiskp, field, cpu)                        \
-       local_read(&(part_stat_get_cpu(gendiskp, field, cpu)))
- unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part);
- void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
-                      unsigned int inflight[2]);
- void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
-                       int rw);
- void part_inc_in_flight(struct request_queue *q, struct hd_struct *part,
-                       int rw);
- static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk)
- {
-       if (disk)
-               return kzalloc_node(sizeof(struct partition_meta_info),
-                                   GFP_KERNEL, disk->node_id);
-       return kzalloc(sizeof(struct partition_meta_info), GFP_KERNEL);
- }
- static inline void free_part_info(struct hd_struct *part)
- {
-       kfree(part->info);
- }
- void update_io_ticks(struct hd_struct *part, unsigned long now);
++extern bool disk_has_partitions(struct gendisk *disk);
  
  /* block/genhd.c */
  extern void device_add_disk(struct device *parent, struct gendisk *disk,
@@@ -450,6 -325,8 +314,8 @@@ static inline int get_disk_ro(struct ge
  extern void disk_block_events(struct gendisk *disk);
  extern void disk_unblock_events(struct gendisk *disk);
  extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
+ extern void set_capacity_revalidate_and_notify(struct gendisk *disk,
+                       sector_t size, bool revalidate);
  extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask);
  
  /* drivers/char/random.c */
@@@ -469,170 -346,11 +335,11 @@@ static inline void set_capacity(struct 
        disk->part0.nr_sects = size;
  }
  
- #ifdef CONFIG_SOLARIS_X86_PARTITION
- #define SOLARIS_X86_NUMSLICE  16
- #define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL)
- struct solaris_x86_slice {
-       __le16 s_tag;           /* ID tag of partition */
-       __le16 s_flag;          /* permission flags */
-       __le32 s_start;         /* start sector no of partition */
-       __le32 s_size;          /* # of blocks in partition */
- };
- struct solaris_x86_vtoc {
-       unsigned int v_bootinfo[3];     /* info needed by mboot (unsupported) */
-       __le32 v_sanity;                /* to verify vtoc sanity */
-       __le32 v_version;               /* layout version */
-       char    v_volume[8];            /* volume name */
-       __le16  v_sectorsz;             /* sector size in bytes */
-       __le16  v_nparts;               /* number of partitions */
-       unsigned int v_reserved[10];    /* free space */
-       struct solaris_x86_slice
-               v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */
-       unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp (unsupported) */
-       char    v_asciilabel[128];      /* for compatibility */
- };
- #endif /* CONFIG_SOLARIS_X86_PARTITION */
- #ifdef CONFIG_BSD_DISKLABEL
- /*
-  * BSD disklabel support by Yossi Gottlieb <yogo@math.tau.ac.il>
-  * updated by Marc Espie <Marc.Espie@openbsd.org>
-  */
- /* check against BSD src/sys/sys/disklabel.h for consistency */
- #define BSD_DISKMAGIC (0x82564557UL)  /* The disk magic number */
- #define BSD_MAXPARTITIONS     16
- #define OPENBSD_MAXPARTITIONS 16
- #define BSD_FS_UNUSED         0       /* disklabel unused partition entry ID */
- struct bsd_disklabel {
-       __le32  d_magic;                /* the magic number */
-       __s16   d_type;                 /* drive type */
-       __s16   d_subtype;              /* controller/d_type specific */
-       char    d_typename[16];         /* type name, e.g. "eagle" */
-       char    d_packname[16];                 /* pack identifier */ 
-       __u32   d_secsize;              /* # of bytes per sector */
-       __u32   d_nsectors;             /* # of data sectors per track */
-       __u32   d_ntracks;              /* # of tracks per cylinder */
-       __u32   d_ncylinders;           /* # of data cylinders per unit */
-       __u32   d_secpercyl;            /* # of data sectors per cylinder */
-       __u32   d_secperunit;           /* # of data sectors per unit */
-       __u16   d_sparespertrack;       /* # of spare sectors per track */
-       __u16   d_sparespercyl;         /* # of spare sectors per cylinder */
-       __u32   d_acylinders;           /* # of alt. cylinders per unit */
-       __u16   d_rpm;                  /* rotational speed */
-       __u16   d_interleave;           /* hardware sector interleave */
-       __u16   d_trackskew;            /* sector 0 skew, per track */
-       __u16   d_cylskew;              /* sector 0 skew, per cylinder */
-       __u32   d_headswitch;           /* head switch time, usec */
-       __u32   d_trkseek;              /* track-to-track seek, usec */
-       __u32   d_flags;                /* generic flags */
- #define NDDATA 5
-       __u32   d_drivedata[NDDATA];    /* drive-type specific information */
- #define NSPARE 5
-       __u32   d_spare[NSPARE];        /* reserved for future use */
-       __le32  d_magic2;               /* the magic number (again) */
-       __le16  d_checksum;             /* xor of data incl. partitions */
-                       /* filesystem and partition information: */
-       __le16  d_npartitions;          /* number of partitions in following */
-       __le32  d_bbsize;               /* size of boot area at sn0, bytes */
-       __le32  d_sbsize;               /* max size of fs superblock, bytes */
-       struct  bsd_partition {         /* the partition table */
-               __le32  p_size;         /* number of sectors in partition */
-               __le32  p_offset;       /* starting sector */
-               __le32  p_fsize;        /* filesystem basic fragment size */
-               __u8    p_fstype;       /* filesystem type, see below */
-               __u8    p_frag;         /* filesystem fragments per block */
-               __le16  p_cpg;          /* filesystem cylinders per group */
-       } d_partitions[BSD_MAXPARTITIONS];      /* actually may be more */
- };
- #endif        /* CONFIG_BSD_DISKLABEL */
- #ifdef CONFIG_UNIXWARE_DISKLABEL
- /*
-  * Unixware slices support by Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl>
-  * and Krzysztof G. Baranowski <kgb@knm.org.pl>
-  */
- #define UNIXWARE_DISKMAGIC     (0xCA5E600DUL) /* The disk magic number */
- #define UNIXWARE_DISKMAGIC2    (0x600DDEEEUL) /* The slice table magic nr */
- #define UNIXWARE_NUMSLICE      16
- #define UNIXWARE_FS_UNUSED     0              /* Unused slice entry ID */
- struct unixware_slice {
-       __le16   s_label;       /* label */
-       __le16   s_flags;       /* permission flags */
-       __le32   start_sect;    /* starting sector */
-       __le32   nr_sects;      /* number of sectors in slice */
- };
- struct unixware_disklabel {
-       __le32   d_type;                /* drive type */
-       __le32   d_magic;                /* the magic number */
-       __le32   d_version;              /* version number */
-       char    d_serial[12];           /* serial number of the device */
-       __le32   d_ncylinders;           /* # of data cylinders per device */
-       __le32   d_ntracks;              /* # of tracks per cylinder */
-       __le32   d_nsectors;             /* # of data sectors per track */
-       __le32   d_secsize;              /* # of bytes per sector */
-       __le32   d_part_start;           /* # of first sector of this partition */
-       __le32   d_unknown1[12];         /* ? */
-       __le32  d_alt_tbl;              /* byte offset of alternate table */
-       __le32  d_alt_len;              /* byte length of alternate table */
-       __le32  d_phys_cyl;             /* # of physical cylinders per device */
-       __le32  d_phys_trk;             /* # of physical tracks per cylinder */
-       __le32  d_phys_sec;             /* # of physical sectors per track */
-       __le32  d_phys_bytes;           /* # of physical bytes per sector */
-       __le32  d_unknown2;             /* ? */
-       __le32   d_unknown3;             /* ? */
-       __le32  d_pad[8];               /* pad */
-       struct unixware_vtoc {
-               __le32  v_magic;                /* the magic number */
-               __le32  v_version;              /* version number */
-               char    v_name[8];              /* volume name */
-               __le16  v_nslices;              /* # of slices */
-               __le16  v_unknown1;             /* ? */
-               __le32  v_reserved[10];         /* reserved */
-               struct unixware_slice
-                       v_slice[UNIXWARE_NUMSLICE];     /* slice headers */
-       } vtoc;
- };  /* 408 */
- #endif /* CONFIG_UNIXWARE_DISKLABEL */
- #ifdef CONFIG_MINIX_SUBPARTITION
- #   define MINIX_NR_SUBPARTITIONS  4
- #endif /* CONFIG_MINIX_SUBPARTITION */
- #define ADDPART_FLAG_NONE     0
- #define ADDPART_FLAG_RAID     1
- #define ADDPART_FLAG_WHOLEDISK        2
- extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
- extern void blk_free_devt(dev_t devt);
- extern void blk_invalidate_devt(dev_t devt);
  extern dev_t blk_lookup_devt(const char *name, int partno);
- extern char *disk_name (struct gendisk *hd, int partno, char *buf);
  
  int bdev_disk_changed(struct block_device *bdev, bool invalidate);
  int blk_add_partitions(struct gendisk *disk, struct block_device *bdev);
  int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev);
- extern int disk_expand_part_tbl(struct gendisk *disk, int target);
- extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
-                                                    int partno, sector_t start,
-                                                    sector_t len, int flags,
-                                                    struct partition_meta_info
-                                                      *info);
- extern void __delete_partition(struct percpu_ref *);
- extern void delete_partition(struct gendisk *, int);
  extern void printk_all_partitions(void);
  
  extern struct gendisk *__alloc_disk_node(int minors, int node_id);
@@@ -646,20 -364,6 +353,6 @@@ extern void blk_register_region(dev_t d
                        void *data);
  extern void blk_unregister_region(dev_t devt, unsigned long range);
  
- extern ssize_t part_size_show(struct device *dev,
-                             struct device_attribute *attr, char *buf);
- extern ssize_t part_stat_show(struct device *dev,
-                             struct device_attribute *attr, char *buf);
- extern ssize_t part_inflight_show(struct device *dev,
-                             struct device_attribute *attr, char *buf);
- #ifdef CONFIG_FAIL_MAKE_REQUEST
- extern ssize_t part_fail_show(struct device *dev,
-                             struct device_attribute *attr, char *buf);
- extern ssize_t part_fail_store(struct device *dev,
-                              struct device_attribute *attr,
-                              const char *buf, size_t count);
- #endif /* CONFIG_FAIL_MAKE_REQUEST */
  #define alloc_disk_node(minors, node_id)                              \
  ({                                                                    \
        static struct lock_class_key __key;                             \
  
  #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE)
  
- static inline int hd_ref_init(struct hd_struct *part)
- {
-       if (percpu_ref_init(&part->ref, __delete_partition, 0,
-                               GFP_KERNEL))
-               return -ENOMEM;
-       return 0;
- }
- static inline void hd_struct_get(struct hd_struct *part)
- {
-       percpu_ref_get(&part->ref);
- }
- static inline int hd_struct_try_get(struct hd_struct *part)
- {
-       return percpu_ref_tryget_live(&part->ref);
- }
- static inline void hd_struct_put(struct hd_struct *part)
- {
-       percpu_ref_put(&part->ref);
- }
- static inline void hd_struct_kill(struct hd_struct *part)
- {
-       percpu_ref_kill(&part->ref);
- }
- static inline void hd_free_part(struct hd_struct *part)
- {
-       free_part_stats(part);
-       free_part_info(part);
-       percpu_ref_exit(&part->ref);
- }
- /*
-  * Any access of part->nr_sects which is not protected by partition
-  * bd_mutex or gendisk bdev bd_mutex, should be done using this
-  * accessor function.
-  *
-  * Code written along the lines of i_size_read() and i_size_write().
-  * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption
-  * on.
-  */
- static inline sector_t part_nr_sects_read(struct hd_struct *part)
- {
- #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
-       sector_t nr_sects;
-       unsigned seq;
-       do {
-               seq = read_seqcount_begin(&part->nr_sects_seq);
-               nr_sects = part->nr_sects;
-       } while (read_seqcount_retry(&part->nr_sects_seq, seq));
-       return nr_sects;
- #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
-       sector_t nr_sects;
-       preempt_disable();
-       nr_sects = part->nr_sects;
-       preempt_enable();
-       return nr_sects;
- #else
-       return part->nr_sects;
- #endif
- }
- /*
-  * Should be called with mutex lock held (typically bd_mutex) of partition
-  * to provide mutual exlusion among writers otherwise seqcount might be
-  * left in wrong state leaving the readers spinning infinitely.
-  */
- static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
- {
- #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
-       write_seqcount_begin(&part->nr_sects_seq);
-       part->nr_sects = size;
-       write_seqcount_end(&part->nr_sects_seq);
- #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
-       preempt_disable();
-       part->nr_sects = size;
-       preempt_enable();
- #else
-       part->nr_sects = size;
- #endif
- }
- #if defined(CONFIG_BLK_DEV_INTEGRITY)
- extern void blk_integrity_add(struct gendisk *);
- extern void blk_integrity_del(struct gendisk *);
- #else /* CONFIG_BLK_DEV_INTEGRITY */
- static inline void blk_integrity_add(struct gendisk *disk) { }
- static inline void blk_integrity_del(struct gendisk *disk) { }
- #endif        /* CONFIG_BLK_DEV_INTEGRITY */
  #else /* CONFIG_BLOCK */
  
  static inline void printk_all_partitions(void) { }