cxl/region: Allocate HPA capacity to regions
authorDan Williams <dan.j.williams@intel.com>
Mon, 25 Apr 2022 18:43:44 +0000 (11:43 -0700)
committerDan Williams <dan.j.williams@intel.com>
Mon, 25 Jul 2022 19:18:06 +0000 (12:18 -0700)
After a region's interleave parameters (ways and granularity) are set,
add a way for regions to allocate HPA (host physical address space) from
the free capacity in their parent root-decoder. The allocator for this
capacity reuses the 'struct resource' based allocator used for
CONFIG_DEVICE_PRIVATE.

Once the tuple of "ways, granularity, [uuid], and size" is set the
region configuration transitions to the CXL_CONFIG_INTERLEAVE_ACTIVE
state which is a precursor to allowing endpoint decoders to be added to
a region.

Co-developed-by: Ben Widawsky <bwidawsk@kernel.org>
Signed-off-by: Ben Widawsky <bwidawsk@kernel.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/165784335630.1758207.420216490941955417.stgit@dwillia2-xfh.jf.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Documentation/ABI/testing/sysfs-bus-cxl
drivers/cxl/Kconfig
drivers/cxl/core/region.c
drivers/cxl/cxl.h

index df34717..52764c1 100644 (file)
@@ -324,3 +324,32 @@ Description:
                (RW) Configures the number of devices participating in the
                region is set by writing this value. Each device will provide
                1/interleave_ways of storage for the region.
+
+
+What:          /sys/bus/cxl/devices/regionZ/size
+Date:          May, 2022
+KernelVersion: v5.20
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (RW) System physical address space to be consumed by the region.
+               When written, triggers the driver to allocate space out of the
+               parent root decoder's address space. When read, the size of the
+               address space is reported and should match the span of the
+               region's resource attribute. Size shall be set after the
+               interleave configuration parameters. Once set it cannot be
+               changed, only freed by writing 0. The kernel makes no guarantees
+               that data is maintained over an address space freeing event, and
+               there is no guarantee that a free followed by an allocate
+               results in the same address being allocated.
+
+
+What:          /sys/bus/cxl/devices/regionZ/resource
+Date:          May, 2022
+KernelVersion: v5.20
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (RO) A region is a contiguous partition of a CXL root decoder
+               address space. Region capacity is allocated by writing to the
+               size attribute, the resulting physical address space determined
+               by the driver is reflected here. It is therefore not useful to
+               read this before writing a value to the size attribute.
index 184fdd4..768ced3 100644 (file)
@@ -106,6 +106,9 @@ config CXL_SUSPEND
 config CXL_REGION
        bool
        default CXL_BUS
+       # For MAX_PHYSMEM_BITS
+       depends on SPARSEMEM
        select MEMREGION
+       select GET_FREE_REGION
 
 endif
index 45bc2fa..a84b007 100644 (file)
@@ -250,10 +250,152 @@ out:
 }
 static DEVICE_ATTR_RW(interleave_granularity);
 
+/*
+ * Report the region's base host physical address, or all-ones (-1) as a
+ * sentinel when no HPA capacity has been allocated yet (p->res == NULL).
+ */
+static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
+                            char *buf)
+{
+       struct cxl_region *cxlr = to_cxl_region(dev);
+       struct cxl_region_params *p = &cxlr->params;
+       u64 resource = -1ULL;
+       ssize_t rc;
+
+       /* interruptible: a sysfs reader may be waiting behind a writer */
+       rc = down_read_interruptible(&cxl_region_rwsem);
+       if (rc)
+               return rc;
+       if (p->res)
+               resource = p->res->start;
+       rc = sysfs_emit(buf, "%#llx\n", resource);
+       up_read(&cxl_region_rwsem);
+
+       return rc;
+}
+static DEVICE_ATTR_RO(resource);
+
+/*
+ * Allocate @size bytes of HPA for @cxlr from the parent root decoder's
+ * free capacity and advance the region to CXL_CONFIG_INTERLEAVE_ACTIVE.
+ *
+ * Prerequisites: interleave ways and granularity must be set, and a uuid
+ * must be set for PMEM regions. @size must be a multiple of
+ * SZ_256M * interleave_ways. Returns 0 on success, -EBUSY if capacity is
+ * already allocated or the region is past interleave configuration,
+ * -ENXIO on missing prerequisites, -EINVAL on misaligned size, or the
+ * allocator's error. Caller must hold cxl_region_rwsem for write.
+ */
+static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
+{
+       struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+       struct cxl_region_params *p = &cxlr->params;
+       struct resource *res;
+       u32 remainder = 0;
+
+       lockdep_assert_held_write(&cxl_region_rwsem);
+
+       /*
+        * Nothing to do... (fix: compare the existing allocation p->res,
+        * the local 'res' is uninitialized at this point)
+        */
+       if (p->res && resource_size(p->res) == size)
+               return 0;
+
+       /* To change size the old size must be freed first */
+       if (p->res)
+               return -EBUSY;
+
+       if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
+               return -EBUSY;
+
+       /* ways, granularity and uuid (if PMEM) need to be set before HPA */
+       if (!p->interleave_ways || !p->interleave_granularity ||
+           (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
+               return -ENXIO;
+
+       /* size must stripe evenly in SZ_256M chunks across all ways */
+       div_u64_rem(size, SZ_256M * p->interleave_ways, &remainder);
+       if (remainder)
+               return -EINVAL;
+
+       res = alloc_free_mem_region(cxlrd->res, size, SZ_256M,
+                                   dev_name(&cxlr->dev));
+       if (IS_ERR(res)) {
+               dev_dbg(&cxlr->dev, "failed to allocate HPA: %ld\n",
+                       PTR_ERR(res));
+               return PTR_ERR(res);
+       }
+
+       p->res = res;
+       p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
+
+       return 0;
+}
+
+/*
+ * Drop the region's HPA allocation, returning it to the root decoder's
+ * free capacity. While the region device is registered, sysfs writers
+ * can race, so the caller must hold cxl_region_rwsem for write; the
+ * post-device_del() teardown path is exempt from that requirement.
+ */
+static void cxl_region_iomem_release(struct cxl_region *cxlr)
+{
+       struct cxl_region_params *p = &cxlr->params;
+
+       if (device_is_registered(&cxlr->dev))
+               lockdep_assert_held_write(&cxl_region_rwsem);
+       if (p->res) {
+               remove_resource(p->res);
+               kfree(p->res);
+               p->res = NULL;
+       }
+}
+
+/*
+ * Release the region's HPA allocation and return the configuration to
+ * CXL_CONFIG_IDLE. A no-op if nothing is allocated; fails with -EBUSY
+ * once the region has reached the active state or beyond.
+ * Caller must hold cxl_region_rwsem for write.
+ */
+static int free_hpa(struct cxl_region *cxlr)
+{
+       struct cxl_region_params *p = &cxlr->params;
+
+       lockdep_assert_held_write(&cxl_region_rwsem);
+
+       if (!p->res)
+               return 0;
+
+       if (p->state >= CXL_CONFIG_ACTIVE)
+               return -EBUSY;
+
+       cxl_region_iomem_release(cxlr);
+       p->state = CXL_CONFIG_IDLE;
+       return 0;
+}
+
+/*
+ * sysfs 'size' writer: a non-zero value allocates that much HPA for the
+ * region, zero frees the current allocation (see alloc_hpa()/free_hpa()
+ * for the error conditions).
+ */
+static ssize_t size_store(struct device *dev, struct device_attribute *attr,
+                         const char *buf, size_t len)
+{
+       struct cxl_region *cxlr = to_cxl_region(dev);
+       u64 val;
+       int rc;
+
+       rc = kstrtou64(buf, 0, &val);
+       if (rc)
+               return rc;
+
+       /* killable, not interruptible: allow fatal signals to break the wait */
+       rc = down_write_killable(&cxl_region_rwsem);
+       if (rc)
+               return rc;
+
+       if (val)
+               rc = alloc_hpa(cxlr, val);
+       else
+               rc = free_hpa(cxlr);
+       up_write(&cxl_region_rwsem);
+
+       if (rc)
+               return rc;
+
+       return len;
+}
+
+/*
+ * sysfs 'size' reader: report the size of the region's HPA allocation,
+ * or 0 when no capacity has been allocated.
+ */
+static ssize_t size_show(struct device *dev, struct device_attribute *attr,
+                        char *buf)
+{
+       struct cxl_region *cxlr = to_cxl_region(dev);
+       struct cxl_region_params *p = &cxlr->params;
+       u64 size = 0;
+       ssize_t rc;
+
+       rc = down_read_interruptible(&cxl_region_rwsem);
+       if (rc)
+               return rc;
+       if (p->res)
+               size = resource_size(p->res);
+       rc = sysfs_emit(buf, "%#llx\n", size);
+       up_read(&cxl_region_rwsem);
+
+       return rc;
+}
+static DEVICE_ATTR_RW(size);
+
 static struct attribute *cxl_region_attrs[] = {
        &dev_attr_uuid.attr,
        &dev_attr_interleave_ways.attr,
        &dev_attr_interleave_granularity.attr,
+       &dev_attr_resource.attr,        /* RO: base HPA of the allocation */
+       &dev_attr_size.attr,            /* RW: allocate/free HPA capacity */
        NULL,
 };
 
@@ -299,7 +441,11 @@ static struct cxl_region *to_cxl_region(struct device *dev)
 
 static void unregister_region(void *dev)
 {
-       device_unregister(dev);
+       struct cxl_region *cxlr = to_cxl_region(dev);
+
+       /*
+        * Open-coded device_unregister(): release the HPA allocation
+        * after device_del() has cut off new sysfs writers, but before
+        * the final reference is dropped.
+        */
+       device_del(dev);
+       cxl_region_iomem_release(cxlr);
+       put_device(dev);
 }
 
 static struct lock_class_key cxl_region_key;
@@ -451,3 +597,5 @@ static ssize_t delete_region_store(struct device *dev,
        return len;
 }
 DEVICE_ATTR_WO(delete_region);
+
+MODULE_IMPORT_NS(CXL);
index e36f757..9518b57 100644 (file)
@@ -341,6 +341,7 @@ enum cxl_config_state {
  * @uuid: unique id for persistent regions
  * @interleave_ways: number of endpoints in the region
  * @interleave_granularity: capacity each endpoint contributes to a stripe
+ * @res: allocated iomem capacity for this region
  *
  * State transitions are protected by the cxl_region_rwsem
  */
@@ -349,6 +350,7 @@ struct cxl_region_params {
        uuid_t uuid;
        int interleave_ways;
        int interleave_granularity;
+       struct resource *res;
 };
 
 /**