// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
#include <linux/memregion.h>
#include <linux/genalloc.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/uuid.h>
#include <linux/sort.h>
#include <linux/idr.h>
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"

/**
 * DOC: cxl core region
 *
 * CXL Regions represent mapped memory capacity in system physical address
 * space. Whereas the CXL Root Decoders identify the bounds of potential CXL
 * Memory ranges, Regions represent the active mapped capacity by the HDM
 * Decoder Capability structures throughout the Host Bridges, Switches, and
 * Endpoints in the topology.
 *
 * Region configuration has ordering constraints. UUID may be set at any time
 * but is only visible for persistent regions.
 * 1. Interleave granularity
 * 2. Interleave size
 * 3. Decoder targets
 */
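
/*
 * A sketch of that ordering from userspace (device names are hypothetical,
 * and assume the root decoder's cached region id is 0):
 *
 *   echo region0 > /sys/bus/cxl/devices/decoder0.0/create_pmem_region
 *   echo $(uuidgen) > /sys/bus/cxl/devices/region0/uuid
 *   echo 2 > /sys/bus/cxl/devices/region0/interleave_ways
 *   echo 256 > /sys/bus/cxl/devices/region0/interleave_granularity
 *   echo $((512 << 20)) > /sys/bus/cxl/devices/region0/size
 *   echo decoder2.0 > /sys/bus/cxl/devices/region0/target0
 *   echo decoder3.0 > /sys/bus/cxl/devices/region0/target1
 *   echo 1 > /sys/bus/cxl/devices/region0/commit
 */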
static struct cxl_region *to_cxl_region(struct device *dev);

static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	ssize_t rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;
	if (cxlr->mode != CXL_DECODER_PMEM)
		rc = sysfs_emit(buf, "\n");
	else
		rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
	up_read(&cxl_region_rwsem);

	return rc;
}
static int is_dup(struct device *match, void *data)
{
	struct cxl_region_params *p;
	struct cxl_region *cxlr;
	uuid_t *uuid = data;

	if (!is_cxl_region(match))
		return 0;

	lockdep_assert_held(&cxl_region_rwsem);
	cxlr = to_cxl_region(match);
	p = &cxlr->params;

	if (uuid_equal(&p->uuid, uuid)) {
		dev_dbg(match, "already has uuid: %pUb\n", uuid);
		return -EBUSY;
	}

	return 0;
}
static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
			  const char *buf, size_t len)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	uuid_t temp;
	ssize_t rc;

	if (len != UUID_STRING_LEN + 1)
		return -EINVAL;

	rc = uuid_parse(buf, &temp);
	if (rc)
		return rc;

	if (uuid_is_null(&temp))
		return -EINVAL;

	rc = down_write_killable(&cxl_region_rwsem);
	if (rc)
		return rc;

	if (uuid_equal(&p->uuid, &temp))
		goto out;

	rc = -EBUSY;
	if (p->state >= CXL_CONFIG_ACTIVE)
		goto out;

	rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup);
	if (rc < 0)
		goto out;

	uuid_copy(&p->uuid, &temp);
out:
	up_write(&cxl_region_rwsem);

	if (rc)
		return rc;
	return len;
}
static DEVICE_ATTR_RW(uuid);
static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port,
					  struct cxl_region *cxlr)
{
	return xa_load(&port->regions, (unsigned long)cxlr);
}

static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
{
	if (!cpu_cache_has_invalidate_memregion()) {
		if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
			dev_info_once(
				&cxlr->dev,
				"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
			return 0;
		} else {
			dev_err(&cxlr->dev,
				"Failed to synchronize CPU cache state\n");
			return -ENXIO;
		}
	}

	cpu_cache_invalidate_memregion(IORES_DESC_CXL);
	return 0;
}
static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
{
	struct cxl_region_params *p = &cxlr->params;
	int i, rc = 0;

	/*
	 * Before region teardown attempt to flush, and if the flush
	 * fails cancel the region teardown for data consistency
	 * reasons
	 */
	rc = cxl_region_invalidate_memregion(cxlr);
	if (rc)
		return rc;

	for (i = count - 1; i >= 0; i--) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];
		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
		struct cxl_port *iter = cxled_to_port(cxled);
		struct cxl_dev_state *cxlds = cxlmd->cxlds;
		struct cxl_ep *ep;

		if (cxlds->rcd)
			goto endpoint_reset;

		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
			iter = to_cxl_port(iter->dev.parent);

		for (ep = cxl_ep_load(iter, cxlmd); iter;
		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
			struct cxl_region_ref *cxl_rr;
			struct cxl_decoder *cxld;

			cxl_rr = cxl_rr_load(iter, cxlr);
			cxld = cxl_rr->decoder;
			if (cxld->reset)
				rc = cxld->reset(cxld);
			if (rc)
				return rc;
			set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
		}

endpoint_reset:
		rc = cxled->cxld.reset(&cxled->cxld);
		if (rc)
			return rc;
		set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
	}

	/* all decoders associated with this region have been torn down */
	clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);

	return 0;
}
static int commit_decoder(struct cxl_decoder *cxld)
{
	struct cxl_switch_decoder *cxlsd = NULL;

	if (cxld->commit)
		return cxld->commit(cxld);

	if (is_switch_decoder(&cxld->dev))
		cxlsd = to_cxl_switch_decoder(&cxld->dev);

	if (dev_WARN_ONCE(&cxld->dev, !cxlsd || cxlsd->nr_targets > 1,
			  "->commit() is required\n"))
		return -ENXIO;
	return 0;
}
static int cxl_region_decode_commit(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;
	int i, rc = 0;

	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];
		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
		struct cxl_region_ref *cxl_rr;
		struct cxl_decoder *cxld;
		struct cxl_port *iter;
		struct cxl_ep *ep;

		/* commit bottom up */
		for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
		     iter = to_cxl_port(iter->dev.parent)) {
			cxl_rr = cxl_rr_load(iter, cxlr);
			cxld = cxl_rr->decoder;
			rc = commit_decoder(cxld);
			if (rc)
				break;
		}

		if (rc) {
			/* programming @iter failed, teardown */
			for (ep = cxl_ep_load(iter, cxlmd); ep && iter;
			     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
				cxl_rr = cxl_rr_load(iter, cxlr);
				cxld = cxl_rr->decoder;
				if (cxld->reset)
					cxld->reset(cxld);
			}

			cxled->cxld.reset(&cxled->cxld);
			goto err;
		}
	}

	return 0;

err:
	/* undo the targets that were successfully committed */
	cxl_region_decode_reset(cxlr, i);
	return rc;
}
static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
			    const char *buf, size_t len)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	bool commit;
	ssize_t rc;

	rc = kstrtobool(buf, &commit);
	if (rc)
		return rc;

	rc = down_write_killable(&cxl_region_rwsem);
	if (rc)
		return rc;

	/* Already in the requested state? */
	if (commit && p->state >= CXL_CONFIG_COMMIT)
		goto out;
	if (!commit && p->state < CXL_CONFIG_COMMIT)
		goto out;

	/* Not ready to commit? */
	if (commit && p->state < CXL_CONFIG_ACTIVE) {
		rc = -ENXIO;
		goto out;
	}

	/*
	 * Invalidate caches before region setup to drop any speculative
	 * consumption of this address space
	 */
	rc = cxl_region_invalidate_memregion(cxlr);
	if (rc)
		goto out;

	if (commit) {
		rc = cxl_region_decode_commit(cxlr);
		if (rc == 0)
			p->state = CXL_CONFIG_COMMIT;
	} else {
		p->state = CXL_CONFIG_RESET_PENDING;
		up_write(&cxl_region_rwsem);
		device_release_driver(&cxlr->dev);
		down_write(&cxl_region_rwsem);

		/*
		 * The lock was dropped, so need to revalidate that the reset is
		 * still pending.
		 */
		if (p->state == CXL_CONFIG_RESET_PENDING) {
			rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
			/*
			 * Revert to committed since there may still be active
			 * decoders associated with this region, or move forward
			 * to active to mark the reset successful
			 */
			if (rc)
				p->state = CXL_CONFIG_COMMIT;
			else
				p->state = CXL_CONFIG_ACTIVE;
		}
	}

out:
	up_write(&cxl_region_rwsem);

	if (rc)
		return rc;
	return len;
}
static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	ssize_t rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;
	rc = sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
	up_read(&cxl_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RW(commit);
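
/*
 * Note the ordering the store handler above relies on: a '1' write programs
 * HDM decoders bottom up (the endpoint decoder first, then each switch up
 * to the host bridge) via cxl_region_decode_commit(), while a '0' write
 * tears them down in the reverse order via cxl_region_decode_reset().
 */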
static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
				  int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct cxl_region *cxlr = to_cxl_region(dev);

	/*
	 * Support tooling that expects to find a 'uuid' attribute for all
	 * regions regardless of mode.
	 */
	if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
		return 0444;
	return a->mode;
}
static ssize_t interleave_ways_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	ssize_t rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;
	rc = sysfs_emit(buf, "%d\n", p->interleave_ways);
	up_read(&cxl_region_rwsem);

	return rc;
}

static const struct attribute_group *get_cxl_region_target_group(void);
static ssize_t interleave_ways_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t len)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	unsigned int val, save;
	int rc;
	u8 iw;

	rc = kstrtouint(buf, 0, &val);
	if (rc)
		return rc;

	rc = ways_to_eiw(val, &iw);
	if (rc)
		return rc;

	/*
	 * Even for x3, x6, and x12 interleaves the region interleave must be a
	 * power of 2 multiple of the host bridge interleave.
	 */
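	/*
	 * E.g. (hypothetical values): with a x3 host bridge interleave,
	 * region interleave_ways of 3, 6, or 12 (3 * 2^n) pass the check
	 * below, while 9 (3 * 3) is rejected.
	 */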
	if (!is_power_of_2(val / cxld->interleave_ways) ||
	    (val % cxld->interleave_ways)) {
		dev_dbg(&cxlr->dev, "invalid interleave: %d\n", val);
		return -EINVAL;
	}

	rc = down_write_killable(&cxl_region_rwsem);
	if (rc)
		return rc;
	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
		rc = -EBUSY;
		goto out;
	}

	save = p->interleave_ways;
	p->interleave_ways = val;
	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
	if (rc)
		p->interleave_ways = save;
out:
	up_write(&cxl_region_rwsem);
	if (rc)
		return rc;
	return len;
}
static DEVICE_ATTR_RW(interleave_ways);
static ssize_t interleave_granularity_show(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	ssize_t rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;
	rc = sysfs_emit(buf, "%d\n", p->interleave_granularity);
	up_read(&cxl_region_rwsem);

	return rc;
}
static ssize_t interleave_granularity_store(struct device *dev,
					    struct device_attribute *attr,
					    const char *buf, size_t len)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	int rc, val;
	u16 ig;

	rc = kstrtoint(buf, 0, &val);
	if (rc)
		return rc;

	rc = granularity_to_eig(val, &ig);
	if (rc)
		return rc;

	/*
	 * When the host-bridge is interleaved, disallow region granularity !=
	 * root granularity. Regions with a granularity less than the root
	 * interleave result in needing multiple endpoints to support a single
	 * slot in the interleave (possible to support in the future). Regions
	 * with a granularity greater than the root interleave result in invalid
	 * DPA translations (invalid to support).
	 */
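	/*
	 * E.g. (hypothetical values): a root decoder interleaved x2 at 1024B
	 * granularity requires the region granularity to also be 1024B;
	 * both 256B and 4096B would fail the check below.
	 */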
	if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
		return -EINVAL;

	rc = down_write_killable(&cxl_region_rwsem);
	if (rc)
		return rc;
	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
		rc = -EBUSY;
		goto out;
	}

	p->interleave_granularity = val;
out:
	up_write(&cxl_region_rwsem);
	if (rc)
		return rc;
	return len;
}
static DEVICE_ATTR_RW(interleave_granularity);
static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	u64 resource = -1ULL;
	ssize_t rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;
	if (p->res)
		resource = p->res->start;
	rc = sysfs_emit(buf, "%#llx\n", resource);
	up_read(&cxl_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RO(resource);

static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct cxl_region *cxlr = to_cxl_region(dev);

	return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode));
}
static DEVICE_ATTR_RO(mode);
static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
	struct cxl_region_params *p = &cxlr->params;
	struct resource *res;
	u32 remainder = 0;

	lockdep_assert_held_write(&cxl_region_rwsem);

	/* Nothing to do... */
	if (p->res && resource_size(p->res) == size)
		return 0;

	/* To change size the old size must be freed first */
	if (p->res)
		return -EBUSY;

	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
		return -EBUSY;

	/* ways, granularity and uuid (if PMEM) need to be set before HPA */
	if (!p->interleave_ways || !p->interleave_granularity ||
	    (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
		return -ENXIO;

	div_u64_rem(size, SZ_256M * p->interleave_ways, &remainder);
	if (remainder)
		return -EINVAL;
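	/*
	 * Sizing sketch (hypothetical values): HPA space is allocated in
	 * SZ_256M chunks per interleave way, so the check above requires a
	 * x4 region to be sized in 1GB multiples, e.g. a 2GB region maps
	 * 512MB of DPA from each of its four endpoint decoders.
	 */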
	res = alloc_free_mem_region(cxlrd->res, size, SZ_256M,
				    dev_name(&cxlr->dev));
	if (IS_ERR(res)) {
		dev_dbg(&cxlr->dev, "failed to allocate HPA: %ld\n",
			PTR_ERR(res));
		return PTR_ERR(res);
	}

	p->res = res;
	p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;

	return 0;
}
static void cxl_region_iomem_release(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;

	if (device_is_registered(&cxlr->dev))
		lockdep_assert_held_write(&cxl_region_rwsem);
	if (p->res) {
		/*
		 * Autodiscovered regions may not have been able to insert their
		 * resource.
		 */
		if (p->res->parent)
			remove_resource(p->res);
		kfree(p->res);
		p->res = NULL;
	}
}
static int free_hpa(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;

	lockdep_assert_held_write(&cxl_region_rwsem);

	if (!p->res)
		return 0;

	if (p->state >= CXL_CONFIG_ACTIVE)
		return -EBUSY;

	cxl_region_iomem_release(cxlr);
	p->state = CXL_CONFIG_IDLE;
	return 0;
}
static ssize_t size_store(struct device *dev, struct device_attribute *attr,
			  const char *buf, size_t len)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	u64 val;
	int rc;

	rc = kstrtou64(buf, 0, &val);
	if (rc)
		return rc;

	rc = down_write_killable(&cxl_region_rwsem);
	if (rc)
		return rc;

	if (val)
		rc = alloc_hpa(cxlr, val);
	else
		rc = free_hpa(cxlr);
	up_write(&cxl_region_rwsem);

	if (rc)
		return rc;
	return len;
}
static ssize_t size_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	u64 size = 0;
	ssize_t rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;
	if (p->res)
		size = resource_size(p->res);
	rc = sysfs_emit(buf, "%#llx\n", size);
	up_read(&cxl_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RW(size);
static struct attribute *cxl_region_attrs[] = {
	&dev_attr_uuid.attr,
	&dev_attr_commit.attr,
	&dev_attr_interleave_ways.attr,
	&dev_attr_interleave_granularity.attr,
	&dev_attr_resource.attr,
	&dev_attr_size.attr,
	&dev_attr_mode.attr,
	NULL,
};

static const struct attribute_group cxl_region_group = {
	.attrs = cxl_region_attrs,
	.is_visible = cxl_region_visible,
};
static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
{
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_endpoint_decoder *cxled;
	int rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;

	if (pos >= p->interleave_ways) {
		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
			p->interleave_ways);
		rc = -ENXIO;
		goto out;
	}

	cxled = p->targets[pos];
	if (!cxled)
		rc = sysfs_emit(buf, "\n");
	else
		rc = sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
out:
	up_read(&cxl_region_rwsem);

	return rc;
}
static int match_free_decoder(struct device *dev, void *data)
{
	struct cxl_decoder *cxld;
	int *id = data;

	if (!is_switch_decoder(dev))
		return 0;

	cxld = to_cxl_decoder(dev);

	/* enforce ordered allocation */
	if (cxld->id != *id)
		return 0;

	if (!cxld->region)
		return 1;

	(*id)++;

	return 0;
}
static int match_auto_decoder(struct device *dev, void *data)
{
	struct cxl_region_params *p = data;
	struct cxl_decoder *cxld;
	struct range *r;

	if (!is_switch_decoder(dev))
		return 0;

	cxld = to_cxl_decoder(dev);
	r = &cxld->hpa_range;

	if (p->res && p->res->start == r->start && p->res->end == r->end)
		return 1;

	return 0;
}
static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port,
						   struct cxl_region *cxlr)
{
	struct device *dev;
	int id = 0;

	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
		dev = device_find_child(&port->dev, &cxlr->params,
					match_auto_decoder);
	else
		dev = device_find_child(&port->dev, &id, match_free_decoder);
	if (!dev)
		return NULL;
	/*
	 * This decoder stays pinned as long as the endpoint decoder is
	 * registered, and endpoint decoder unregistration holds the
	 * cxl_region_rwsem over unregister events, so no need to hold on to
	 * this extra reference.
	 */
	put_device(dev);
	return to_cxl_decoder(dev);
}
static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
					       struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_region_ref *cxl_rr, *iter;
	unsigned long index;
	int rc;

	xa_for_each(&port->regions, index, iter) {
		struct cxl_region_params *ip = &iter->region->params;

		if (!ip->res)
			continue;

		if (ip->res->start > p->res->start) {
			dev_dbg(&cxlr->dev,
				"%s: HPA order violation %s:%pr vs %pr\n",
				dev_name(&port->dev),
				dev_name(&iter->region->dev), ip->res, p->res);
			return ERR_PTR(-EBUSY);
		}
	}

	cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL);
	if (!cxl_rr)
		return ERR_PTR(-ENOMEM);
	cxl_rr->port = port;
	cxl_rr->region = cxlr;
	cxl_rr->nr_targets = 1;
	xa_init(&cxl_rr->endpoints);

	rc = xa_insert(&port->regions, (unsigned long)cxlr, cxl_rr, GFP_KERNEL);
	if (rc) {
		dev_dbg(&cxlr->dev,
			"%s: failed to track region reference: %d\n",
			dev_name(&port->dev), rc);
		kfree(cxl_rr);
		return ERR_PTR(rc);
	}

	return cxl_rr;
}
static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr)
{
	struct cxl_region *cxlr = cxl_rr->region;
	struct cxl_decoder *cxld = cxl_rr->decoder;

	if (!cxld)
		return;

	dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n");
	if (cxld->region == cxlr) {
		cxld->region = NULL;
		put_device(&cxlr->dev);
	}
}
static void free_region_ref(struct cxl_region_ref *cxl_rr)
{
	struct cxl_port *port = cxl_rr->port;
	struct cxl_region *cxlr = cxl_rr->region;

	cxl_rr_free_decoder(cxl_rr);
	xa_erase(&port->regions, (unsigned long)cxlr);
	xa_destroy(&cxl_rr->endpoints);
	kfree(cxl_rr);
}
static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
			 struct cxl_endpoint_decoder *cxled)
{
	int rc;
	struct cxl_port *port = cxl_rr->port;
	struct cxl_region *cxlr = cxl_rr->region;
	struct cxl_decoder *cxld = cxl_rr->decoder;
	struct cxl_ep *ep = cxl_ep_load(port, cxled_to_memdev(cxled));

	if (ep) {
		rc = xa_insert(&cxl_rr->endpoints, (unsigned long)cxled, ep,
			       GFP_KERNEL);
		if (rc)
			return rc;
		cxl_rr->nr_eps++;
	}

	if (!cxld->region) {
		cxld->region = cxlr;
		get_device(&cxlr->dev);
	}

	return 0;
}
static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
				struct cxl_endpoint_decoder *cxled,
				struct cxl_region_ref *cxl_rr)
{
	struct cxl_decoder *cxld;

	if (port == cxled_to_port(cxled))
		cxld = &cxled->cxld;
	else
		cxld = cxl_region_find_decoder(port, cxlr);
	if (!cxld) {
		dev_dbg(&cxlr->dev, "%s: no decoder available\n",
			dev_name(&port->dev));
		return -EBUSY;
	}

	if (cxld->region) {
		dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
			dev_name(&port->dev), dev_name(&cxld->dev),
			dev_name(&cxld->region->dev));
		return -EBUSY;
	}

	/*
	 * Endpoints should already match the region type, but backstop that
	 * assumption with an assertion. Switch-decoders change mapping-type
	 * based on what is mapped when they are assigned to a region.
	 */
	dev_WARN_ONCE(&cxlr->dev,
		      port == cxled_to_port(cxled) &&
			      cxld->target_type != cxlr->type,
		      "%s:%s mismatch decoder type %d -> %d\n",
		      dev_name(&cxled_to_memdev(cxled)->dev),
		      dev_name(&cxld->dev), cxld->target_type, cxlr->type);
	cxld->target_type = cxlr->type;
	cxl_rr->decoder = cxld;
	return 0;
}
/**
 * cxl_port_attach_region() - track a region's interest in a port by endpoint
 * @port: port to add a new region reference 'struct cxl_region_ref'
 * @cxlr: region to attach to @port
 * @cxled: endpoint decoder used to create or further pin a region reference
 * @pos: interleave position of @cxled in @cxlr
 *
 * The attach event is an opportunity to validate CXL decode setup
 * constraints and record metadata needed for programming HDM decoders,
 * in particular decoder target lists.
 *
 * The steps are:
 *
 * - validate that there are no other regions with a higher HPA already
 *   associated with @port
 * - establish a region reference if one is not already present
 *
 *   - additionally allocate a decoder instance that will host @cxlr on
 *     @port
 *
 * - pin the region reference by the endpoint
 * - account for how many entries in @port's target list are needed to
 *   cover all of the added endpoints.
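 *
 * As an illustration (hypothetical topology): for a x4 region with two
 * directly attached endpoints per host bridge, each host bridge port's
 * region reference ends up with nr_eps == 2 and nr_targets == 2, while
 * two endpoints that instead share a switch below the host bridge share
 * a 'next' port and consume a single target entry (nr_targets == 1).
 */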
static int cxl_port_attach_region(struct cxl_port *port,
				  struct cxl_region *cxlr,
				  struct cxl_endpoint_decoder *cxled, int pos)
{
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
	struct cxl_region_ref *cxl_rr;
	bool nr_targets_inc = false;
	struct cxl_decoder *cxld;
	unsigned long index;
	int rc = -EBUSY;

	lockdep_assert_held_write(&cxl_region_rwsem);

	cxl_rr = cxl_rr_load(port, cxlr);
	if (cxl_rr) {
		struct cxl_ep *ep_iter;
		int found = 0;

		/*
		 * Walk the existing endpoints that have been attached to
		 * @cxlr at @port and see if they share the same 'next' port
		 * in the downstream direction. I.e. endpoints that share common
		 * upstream switch or host-bridge ports
		 */
		xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
			if (ep_iter == ep)
				continue;
			if (ep_iter->next == ep->next) {
				found++;
				break;
			}
		}

		/*
		 * New target port, or @port is an endpoint port that always
		 * accounts its own local decode as a target.
		 */
		if (!found || !ep->next) {
			cxl_rr->nr_targets++;
			nr_targets_inc = true;
		}
	} else {
		cxl_rr = alloc_region_ref(port, cxlr);
		if (IS_ERR(cxl_rr)) {
			dev_dbg(&cxlr->dev,
				"%s: failed to allocate region reference\n",
				dev_name(&port->dev));
			return PTR_ERR(cxl_rr);
		}
		nr_targets_inc = true;

		rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr);
		if (rc)
			goto out_erase;
	}
	cxld = cxl_rr->decoder;

	rc = cxl_rr_ep_add(cxl_rr, cxled);
	if (rc) {
		dev_dbg(&cxlr->dev,
			"%s: failed to track endpoint %s:%s reference\n",
			dev_name(&port->dev), dev_name(&cxlmd->dev),
			dev_name(&cxld->dev));
		goto out_erase;
	}

	dev_dbg(&cxlr->dev,
		"%s:%s %s add: %s:%s @ %d next: %s nr_eps: %d nr_targets: %d\n",
		dev_name(port->uport_dev), dev_name(&port->dev),
		dev_name(&cxld->dev), dev_name(&cxlmd->dev),
		dev_name(&cxled->cxld.dev), pos,
		ep ? ep->next ? dev_name(ep->next->uport_dev) :
				dev_name(&cxlmd->dev) :
		     "none",
		cxl_rr->nr_eps, cxl_rr->nr_targets);

	return 0;
out_erase:
	if (nr_targets_inc)
		cxl_rr->nr_targets--;
	if (cxl_rr->nr_eps == 0)
		free_region_ref(cxl_rr);
	return rc;
}
static void cxl_port_detach_region(struct cxl_port *port,
				   struct cxl_region *cxlr,
				   struct cxl_endpoint_decoder *cxled)
{
	struct cxl_region_ref *cxl_rr;
	struct cxl_ep *ep = NULL;

	lockdep_assert_held_write(&cxl_region_rwsem);

	cxl_rr = cxl_rr_load(port, cxlr);
	if (!cxl_rr)
		return;

	/*
	 * Endpoint ports do not carry cxl_ep references, and they
	 * never target more than one endpoint by definition
	 */
	if (cxl_rr->decoder == &cxled->cxld)
		cxl_rr->nr_eps--;
	else
		ep = xa_erase(&cxl_rr->endpoints, (unsigned long)cxled);
	if (ep) {
		struct cxl_ep *ep_iter;
		unsigned long index;
		int found = 0;

		cxl_rr->nr_eps--;
		xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
			if (ep_iter->next == ep->next) {
				found++;
				break;
			}
		}
		if (!found)
			cxl_rr->nr_targets--;
	}

	if (cxl_rr->nr_eps == 0)
		free_region_ref(cxl_rr);
}
static int check_last_peer(struct cxl_endpoint_decoder *cxled,
			   struct cxl_ep *ep, struct cxl_region_ref *cxl_rr,
			   int distance)
{
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_region *cxlr = cxl_rr->region;
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_endpoint_decoder *cxled_peer;
	struct cxl_port *port = cxl_rr->port;
	struct cxl_memdev *cxlmd_peer;
	struct cxl_ep *ep_peer;
	int pos = cxled->pos;

	/*
	 * If this position wants to share a dport with the last endpoint mapped
	 * then that endpoint, at index 'position - distance', must also be
	 * mapped by this dport.
	 */
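	/*
	 * For example (hypothetical x4 region interleaved across two
	 * host-bridge dports): distance is 4 / 2 = 2, so the endpoint at
	 * position 3 must share its dport with the endpoint at position 1.
	 */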
	if (pos < distance) {
		dev_dbg(&cxlr->dev, "%s:%s: cannot host %s:%s at %d\n",
			dev_name(port->uport_dev), dev_name(&port->dev),
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
		return -ENXIO;
	}
	cxled_peer = p->targets[pos - distance];
	cxlmd_peer = cxled_to_memdev(cxled_peer);
	ep_peer = cxl_ep_load(port, cxlmd_peer);
	if (ep->dport != ep_peer->dport) {
		dev_dbg(&cxlr->dev,
			"%s:%s: %s:%s pos %d mismatched peer %s:%s\n",
			dev_name(port->uport_dev), dev_name(&port->dev),
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos,
			dev_name(&cxlmd_peer->dev),
			dev_name(&cxled_peer->cxld.dev));
		return -ENXIO;
	}

	return 0;
}
static int cxl_port_setup_targets(struct cxl_port *port,
				  struct cxl_region *cxlr,
				  struct cxl_endpoint_decoder *cxled)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
	int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos;
	struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
	struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_decoder *cxld = cxl_rr->decoder;
	struct cxl_switch_decoder *cxlsd;
	u16 eig, peig;
	u8 eiw, peiw;

	/*
	 * While root level decoders support x3, x6, x12, switch level
	 * decoders only support powers of 2 up to x16.
	 */
	if (!is_power_of_2(cxl_rr->nr_targets)) {
		dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n",
			dev_name(port->uport_dev), dev_name(&port->dev),
			cxl_rr->nr_targets);
		return -EINVAL;
	}

	cxlsd = to_cxl_switch_decoder(&cxld->dev);
	if (cxl_rr->nr_targets_set) {
		int i, distance;

		/*
		 * Passthrough decoders impose no distance requirements between
		 * peers
		 */
		if (cxl_rr->nr_targets == 1)
			distance = 0;
		else
			distance = p->nr_targets / cxl_rr->nr_targets;
		for (i = 0; i < cxl_rr->nr_targets_set; i++)
			if (ep->dport == cxlsd->target[i]) {
				rc = check_last_peer(cxled, ep, cxl_rr,
						     distance);
				if (rc)
					return rc;
				goto out_target_set;
			}
		goto add_target;
	}

	if (is_cxl_root(parent_port)) {
		parent_ig = cxlrd->cxlsd.cxld.interleave_granularity;
		parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
		/*
		 * For purposes of address bit routing, use power-of-2 math for
		 * switch ports.
		 */
		if (!is_power_of_2(parent_iw))
			parent_iw /= 3;
	} else {
		struct cxl_region_ref *parent_rr;
		struct cxl_decoder *parent_cxld;

		parent_rr = cxl_rr_load(parent_port, cxlr);
		parent_cxld = parent_rr->decoder;
		parent_ig = parent_cxld->interleave_granularity;
		parent_iw = parent_cxld->interleave_ways;
	}

	rc = granularity_to_eig(parent_ig, &peig);
	if (rc) {
		dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
			dev_name(parent_port->uport_dev),
			dev_name(&parent_port->dev), parent_ig);
		return rc;
	}

	rc = ways_to_eiw(parent_iw, &peiw);
	if (rc) {
		dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
			dev_name(parent_port->uport_dev),
			dev_name(&parent_port->dev), parent_iw);
		return rc;
	}

	iw = cxl_rr->nr_targets;
	rc = ways_to_eiw(iw, &eiw);
	if (rc) {
		dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
			dev_name(port->uport_dev), dev_name(&port->dev), iw);
		return rc;
	}

	/*
	 * Interleave granularity is a multiple of @parent_port granularity.
	 * Multiplier is the parent port interleave ways.
	 */
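	/*
	 * E.g. (hypothetical values): a parent port interleaved x2 at 256B
	 * granularity leaves this port decoding at 512B granularity
	 * (256 * 2), so each level of the decode strides past the bytes
	 * routed by the levels above it.
	 */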
	rc = granularity_to_eig(parent_ig * parent_iw, &eig);
	if (rc) {
		dev_dbg(&cxlr->dev,
			"%s: invalid granularity calculation (%d * %d)\n",
			dev_name(&parent_port->dev), parent_ig, parent_iw);
		return rc;
	}

	rc = eig_to_granularity(eig, &ig);
	if (rc) {
		dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
			dev_name(port->uport_dev), dev_name(&port->dev),
			256 << eig);
		return rc;
	}

	if (iw > 8 || iw > cxlsd->nr_targets) {
		dev_dbg(&cxlr->dev,
			"%s:%s:%s: ways: %d overflows targets: %d\n",
			dev_name(port->uport_dev), dev_name(&port->dev),
			dev_name(&cxld->dev), iw, cxlsd->nr_targets);
		return -ENXIO;
	}

	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
		if (cxld->interleave_ways != iw ||
		    cxld->interleave_granularity != ig ||
		    cxld->hpa_range.start != p->res->start ||
		    cxld->hpa_range.end != p->res->end ||
		    ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
			dev_err(&cxlr->dev,
				"%s:%s %s expected iw: %d ig: %d %pr\n",
				dev_name(port->uport_dev), dev_name(&port->dev),
				__func__, iw, ig, p->res);
			dev_err(&cxlr->dev,
				"%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n",
				dev_name(port->uport_dev), dev_name(&port->dev),
				__func__, cxld->interleave_ways,
				cxld->interleave_granularity,
				(cxld->flags & CXL_DECODER_F_ENABLE) ?
					"enabled" : "disabled",
				cxld->hpa_range.start, cxld->hpa_range.end);
			return -ENXIO;
		}
	} else {
		cxld->interleave_ways = iw;
		cxld->interleave_granularity = ig;
		cxld->hpa_range = (struct range) {
			.start = p->res->start,
			.end = p->res->end,
		};
	}
	dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport_dev),
		dev_name(&port->dev), iw, ig);
add_target:
	if (cxl_rr->nr_targets_set == cxl_rr->nr_targets) {
		dev_dbg(&cxlr->dev,
			"%s:%s: targets full trying to add %s:%s at %d\n",
			dev_name(port->uport_dev), dev_name(&port->dev),
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
		return -ENXIO;
	}
	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
		if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) {
			dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n",
				dev_name(port->uport_dev), dev_name(&port->dev),
				dev_name(&cxlsd->cxld.dev),
				dev_name(ep->dport->dport_dev),
				cxl_rr->nr_targets_set);
			return -ENXIO;
		}
	} else
		cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
	inc = 1;
out_target_set:
	cxl_rr->nr_targets_set += inc;
	dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n",
		dev_name(port->uport_dev), dev_name(&port->dev),
		cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport_dev),
		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);

	return 0;
}
static void cxl_port_reset_targets(struct cxl_port *port,
				   struct cxl_region *cxlr)
{
	struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
	struct cxl_decoder *cxld;

	/*
	 * After the last endpoint has been detached the entire cxl_rr may now
	 * be gone.
	 */
	if (!cxl_rr)
		return;
	cxl_rr->nr_targets_set = 0;

	cxld = cxl_rr->decoder;
	cxld->hpa_range = (struct range) {
		.start = 0,
		.end = -1,
	};
}
static void cxl_region_teardown_targets(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_endpoint_decoder *cxled;
	struct cxl_dev_state *cxlds;
	struct cxl_memdev *cxlmd;
	struct cxl_port *iter;
	struct cxl_ep *ep;
	int i;

	/*
	 * In the auto-discovery case skip automatic teardown since the
	 * address space is already active
	 */
	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
		return;

	for (i = 0; i < p->nr_targets; i++) {
		cxled = p->targets[i];
		cxlmd = cxled_to_memdev(cxled);
		cxlds = cxlmd->cxlds;

		if (cxlds->rcd)
			continue;

		iter = cxled_to_port(cxled);
		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
			iter = to_cxl_port(iter->dev.parent);

		for (ep = cxl_ep_load(iter, cxlmd); iter;
		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd))
			cxl_port_reset_targets(iter, cxlr);
	}
}
static int cxl_region_setup_targets(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_endpoint_decoder *cxled;
	struct cxl_dev_state *cxlds;
	int i, rc, rch = 0, vh = 0;
	struct cxl_memdev *cxlmd;
	struct cxl_port *iter;
	struct cxl_ep *ep;

	for (i = 0; i < p->nr_targets; i++) {
		cxled = p->targets[i];
		cxlmd = cxled_to_memdev(cxled);
		cxlds = cxlmd->cxlds;

		/* validate that all targets agree on topology */
		if (!cxlds->rcd) {
			vh++;
		} else {
			rch++;
			continue;
		}

		iter = cxled_to_port(cxled);
		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
			iter = to_cxl_port(iter->dev.parent);

		/*
		 * Descend the topology tree programming / validating
		 * targets while looking for conflicts.
		 */
		for (ep = cxl_ep_load(iter, cxlmd); iter;
		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
			rc = cxl_port_setup_targets(iter, cxlr, cxled);
			if (rc) {
				cxl_region_teardown_targets(cxlr);
				return rc;
			}
		}
	}

	if (rch && vh) {
		dev_err(&cxlr->dev, "mismatched CXL topologies detected\n");
		cxl_region_teardown_targets(cxlr);
		return -ENXIO;
	}

	return 0;
}
static int cxl_region_validate_position(struct cxl_region *cxlr,
					struct cxl_endpoint_decoder *cxled,
					int pos)
{
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_region_params *p = &cxlr->params;
	int i;

	if (pos < 0 || pos >= p->interleave_ways) {
		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
			p->interleave_ways);
		return -ENXIO;
	}

	if (p->targets[pos] == cxled)
		return 0;

	if (p->targets[pos]) {
		struct cxl_endpoint_decoder *cxled_target = p->targets[pos];
		struct cxl_memdev *cxlmd_target = cxled_to_memdev(cxled_target);

		dev_dbg(&cxlr->dev, "position %d already assigned to %s:%s\n",
			pos, dev_name(&cxlmd_target->dev),
			dev_name(&cxled_target->cxld.dev));
		return -EBUSY;
	}

	for (i = 0; i < p->interleave_ways; i++) {
		struct cxl_endpoint_decoder *cxled_target;
		struct cxl_memdev *cxlmd_target;

		cxled_target = p->targets[i];
		if (!cxled_target)
			continue;

		cxlmd_target = cxled_to_memdev(cxled_target);
		if (cxlmd_target == cxlmd) {
			dev_dbg(&cxlr->dev,
				"%s already specified at position %d via: %s\n",
				dev_name(&cxlmd->dev), pos,
				dev_name(&cxled_target->cxld.dev));
			return -EBUSY;
		}
	}

	return 0;
}
static int cxl_region_attach_position(struct cxl_region *cxlr,
				      struct cxl_root_decoder *cxlrd,
				      struct cxl_endpoint_decoder *cxled,
				      const struct cxl_dport *dport, int pos)
{
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_port *iter;
	int rc;

	if (cxlrd->calc_hb(cxlrd, pos) != dport) {
		dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
			dev_name(&cxlrd->cxlsd.cxld.dev));
		return -ENXIO;
	}

	for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
	     iter = to_cxl_port(iter->dev.parent)) {
		rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
		if (rc)
			goto err;
	}

	return 0;

err:
	for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
	     iter = to_cxl_port(iter->dev.parent))
		cxl_port_detach_region(iter, cxlr, cxled);
	return rc;
}
static int cxl_region_attach_auto(struct cxl_region *cxlr,
				  struct cxl_endpoint_decoder *cxled, int pos)
{
	struct cxl_region_params *p = &cxlr->params;

	if (cxled->state != CXL_DECODER_STATE_AUTO) {
		dev_err(&cxlr->dev,
			"%s: unable to add decoder to autodetected region\n",
			dev_name(&cxled->cxld.dev));
		return -EINVAL;
	}

	if (pos >= 0) {
		dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n",
			dev_name(&cxled->cxld.dev), pos);
		return -EINVAL;
	}

	if (p->nr_targets >= p->interleave_ways) {
		dev_err(&cxlr->dev, "%s: no more target slots available\n",
			dev_name(&cxled->cxld.dev));
		return -ENXIO;
	}

	/*
	 * Temporarily record the endpoint decoder into the target array. Yes,
	 * this means that userspace can view devices in the wrong position
	 * before the region activates, and must be careful to understand when
	 * it might be racing region autodiscovery.
	 */
	pos = p->nr_targets;
	p->targets[pos] = cxled;
	cxled->pos = pos;
	p->nr_targets++;

	return 0;
}
static int cmp_interleave_pos(const void *a, const void *b)
{
	struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
	struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;

	return cxled_a->pos - cxled_b->pos;
}

static struct cxl_port *next_port(struct cxl_port *port)
{
	if (!port->parent_dport)
		return NULL;
	return port->parent_dport->port;
}
static int match_switch_decoder_by_range(struct device *dev, void *data)
{
	struct cxl_switch_decoder *cxlsd;
	struct range *r1, *r2 = data;

	if (!is_switch_decoder(dev))
		return 0;

	cxlsd = to_cxl_switch_decoder(dev);
	r1 = &cxlsd->cxld.hpa_range;

	if (is_root_decoder(dev))
		return range_contains(r1, r2);
	return (r1->start == r2->start && r1->end == r2->end);
}
static int find_pos_and_ways(struct cxl_port *port, struct range *range,
			     int *pos, int *ways)
{
	struct cxl_switch_decoder *cxlsd;
	struct cxl_port *parent;
	struct device *dev;
	int rc = -ENXIO;

	parent = next_port(port);
	if (!parent)
		return rc;

	dev = device_find_child(&parent->dev, range,
				match_switch_decoder_by_range);
	if (!dev) {
		dev_err(port->uport_dev,
			"failed to find decoder mapping %#llx-%#llx\n",
			range->start, range->end);
		return rc;
	}
	cxlsd = to_cxl_switch_decoder(dev);
	*ways = cxlsd->cxld.interleave_ways;

	for (int i = 0; i < *ways; i++) {
		if (cxlsd->target[i] == port->parent_dport) {
			*pos = i;
			rc = 0;
			break;
		}
	}
	put_device(dev);

	return rc;
}
/**
 * cxl_calc_interleave_pos() - calculate an endpoint position in a region
 * @cxled: endpoint decoder member of given region
 *
 * The endpoint position is calculated by traversing the topology from
 * the endpoint to the root decoder and iteratively applying this
 * calculation:
 *
 *    position = position * parent_ways + parent_pos;
 *
 * ...where @position is inferred from switch and root decoder target lists.
 *
 * Return: position >= 0 on success
 *	   -ENXIO on failure
 */
static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled)
{
	struct cxl_port *iter, *port = cxled_to_port(cxled);
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct range *range = &cxled->cxld.hpa_range;
	int parent_ways = 0, parent_pos = 0, pos = 0;
	int rc;

	/*
	 * Example: the expected interleave order of the 4-way region shown
	 * below is: mem0, mem2, mem1, mem3
	 *
	 *		  root_port
	 *                 /      \
	 *      host_bridge_0    host_bridge_1
	 *        |    |           |    |
	 *       mem0 mem1        mem2 mem3
	 *
	 * In the example the calculator will iterate twice. The first iteration
	 * uses the mem position in the host-bridge and the ways of the host-
	 * bridge to generate the first, or local, position. The second
	 * iteration uses the host-bridge position in the root_port and the ways
	 * of the root_port to refine the position.
	 *
	 * A trace of the calculation per endpoint looks like this:
	 * mem0: pos = 0 * 2 + 0    mem2: pos = 0 * 2 + 0
	 *       pos = 0 * 2 + 0          pos = 0 * 2 + 1
	 *       pos: 0                   pos: 1
	 *
	 * mem1: pos = 0 * 2 + 1    mem3: pos = 0 * 2 + 1
	 *       pos = 1 * 2 + 0          pos = 1 * 2 + 1
	 *       pos: 2                   pos: 3
	 *
	 * Note that while this example is simple, the method applies to more
	 * complex topologies, including those with switches.
	 */

	/* Iterate from endpoint to root_port refining the position */
	for (iter = port; iter; iter = next_port(iter)) {
		if (is_cxl_root(iter))
			break;

		rc = find_pos_and_ways(iter, range, &parent_pos, &parent_ways);
		if (rc)
			return rc;

		pos = pos * parent_ways + parent_pos;
	}

	dev_dbg(&cxlmd->dev,
		"decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n",
		dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent),
		dev_name(&port->dev), range->start, range->end, pos);

	return pos;
}
static int cxl_region_sort_targets(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;
	int i, rc = 0;

	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];

		cxled->pos = cxl_calc_interleave_pos(cxled);
		/*
		 * Record that sorting failed, but still continue to calc
		 * cxled->pos so that follow-on code paths can reliably
		 * do p->targets[cxled->pos] to self-reference their entry.
		 */
		if (cxled->pos < 0)
			rc = -ENXIO;
	}
	/* Keep the cxlr target list in interleave position order */
	sort(p->targets, p->nr_targets, sizeof(p->targets[0]),
	     cmp_interleave_pos, NULL);

	dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
	return rc;
}
static int cxl_region_attach(struct cxl_region *cxlr,
			     struct cxl_endpoint_decoder *cxled, int pos)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_port *ep_port, *root_port;
	struct cxl_dport *dport;
	int rc = -ENXIO;

	if (cxled->mode != cxlr->mode) {
		dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
			dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
		return -EINVAL;
	}

	if (cxled->mode == CXL_DECODER_DEAD) {
		dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
		return -ENODEV;
	}

	/* all full of members, or interleave config not established? */
	if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
		dev_dbg(&cxlr->dev, "region already active\n");
		return -EBUSY;
	} else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
		dev_dbg(&cxlr->dev, "interleave config missing\n");
		return -ENXIO;
	}

	if (p->nr_targets >= p->interleave_ways) {
		dev_dbg(&cxlr->dev, "region already has %d endpoints\n",
			p->nr_targets);
		return -EINVAL;
	}

	ep_port = cxled_to_port(cxled);
	root_port = cxlrd_to_port(cxlrd);
	dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
	if (!dport) {
		dev_dbg(&cxlr->dev, "%s:%s invalid target for %s\n",
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
			dev_name(cxlr->dev.parent));
		return -ENXIO;
	}

	if (cxled->cxld.target_type != cxlr->type) {
		dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n",
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
			cxled->cxld.target_type, cxlr->type);
		return -ENXIO;
	}

	if (!cxled->dpa_res) {
		dev_dbg(&cxlr->dev, "%s:%s: missing DPA allocation.\n",
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev));
		return -ENXIO;
	}

	if (resource_size(cxled->dpa_res) * p->interleave_ways !=
	    resource_size(p->res)) {
		dev_dbg(&cxlr->dev,
			"%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n",
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
			(u64)resource_size(cxled->dpa_res), p->interleave_ways,
			(u64)resource_size(p->res));
		return -EINVAL;
	}

	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
		int i;

		rc = cxl_region_attach_auto(cxlr, cxled, pos);
		if (rc)
			return rc;

		/* await more targets to arrive... */
		if (p->nr_targets < p->interleave_ways)
			return 0;

		/*
		 * All targets are here, which implies all PCI enumeration that
		 * affects this region has been completed. Walk the topology to
		 * sort the devices into their relative region decode position.
		 */
		rc = cxl_region_sort_targets(cxlr);
		if (rc)
			return rc;

		for (i = 0; i < p->nr_targets; i++) {
			cxled = p->targets[i];
			ep_port = cxled_to_port(cxled);
			dport = cxl_find_dport_by_dev(root_port,
						      ep_port->host_bridge);
			rc = cxl_region_attach_position(cxlr, cxlrd, cxled,
							dport, i);
			if (rc)
				return rc;
		}

		rc = cxl_region_setup_targets(cxlr);
		if (rc)
			return rc;

		/*
		 * If target setup succeeds in the autodiscovery case
		 * then the region is already committed.
		 */
		p->state = CXL_CONFIG_COMMIT;

		return 0;
	}

	rc = cxl_region_validate_position(cxlr, cxled, pos);
	if (rc)
		return rc;

	rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos);
	if (rc)
		return rc;

	p->targets[pos] = cxled;
	cxled->pos = pos;
	p->nr_targets++;

	if (p->nr_targets == p->interleave_ways) {
		rc = cxl_region_setup_targets(cxlr);
		if (rc)
			return rc;
		p->state = CXL_CONFIG_ACTIVE;
	}

	cxled->cxld.interleave_ways = p->interleave_ways;
	cxled->cxld.interleave_granularity = p->interleave_granularity;
	cxled->cxld.hpa_range = (struct range) {
		.start = p->res->start,
		.end = p->res->end,
	};

	if (p->nr_targets != p->interleave_ways)
		return 0;

	/*
	 * Test the auto-discovery position calculator function
	 * against this successfully created user-defined region.
	 * A fail message here means that this interleave config
	 * will fail when presented as CXL_REGION_F_AUTO.
	 */
	for (int i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];
		int test_pos;

		test_pos = cxl_calc_interleave_pos(cxled);
		dev_dbg(&cxled->cxld.dev,
			"Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n",
			(test_pos == cxled->pos) ? "success" : "fail",
			test_pos, cxled->pos);
	}

	return 0;
}
static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
{
	struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
	struct cxl_region *cxlr = cxled->cxld.region;
	struct cxl_region_params *p;
	int rc = 0;

	lockdep_assert_held_write(&cxl_region_rwsem);

	if (!cxlr)
		return 0;

	p = &cxlr->params;
	get_device(&cxlr->dev);

	if (p->state > CXL_CONFIG_ACTIVE) {
		/*
		 * TODO: tear down all impacted regions if a device is
		 * removed out of order
		 */
		rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
		if (rc)
			goto out;
		p->state = CXL_CONFIG_ACTIVE;
	}

	for (iter = ep_port; !is_cxl_root(iter);
	     iter = to_cxl_port(iter->dev.parent))
		cxl_port_detach_region(iter, cxlr, cxled);

	if (cxled->pos < 0 || cxled->pos >= p->interleave_ways ||
	    p->targets[cxled->pos] != cxled) {
		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);

		dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
			      dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
			      cxled->pos);
		goto out;
	}

	if (p->state == CXL_CONFIG_ACTIVE) {
		p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
		cxl_region_teardown_targets(cxlr);
	}
	p->targets[cxled->pos] = NULL;
	p->nr_targets--;
	cxled->cxld.hpa_range = (struct range) {
		.start = 0,
		.end = -1,
	};

	/* notify the region driver that one of its targets has departed */
	up_write(&cxl_region_rwsem);
	device_release_driver(&cxlr->dev);
	down_write(&cxl_region_rwsem);
out:
	put_device(&cxlr->dev);
	return rc;
}
void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
{
	down_write(&cxl_region_rwsem);
	cxled->mode = CXL_DECODER_DEAD;
	cxl_region_detach(cxled);
	up_write(&cxl_region_rwsem);
}
static int attach_target(struct cxl_region *cxlr,
			 struct cxl_endpoint_decoder *cxled, int pos,
			 unsigned int state)
{
	int rc = 0;

	if (state == TASK_INTERRUPTIBLE)
		rc = down_write_killable(&cxl_region_rwsem);
	else
		down_write(&cxl_region_rwsem);
	if (rc)
		return rc;

	down_read(&cxl_dpa_rwsem);
	rc = cxl_region_attach(cxlr, cxled, pos);
	up_read(&cxl_dpa_rwsem);
	up_write(&cxl_region_rwsem);
	return rc;
}
static int detach_target(struct cxl_region *cxlr, int pos)
{
	struct cxl_region_params *p = &cxlr->params;
	int rc;

	rc = down_write_killable(&cxl_region_rwsem);
	if (rc)
		return rc;

	if (pos >= p->interleave_ways) {
		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
			p->interleave_ways);
		rc = -ENXIO;
		goto out;
	}

	if (!p->targets[pos]) {
		rc = 0;
		goto out;
	}

	rc = cxl_region_detach(p->targets[pos]);
out:
	up_write(&cxl_region_rwsem);
	return rc;
}
static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
			    size_t len)
{
	int rc;

	if (sysfs_streq(buf, "\n"))
		rc = detach_target(cxlr, pos);
	else {
		struct device *dev;

		dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf);
		if (!dev)
			return -ENODEV;

		if (!is_endpoint_decoder(dev)) {
			rc = -EINVAL;
			goto out;
		}

		rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos,
				   TASK_INTERRUPTIBLE);
out:
		put_device(dev);
	}

	if (rc < 0)
		return rc;
	return len;
}
#define TARGET_ATTR_RW(n)                                              \
static ssize_t target##n##_show(                                       \
	struct device *dev, struct device_attribute *attr, char *buf)  \
{                                                                      \
	return show_targetN(to_cxl_region(dev), buf, (n));             \
}                                                                      \
static ssize_t target##n##_store(struct device *dev,                   \
				 struct device_attribute *attr,        \
				 const char *buf, size_t len)          \
{                                                                      \
	return store_targetN(to_cxl_region(dev), buf, (n), len);       \
}                                                                      \
static DEVICE_ATTR_RW(target##n)

TARGET_ATTR_RW(0);
TARGET_ATTR_RW(1);
TARGET_ATTR_RW(2);
TARGET_ATTR_RW(3);
TARGET_ATTR_RW(4);
TARGET_ATTR_RW(5);
TARGET_ATTR_RW(6);
TARGET_ATTR_RW(7);
TARGET_ATTR_RW(8);
TARGET_ATTR_RW(9);
TARGET_ATTR_RW(10);
TARGET_ATTR_RW(11);
TARGET_ATTR_RW(12);
TARGET_ATTR_RW(13);
TARGET_ATTR_RW(14);
TARGET_ATTR_RW(15);
static struct attribute *target_attrs[] = {
	&dev_attr_target0.attr,
	&dev_attr_target1.attr,
	&dev_attr_target2.attr,
	&dev_attr_target3.attr,
	&dev_attr_target4.attr,
	&dev_attr_target5.attr,
	&dev_attr_target6.attr,
	&dev_attr_target7.attr,
	&dev_attr_target8.attr,
	&dev_attr_target9.attr,
	&dev_attr_target10.attr,
	&dev_attr_target11.attr,
	&dev_attr_target12.attr,
	&dev_attr_target13.attr,
	&dev_attr_target14.attr,
	&dev_attr_target15.attr,
	NULL,
};
static umode_t cxl_region_target_visible(struct kobject *kobj,
					 struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;

	if (n < p->interleave_ways)
		return a->mode;
	return 0;
}

static const struct attribute_group cxl_region_target_group = {
	.attrs = target_attrs,
	.is_visible = cxl_region_target_visible,
};

static const struct attribute_group *get_cxl_region_target_group(void)
{
	return &cxl_region_target_group;
}
static const struct attribute_group *region_groups[] = {
	&cxl_base_attribute_group,
	&cxl_region_group,
	&cxl_region_target_group,
	NULL,
};
static void cxl_region_release(struct device *dev)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
	struct cxl_region *cxlr = to_cxl_region(dev);
	int id = atomic_read(&cxlrd->region_id);

	/*
	 * Try to reuse the recently idled id rather than the cached
	 * next id to prevent the region id space from increasing
	 * unnecessarily.
	 */
	if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) {
		memregion_free(id);
		goto out;
	}

	memregion_free(cxlr->id);
out:
	put_device(dev->parent);
	kfree(cxlr);
}
const struct device_type cxl_region_type = {
	.name = "cxl_region",
	.release = cxl_region_release,
	.groups = region_groups
};

bool is_cxl_region(struct device *dev)
{
	return dev->type == &cxl_region_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_region, CXL);

static struct cxl_region *to_cxl_region(struct device *dev)
{
	if (dev_WARN_ONCE(dev, dev->type != &cxl_region_type,
			  "not a cxl_region device\n"))
		return NULL;

	return container_of(dev, struct cxl_region, dev);
}
static void unregister_region(void *dev)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	int i;

	device_del(dev);

	/*
	 * Now that region sysfs is shutdown, the parameter block is
	 * read-only, so no need to hold the region rwsem to access the
	 * region parameters.
	 */
	for (i = 0; i < p->interleave_ways; i++)
		detach_target(cxlr, i);

	cxl_region_iomem_release(cxlr);
	put_device(dev);
}
static struct lock_class_key cxl_region_key;

static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int id)
{
	struct cxl_region *cxlr;
	struct device *dev;

	cxlr = kzalloc(sizeof(*cxlr), GFP_KERNEL);
	if (!cxlr) {
		memregion_free(id);
		return ERR_PTR(-ENOMEM);
	}

	dev = &cxlr->dev;
	device_initialize(dev);
	lockdep_set_class(&dev->mutex, &cxl_region_key);
	dev->parent = &cxlrd->cxlsd.cxld.dev;
	/*
	 * Keep root decoder pinned through cxl_region_release to fixup
	 * region id allocations
	 */
	get_device(dev->parent);
	device_set_pm_not_required(dev);
	dev->bus = &cxl_bus_type;
	dev->type = &cxl_region_type;
	cxlr->id = id;

	return cxlr;
}
/**
 * devm_cxl_add_region - Adds a region to a decoder
 * @cxlrd: root decoder
 * @id: memregion id to create, or memregion_free() on failure
 * @mode: mode for the endpoint decoders of this region
 * @type: select whether this is an expander or accelerator (type-2 or type-3)
 *
 * This is the second step of region initialization. Regions exist within an
 * address space which is mapped by a @cxlrd.
 *
 * Return: a new region on success, or an ERR_PTR() on failure. The region
 * will be named "regionZ" where Z is the unique region number.
 */
static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
					      int id,
					      enum cxl_decoder_mode mode,
					      enum cxl_decoder_type type)
{
	struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
	struct cxl_region *cxlr;
	struct device *dev;
	int rc;

	switch (mode) {
	case CXL_DECODER_RAM:
	case CXL_DECODER_PMEM:
		break;
	default:
		dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
		return ERR_PTR(-EINVAL);
	}

	cxlr = cxl_region_alloc(cxlrd, id);
	if (IS_ERR(cxlr))
		return cxlr;
	cxlr->mode = mode;
	cxlr->type = type;

	dev = &cxlr->dev;
	rc = dev_set_name(dev, "region%d", id);
	if (rc)
		goto err;

	rc = device_add(dev);
	if (rc)
		goto err;

	rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
	if (rc)
		return ERR_PTR(rc);

	dev_dbg(port->uport_dev, "%s: created %s\n",
		dev_name(&cxlrd->cxlsd.cxld.dev), dev_name(dev));
	return cxlr;

err:
	put_device(dev);
	return ERR_PTR(rc);
}
static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf)
{
	return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
}

static ssize_t create_pmem_region_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	return __create_region_show(to_cxl_root_decoder(dev), buf);
}

static ssize_t create_ram_region_show(struct device *dev,
				      struct device_attribute *attr, char *buf)
{
	return __create_region_show(to_cxl_root_decoder(dev), buf);
}
static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
					  enum cxl_decoder_mode mode, int id)
{
	int rc;

	rc = memregion_alloc(GFP_KERNEL);
	if (rc < 0)
		return ERR_PTR(rc);

	if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
		memregion_free(rc);
		return ERR_PTR(-EBUSY);
	}

	return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM);
}
static ssize_t create_pmem_region_store(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t len)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
	struct cxl_region *cxlr;
	int rc, id;

	rc = sscanf(buf, "region%d\n", &id);
	if (rc != 1)
		return -EINVAL;

	cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id);
	if (IS_ERR(cxlr))
		return PTR_ERR(cxlr);

	return len;
}
DEVICE_ATTR_RW(create_pmem_region);

static ssize_t create_ram_region_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t len)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
	struct cxl_region *cxlr;
	int rc, id;

	rc = sscanf(buf, "region%d\n", &id);
	if (rc != 1)
		return -EINVAL;

	cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id);
	if (IS_ERR(cxlr))
		return PTR_ERR(cxlr);

	return len;
}
DEVICE_ATTR_RW(create_ram_region);
static ssize_t region_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct cxl_decoder *cxld = to_cxl_decoder(dev);
	ssize_t rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;

	if (cxld->region)
		rc = sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
	else
		rc = sysfs_emit(buf, "\n");
	up_read(&cxl_region_rwsem);

	return rc;
}
DEVICE_ATTR_RO(region);
static struct cxl_region *
cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
{
	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
	struct device *region_dev;

	region_dev = device_find_child_by_name(&cxld->dev, name);
	if (!region_dev)
		return ERR_PTR(-ENODEV);

	return to_cxl_region(region_dev);
}
static ssize_t delete_region_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t len)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
	struct cxl_port *port = to_cxl_port(dev->parent);
	struct cxl_region *cxlr;

	cxlr = cxl_find_region_by_name(cxlrd, buf);
	if (IS_ERR(cxlr))
		return PTR_ERR(cxlr);

	devm_release_action(port->uport_dev, unregister_region, cxlr);
	put_device(&cxlr->dev);

	return len;
}
DEVICE_ATTR_WO(delete_region);
static void cxl_pmem_region_release(struct device *dev)
{
	struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
	int i;

	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
		struct cxl_memdev *cxlmd = cxlr_pmem->mapping[i].cxlmd;

		put_device(&cxlmd->dev);
	}

	kfree(cxlr_pmem);
}

static const struct attribute_group *cxl_pmem_region_attribute_groups[] = {
	&cxl_base_attribute_group,
	NULL,
};

const struct device_type cxl_pmem_region_type = {
	.name = "cxl_pmem_region",
	.release = cxl_pmem_region_release,
	.groups = cxl_pmem_region_attribute_groups,
};
bool is_cxl_pmem_region(struct device *dev)
{
	return dev->type == &cxl_pmem_region_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, CXL);

struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
{
	if (dev_WARN_ONCE(dev, !is_cxl_pmem_region(dev),
			  "not a cxl_pmem_region device\n"))
		return NULL;
	return container_of(dev, struct cxl_pmem_region, dev);
}
EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL);
struct cxl_poison_context {
	struct cxl_port *port;
	enum cxl_decoder_mode mode;
	u64 offset;
};
static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd,
				   struct cxl_poison_context *ctx)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	u64 offset, length;
	int rc = 0;

	/*
	 * Collect poison for the remaining unmapped resources
	 * after poison is collected by committed endpoints.
	 *
	 * Knowing that PMEM must always follow RAM, get poison
	 * for unmapped resources based on the last decoder's mode:
	 *	ram: scan remains of ram range, then any pmem range
	 *	pmem: scan remains of pmem range
	 */
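	/*
	 * E.g. (hypothetical layout): for a device with 2GB of ram followed
	 * by 2GB of pmem whose last committed decoder ends at DPA 1GB, the
	 * remaining 1GB of ram is scanned first, followed by the entire
	 * pmem range.
	 */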
	if (ctx->mode == CXL_DECODER_RAM) {
		offset = ctx->offset;
		length = resource_size(&cxlds->ram_res) - offset;
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		if (rc == -EFAULT)
			rc = 0;
		if (rc)
			return rc;
	}
	if (ctx->mode == CXL_DECODER_PMEM) {
		offset = ctx->offset;
		length = resource_size(&cxlds->dpa_res) - offset;
		if (!length)
			return 0;
	} else if (resource_size(&cxlds->pmem_res)) {
		offset = cxlds->pmem_res.start;
		length = resource_size(&cxlds->pmem_res);
	} else {
		return 0;
	}

	return cxl_mem_get_poison(cxlmd, offset, length, NULL);
}
static int poison_by_decoder(struct device *dev, void *arg)
{
	struct cxl_poison_context *ctx = arg;
	struct cxl_endpoint_decoder *cxled;
	struct cxl_memdev *cxlmd;
	u64 offset, length;
	int rc = 0;

	if (!is_endpoint_decoder(dev))
		return rc;

	cxled = to_cxl_endpoint_decoder(dev);
	if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
		return rc;

	/*
	 * Regions are only created with single mode decoders: pmem or ram.
	 * Linux does not support mixed mode decoders. This means that
	 * reading poison per endpoint decoder adheres to the requirement
	 * that poison reads of pmem and ram must be separated.
	 * CXL 3.0 Spec 8.2.9.8.4.1
	 */
	if (cxled->mode == CXL_DECODER_MIXED) {
		dev_dbg(dev, "poison list read unsupported in mixed mode\n");
		return rc;
	}

	cxlmd = cxled_to_memdev(cxled);
	if (cxled->skip) {
		offset = cxled->dpa_res->start - cxled->skip;
		length = cxled->skip;
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
			rc = 0;
		if (rc)
			return rc;
	}

	offset = cxled->dpa_res->start;
	length = cxled->dpa_res->end - offset + 1;
	rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region);
	if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
		rc = 0;
	if (rc)
		return rc;

	/* Iterate until commit_end is reached */
	if (cxled->cxld.id == ctx->port->commit_end) {
		ctx->offset = cxled->dpa_res->end + 1;
		ctx->mode = cxled->mode;
		return 1;
	}

	return 0;
}
int cxl_get_poison_by_endpoint(struct cxl_port *port)
{
	struct cxl_poison_context ctx;
	int rc = 0;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;

	ctx = (struct cxl_poison_context) {
		.port = port,
	};

	rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder);
	if (rc == 1)
		rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev),
					     &ctx);

	up_read(&cxl_region_rwsem);
	return rc;
}
static struct lock_class_key cxl_pmem_region_key;

static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_nvdimm_bridge *cxl_nvb;
	struct cxl_pmem_region *cxlr_pmem;
	struct device *dev;
	int i;

	down_read(&cxl_region_rwsem);
	if (p->state != CXL_CONFIG_COMMIT) {
		cxlr_pmem = ERR_PTR(-ENXIO);
		goto out;
	}

	cxlr_pmem = kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets),
			    GFP_KERNEL);
	if (!cxlr_pmem) {
		cxlr_pmem = ERR_PTR(-ENOMEM);
		goto out;
	}

	cxlr_pmem->hpa_range.start = p->res->start;
	cxlr_pmem->hpa_range.end = p->res->end;

	/* Snapshot the region configuration underneath the cxl_region_rwsem */
	cxlr_pmem->nr_mappings = p->nr_targets;
	for (i = 0; i < p->nr_targets; i++) {
		struct cxl_endpoint_decoder *cxled = p->targets[i];
		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];

		/*
		 * Regions never span CXL root devices, so by definition the
		 * bridge for one device is the same for all.
		 */
		if (i == 0) {
			cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
			if (!cxl_nvb) {
				cxlr_pmem = ERR_PTR(-ENODEV);
				goto out;
			}
			cxlr->cxl_nvb = cxl_nvb;
		}
		m->cxlmd = cxlmd;
		get_device(&cxlmd->dev);
		m->start = cxled->dpa_res->start;
		m->size = resource_size(cxled->dpa_res);
		m->position = i;
	}

	dev = &cxlr_pmem->dev;
	cxlr_pmem->cxlr = cxlr;
	cxlr->cxlr_pmem = cxlr_pmem;
	device_initialize(dev);
	lockdep_set_class(&dev->mutex, &cxl_pmem_region_key);
	device_set_pm_not_required(dev);
	dev->parent = &cxlr->dev;
	dev->bus = &cxl_bus_type;
	dev->type = &cxl_pmem_region_type;
out:
	up_read(&cxl_region_rwsem);

	return cxlr_pmem;
}
static void cxl_dax_region_release(struct device *dev)
{
	struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev);

	kfree(cxlr_dax);
}

static const struct attribute_group *cxl_dax_region_attribute_groups[] = {
	&cxl_base_attribute_group,
	NULL,
};

const struct device_type cxl_dax_region_type = {
	.name = "cxl_dax_region",
	.release = cxl_dax_region_release,
	.groups = cxl_dax_region_attribute_groups,
};

static bool is_cxl_dax_region(struct device *dev)
{
	return dev->type == &cxl_dax_region_type;
}

struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
{
	if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev),
			  "not a cxl_dax_region device\n"))
		return NULL;
	return container_of(dev, struct cxl_dax_region, dev);
}
EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL);
2579 static struct lock_class_key cxl_dax_region_key;
static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_dax_region *cxlr_dax;
	struct device *dev;

	down_read(&cxl_region_rwsem);
	if (p->state != CXL_CONFIG_COMMIT) {
		cxlr_dax = ERR_PTR(-ENXIO);
		goto out;
	}

	cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
	if (!cxlr_dax) {
		cxlr_dax = ERR_PTR(-ENOMEM);
		goto out;
	}

	cxlr_dax->hpa_range.start = p->res->start;
	cxlr_dax->hpa_range.end = p->res->end;

	dev = &cxlr_dax->dev;
	cxlr_dax->cxlr = cxlr;
	device_initialize(dev);
	lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
	device_set_pm_not_required(dev);
	dev->parent = &cxlr->dev;
	dev->bus = &cxl_bus_type;
	dev->type = &cxl_dax_region_type;
out:
	up_read(&cxl_region_rwsem);

	return cxlr_dax;
}
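/*
 * Release action for the pmem region bridge device. Callers must hold
 * the nvdimm-bridge device_lock(), see the comment below.
 */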
static void cxlr_pmem_unregister(void *_cxlr_pmem)
{
	struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem;
	struct cxl_region *cxlr = cxlr_pmem->cxlr;
	struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;

	/*
	 * Either the bridge is in ->remove() context under the device_lock(),
	 * or cxlr_release_nvdimm() is cancelling the bridge's release action
	 * for @cxlr_pmem and doing it itself (while manually holding the bridge
	 * device_lock()).
	 */
	device_lock_assert(&cxl_nvb->dev);
	cxlr->cxlr_pmem = NULL;
	cxlr_pmem->cxlr = NULL;
	device_unregister(&cxlr_pmem->dev);
}
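/*
 * Region-level release action: drop the reference that @cxlr holds on
 * the nvdimm bridge, first cancelling the bridge's own release action
 * for the pmem region bridge device if it is still registered.
 */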
static void cxlr_release_nvdimm(void *_cxlr)
{
	struct cxl_region *cxlr = _cxlr;
	struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;

	device_lock(&cxl_nvb->dev);
	if (cxlr->cxlr_pmem)
		devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister,
				    cxlr->cxlr_pmem);
	device_unlock(&cxl_nvb->dev);
	cxlr->cxl_nvb = NULL;
	put_device(&cxl_nvb->dev);
}
/**
 * devm_cxl_add_pmem_region() - add a cxl_region-to-nd_region bridge
 * @cxlr: parent CXL region for this pmem region bridge device
 *
 * Return: 0 on success, negative error code on failure.
 */
static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
{
	struct cxl_pmem_region *cxlr_pmem;
	struct cxl_nvdimm_bridge *cxl_nvb;
	struct device *dev;
	int rc;

	cxlr_pmem = cxl_pmem_region_alloc(cxlr);
	if (IS_ERR(cxlr_pmem))
		return PTR_ERR(cxlr_pmem);
	cxl_nvb = cxlr->cxl_nvb;

	dev = &cxlr_pmem->dev;
	rc = dev_set_name(dev, "pmem_region%d", cxlr->id);
	if (rc)
		goto err;

	rc = device_add(dev);
	if (rc)
		goto err;

	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
		dev_name(dev));

	device_lock(&cxl_nvb->dev);
	if (cxl_nvb->dev.driver)
		rc = devm_add_action_or_reset(&cxl_nvb->dev,
					      cxlr_pmem_unregister, cxlr_pmem);
	else
		rc = -ENXIO;
	device_unlock(&cxl_nvb->dev);

	if (rc)
		goto err_bridge;

	/* @cxlr carries a reference on @cxl_nvb until cxlr_release_nvdimm */
	return devm_add_action_or_reset(&cxlr->dev, cxlr_release_nvdimm, cxlr);

err:
	put_device(dev);
err_bridge:
	put_device(&cxl_nvb->dev);
	cxlr->cxl_nvb = NULL;
	return rc;
}
static void cxlr_dax_unregister(void *_cxlr_dax)
{
	struct cxl_dax_region *cxlr_dax = _cxlr_dax;

	device_unregister(&cxlr_dax->dev);
}
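/*
 * Register a "dax_region%d" device for a committed ram-mode region. The
 * matching devm action, cxlr_dax_unregister(), tears it down with the
 * parent region.
 */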
static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
{
	struct cxl_dax_region *cxlr_dax;
	struct device *dev;
	int rc;

	cxlr_dax = cxl_dax_region_alloc(cxlr);
	if (IS_ERR(cxlr_dax))
		return PTR_ERR(cxlr_dax);

	dev = &cxlr_dax->dev;
	rc = dev_set_name(dev, "dax_region%d", cxlr->id);
	if (rc)
		goto err;

	rc = device_add(dev);
	if (rc)
		goto err;

	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
		dev_name(dev));

	return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
					cxlr_dax);
err:
	put_device(dev);
	return rc;
}
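/* Match a root decoder whose HPA window contains the given range */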
static int match_root_decoder_by_range(struct device *dev, void *data)
{
	struct range *r1, *r2 = data;
	struct cxl_root_decoder *cxlrd;

	if (!is_root_decoder(dev))
		return 0;

	cxlrd = to_cxl_root_decoder(dev);
	r1 = &cxlrd->cxlsd.cxld.hpa_range;
	return range_contains(r1, r2);
}
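/* Match a region whose committed resource is an exact fit for the given range */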
static int match_region_by_range(struct device *dev, void *data)
{
	struct cxl_region_params *p;
	struct cxl_region *cxlr;
	struct range *r = data;
	int rc = 0;

	if (!is_cxl_region(dev))
		return 0;

	cxlr = to_cxl_region(dev);
	p = &cxlr->params;

	down_read(&cxl_region_rwsem);
	if (p->res && p->res->start == r->start && p->res->end == r->end)
		rc = 1;
	up_read(&cxl_region_rwsem);

	return rc;
}
/* Establish an empty region covering the given HPA range */
static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
					   struct cxl_endpoint_decoder *cxled)
{
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_port *port = cxlrd_to_port(cxlrd);
	struct range *hpa = &cxled->cxld.hpa_range;
	struct cxl_region_params *p;
	struct cxl_region *cxlr;
	struct resource *res;
	int rc;

	do {
		cxlr = __create_region(cxlrd, cxled->mode,
				       atomic_read(&cxlrd->region_id));
	} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);

	if (IS_ERR(cxlr)) {
		dev_err(cxlmd->dev.parent,
			"%s:%s: %s failed assign region: %ld\n",
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
			__func__, PTR_ERR(cxlr));
		return cxlr;
	}

	down_write(&cxl_region_rwsem);
	p = &cxlr->params;
	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
		dev_err(cxlmd->dev.parent,
			"%s:%s: %s autodiscovery interrupted\n",
			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
			__func__);
		rc = -EBUSY;
		goto err;
	}

	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);

	res = kmalloc(sizeof(*res), GFP_KERNEL);
	if (!res) {
		rc = -ENOMEM;
		goto err;
	}

	*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
				    dev_name(&cxlr->dev));
	rc = insert_resource(cxlrd->res, res);
	if (rc) {
		/*
		 * Platform-firmware may not have split resources like "System
		 * RAM" on CXL window boundaries, see cxl_region_iomem_release()
		 */
		dev_warn(cxlmd->dev.parent,
			 "%s:%s: %s %s cannot insert resource\n",
			 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
			 __func__, dev_name(&cxlr->dev));
	}

	p->res = res;
	p->interleave_ways = cxled->cxld.interleave_ways;
	p->interleave_granularity = cxled->cxld.interleave_granularity;
	p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;

	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
	if (rc)
		goto err;

	dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
		dev_name(&cxlr->dev), p->res, p->interleave_ways,
		p->interleave_granularity);

	/* ...to match put_device() in cxl_add_to_region() */
	get_device(&cxlr->dev);
	up_write(&cxl_region_rwsem);

	return cxlr;

err:
	up_write(&cxl_region_rwsem);
	devm_release_action(port->uport_dev, unregister_region, cxlr);
	return ERR_PTR(rc);
}
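/**
 * cxl_add_to_region() - attach an endpoint decoder to its platform region
 * @root: CXL root port to search for a matching CXL window
 * @cxled: endpoint decoder with a platform-established HPA range
 *
 * Find, or construct via construct_region(), the region backing @cxled's
 * HPA range, add @cxled as a target, and probe the region once its
 * configuration reaches CXL_CONFIG_COMMIT.
 */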
int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
{
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct range *hpa = &cxled->cxld.hpa_range;
	struct cxl_decoder *cxld = &cxled->cxld;
	struct device *cxlrd_dev, *region_dev;
	struct cxl_root_decoder *cxlrd;
	struct cxl_region_params *p;
	struct cxl_region *cxlr;
	bool attach = false;
	int rc;

	cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range,
				      match_root_decoder_by_range);
	if (!cxlrd_dev) {
		dev_err(cxlmd->dev.parent,
			"%s:%s no CXL window for range %#llx:%#llx\n",
			dev_name(&cxlmd->dev), dev_name(&cxld->dev),
			cxld->hpa_range.start, cxld->hpa_range.end);
		return -ENXIO;
	}

	cxlrd = to_cxl_root_decoder(cxlrd_dev);

	/*
	 * Ensure that if multiple threads race to construct_region() for @hpa
	 * one does the construction and the others add to that.
	 */
	mutex_lock(&cxlrd->range_lock);
	region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
				       match_region_by_range);
	if (!region_dev) {
		cxlr = construct_region(cxlrd, cxled);
		region_dev = &cxlr->dev;
	} else
		cxlr = to_cxl_region(region_dev);
	mutex_unlock(&cxlrd->range_lock);

	rc = PTR_ERR_OR_ZERO(cxlr);
	if (rc)
		goto out;

	attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);

	down_read(&cxl_region_rwsem);
	p = &cxlr->params;
	attach = p->state == CXL_CONFIG_COMMIT;
	up_read(&cxl_region_rwsem);

	if (attach) {
		/*
		 * If device_attach() fails the range may still be active via
		 * the platform-firmware memory map, otherwise the driver for
		 * regions is local to this file, so driver matching can't fail.
		 */
		if (device_attach(&cxlr->dev) < 0)
			dev_err(&cxlr->dev, "failed to enable, range: %pr\n",
				p->res);
	}

	put_device(region_dev);
out:
	put_device(cxlrd_dev);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL);
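/*
 * walk_iomem_res_desc() callback: any invocation means some portion of
 * the region intersects an online "System RAM" resource.
 */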
static int is_system_ram(struct resource *res, void *arg)
{
	struct cxl_region *cxlr = arg;
	struct cxl_region_params *p = &cxlr->params;

	dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
	return 1;
}
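/*
 * Region driver probe: pmem regions get a bridge device to the nvdimm
 * subsystem, ram regions get a cxl_dax_region device unless the range
 * is already online as "System RAM".
 */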
static int cxl_region_probe(struct device *dev)
{
	struct cxl_region *cxlr = to_cxl_region(dev);
	struct cxl_region_params *p = &cxlr->params;
	int rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc) {
		dev_dbg(&cxlr->dev, "probe interrupted\n");
		return rc;
	}

	if (p->state < CXL_CONFIG_COMMIT) {
		dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
		rc = -ENXIO;
		goto out;
	}

	if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) {
		dev_err(&cxlr->dev,
			"failed to activate, re-commit region and retry\n");
		rc = -ENXIO;
		goto out;
	}

	/*
	 * From this point on any path that changes the region's state away from
	 * CXL_CONFIG_COMMIT is also responsible for releasing the driver.
	 */
out:
	up_read(&cxl_region_rwsem);

	if (rc)
		return rc;

	switch (cxlr->mode) {
	case CXL_DECODER_PMEM:
		return devm_cxl_add_pmem_region(cxlr);
	case CXL_DECODER_RAM:
		/*
		 * The region cannot be managed by CXL if any portion of
		 * it is already online as 'System RAM'
		 */
		if (walk_iomem_res_desc(IORES_DESC_NONE,
					IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
					p->res->start, p->res->end, cxlr,
					is_system_ram) > 0)
			return 0;
		return devm_cxl_add_dax_region(cxlr);
	default:
		dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
			cxlr->mode);
		return -ENXIO;
	}
}
static struct cxl_driver cxl_region_driver = {
	.name = "cxl_region",
	.probe = cxl_region_probe,
	.id = CXL_DEVICE_REGION,
};

int cxl_region_init(void)
{
	return cxl_driver_register(&cxl_region_driver);
}

void cxl_region_exit(void)
{
	cxl_driver_unregister(&cxl_region_driver);
}

MODULE_IMPORT_NS(CXL);
MODULE_IMPORT_NS(DEVMEM);
MODULE_ALIAS_CXL(CXL_DEVICE_REGION);