// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
#include <linux/libnvdimm.h>
#include <asm/unaligned.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/ndctl.h>
#include <linux/async.h>
#include <linux/slab.h>
#include <linux/nd.h>
#include "cxlmem.h"
#include "cxl.h"

/*
 * Ordered workqueue for cxl nvdimm device arrival and departure
 * to coordinate bus rescans when a bridge arrives and trigger remove
 * operations when the bridge is removed.
 */
static struct workqueue_struct *cxl_pmem_wq;

static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);

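/*
 * clear_exclusive() is the devm undo action for the
 * set_exclusive_cxl_commands() call in cxl_nvdimm_probe(): once the
 * cxl_nvdimm unbinds, the label-area commands become available to
 * userspace again.
 */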
static void clear_exclusive(void *cxlds)
{
        clear_exclusive_cxl_commands(cxlds, exclusive_cmds);
}

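/*
 * Tear down an nvdimm: any pmem regions still referencing it are unbound
 * first, dropping the bridge lock around device_release_driver() because
 * region teardown (release_mappings()) also takes that lock.
 */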
static void unregister_nvdimm(void *nvdimm)
{
        struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
        struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge;
        struct cxl_pmem_region *cxlr_pmem;
        unsigned long index;

        device_lock(&cxl_nvb->dev);
        dev_set_drvdata(&cxl_nvd->dev, NULL);
        xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) {
                get_device(&cxlr_pmem->dev);
                device_unlock(&cxl_nvb->dev);

                device_release_driver(&cxlr_pmem->dev);
                put_device(&cxlr_pmem->dev);

                device_lock(&cxl_nvb->dev);
        }
        device_unlock(&cxl_nvb->dev);

        nvdimm_delete(nvdimm);
        cxl_nvd->bridge = NULL;
}

static int cxl_nvdimm_probe(struct device *dev)
{
        struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
        struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
        unsigned long flags = 0, cmd_mask = 0;
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        struct cxl_nvdimm_bridge *cxl_nvb;
        struct nvdimm *nvdimm;
        int rc;

        cxl_nvb = cxl_find_nvdimm_bridge(dev);
        if (!cxl_nvb)
                return -ENXIO;

        device_lock(&cxl_nvb->dev);
        if (!cxl_nvb->nvdimm_bus) {
                rc = -ENXIO;
                goto out;
        }

        set_exclusive_cxl_commands(cxlds, exclusive_cmds);
        rc = devm_add_action_or_reset(dev, clear_exclusive, cxlds);
        if (rc)
                goto out;

        set_bit(NDD_LABELING, &flags);
        set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
        set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
        set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
        nvdimm = nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, NULL, flags,
                               cmd_mask, 0, NULL);
        if (!nvdimm) {
                rc = -ENOMEM;
                goto out;
        }

        dev_set_drvdata(dev, nvdimm);
        cxl_nvd->bridge = cxl_nvb;
        rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
out:
        device_unlock(&cxl_nvb->dev);
        put_device(&cxl_nvb->dev);

        return rc;
}

static struct cxl_driver cxl_nvdimm_driver = {
        .name = "cxl_nvdimm",
        .probe = cxl_nvdimm_probe,
        .id = CXL_DEVICE_NVDIMM,
};

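/*
 * The following helpers back the ND_CMD_{GET_CONFIG_SIZE, GET_CONFIG_DATA,
 * SET_CONFIG_DATA} label-area commands by translating them into CXL
 * Get/Set LSA mailbox commands.
 */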
static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds,
                                    struct nd_cmd_get_config_size *cmd,
                                    unsigned int buf_len)
{
        if (sizeof(*cmd) > buf_len)
                return -EINVAL;

        *cmd = (struct nd_cmd_get_config_size) {
                .config_size = cxlds->lsa_size,
                .max_xfer = cxlds->payload_size - sizeof(struct cxl_mbox_set_lsa),
        };

        return 0;
}

static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds,
                                    struct nd_cmd_get_config_data_hdr *cmd,
                                    unsigned int buf_len)
{
        struct cxl_mbox_get_lsa get_lsa;
        int rc;

        if (sizeof(*cmd) > buf_len)
                return -EINVAL;
        if (struct_size(cmd, out_buf, cmd->in_length) > buf_len)
                return -EINVAL;

        get_lsa = (struct cxl_mbox_get_lsa) {
                .offset = cpu_to_le32(cmd->in_offset),
                .length = cpu_to_le32(cmd->in_length),
        };

        rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LSA, &get_lsa,
                               sizeof(get_lsa), cmd->out_buf, cmd->in_length);
        cmd->status = 0;

        return rc;
}

static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds,
                                    struct nd_cmd_set_config_hdr *cmd,
                                    unsigned int buf_len)
{
        struct cxl_mbox_set_lsa *set_lsa;
        int rc;

        if (sizeof(*cmd) > buf_len)
                return -EINVAL;

        /* 4-byte status follows the input data in the payload */
        if (size_add(struct_size(cmd, in_buf, cmd->in_length), 4) > buf_len)
                return -EINVAL;

        set_lsa =
                kvzalloc(struct_size(set_lsa, data, cmd->in_length), GFP_KERNEL);
        if (!set_lsa)
                return -ENOMEM;

        *set_lsa = (struct cxl_mbox_set_lsa) {
                .offset = cpu_to_le32(cmd->in_offset),
        };
        memcpy(set_lsa->data, cmd->in_buf, cmd->in_length);

        rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_LSA, set_lsa,
                               struct_size(set_lsa, data, cmd->in_length),
                               NULL, 0);

        /*
         * Set "firmware" status (4-packed bytes at the end of the input
         * payload).
         */
        put_unaligned(0, (u32 *) &cmd->in_buf[cmd->in_length]);
        kvfree(set_lsa);

        return rc;
}

static int cxl_pmem_nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd,
                               void *buf, unsigned int buf_len)
{
        struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
        unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
        struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
        struct cxl_dev_state *cxlds = cxlmd->cxlds;

        if (!test_bit(cmd, &cmd_mask))
                return -ENOTTY;

        switch (cmd) {
        case ND_CMD_GET_CONFIG_SIZE:
                return cxl_pmem_get_config_size(cxlds, buf, buf_len);
        case ND_CMD_GET_CONFIG_DATA:
                return cxl_pmem_get_config_data(cxlds, buf, buf_len);
        case ND_CMD_SET_CONFIG_DATA:
                return cxl_pmem_set_config_data(cxlds, buf, buf_len);
        default:
                return -ENOTTY;
        }
}

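/* The nvdimm_bus_descriptor->ndctl entry point for the "CXL" provider */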
static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc,
                        struct nvdimm *nvdimm, unsigned int cmd, void *buf,
                        unsigned int buf_len, int *cmd_rc)
{
        /*
         * No firmware response to translate, let the transport error
         * code take precedence.
         */
        *cmd_rc = 0;

        if (!nvdimm)
                return -ENOTTY;
        return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
}

static bool online_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb)
{
        if (cxl_nvb->nvdimm_bus)
                return true;
        cxl_nvb->nvdimm_bus =
                nvdimm_bus_register(&cxl_nvb->dev, &cxl_nvb->nd_desc);
        return cxl_nvb->nvdimm_bus != NULL;
}

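/*
 * bus_for_each_dev() callbacks used by offline_nvdimm_bus() to unbind the
 * cxl_nvdimm and cxl_pmem_region devices associated with a given bridge.
 */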
static int cxl_nvdimm_release_driver(struct device *dev, void *cxl_nvb)
{
        struct cxl_nvdimm *cxl_nvd;

        if (!is_cxl_nvdimm(dev))
                return 0;

        cxl_nvd = to_cxl_nvdimm(dev);
        if (cxl_nvd->bridge != cxl_nvb)
                return 0;

        device_release_driver(dev);
        return 0;
}

static int cxl_pmem_region_release_driver(struct device *dev, void *cxl_nvb)
{
        struct cxl_pmem_region *cxlr_pmem;

        if (!is_cxl_pmem_region(dev))
                return 0;

        cxlr_pmem = to_cxl_pmem_region(dev);
        if (cxlr_pmem->bridge != cxl_nvb)
                return 0;

        device_release_driver(dev);
        return 0;
}

static void offline_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb,
                               struct nvdimm_bus *nvdimm_bus)
{
        if (!nvdimm_bus)
                return;

        /*
         * Set the state of cxl_nvdimm devices to unbound / idle before
         * nvdimm_bus_unregister() rips the nvdimm objects out from
         * underneath them.
         */
        bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
                         cxl_pmem_region_release_driver);
        bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
                         cxl_nvdimm_release_driver);
        nvdimm_bus_unregister(nvdimm_bus);
}

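/*
 * Bridge state machine, run from the ordered cxl_pmem_wq: ONLINE registers
 * the nvdimm_bus and rescans the CXL bus so dependent devices can probe;
 * OFFLINE / DEAD detaches dependents and unregisters the bus.
 */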
static void cxl_nvb_update_state(struct work_struct *work)
{
        struct cxl_nvdimm_bridge *cxl_nvb =
                container_of(work, typeof(*cxl_nvb), state_work);
        struct nvdimm_bus *victim_bus = NULL;
        bool release = false, rescan = false;

        device_lock(&cxl_nvb->dev);
        switch (cxl_nvb->state) {
        case CXL_NVB_ONLINE:
                if (!online_nvdimm_bus(cxl_nvb)) {
                        dev_err(&cxl_nvb->dev,
                                "failed to establish nvdimm bus\n");
                        release = true;
                } else
                        rescan = true;
                break;
        case CXL_NVB_OFFLINE:
        case CXL_NVB_DEAD:
                victim_bus = cxl_nvb->nvdimm_bus;
                cxl_nvb->nvdimm_bus = NULL;
                break;
        default:
                break;
        }
        device_unlock(&cxl_nvb->dev);

        if (release)
                device_release_driver(&cxl_nvb->dev);
        if (rescan) {
                int rc = bus_rescan_devices(&cxl_bus_type);

                dev_dbg(&cxl_nvb->dev, "rescan: %d\n", rc);
        }
        offline_nvdimm_bus(cxl_nvb, victim_bus);

        put_device(&cxl_nvb->dev);
}

static void cxl_nvdimm_bridge_state_work(struct cxl_nvdimm_bridge *cxl_nvb)
{
        /*
         * Take a reference that the workqueue will drop if new work
         * gets queued.
         */
        get_device(&cxl_nvb->dev);
        if (!queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
                put_device(&cxl_nvb->dev);
}

static void cxl_nvdimm_bridge_remove(struct device *dev)
{
        struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);

        if (cxl_nvb->state == CXL_NVB_ONLINE)
                cxl_nvb->state = CXL_NVB_OFFLINE;
        cxl_nvdimm_bridge_state_work(cxl_nvb);
}

static int cxl_nvdimm_bridge_probe(struct device *dev)
{
        struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);

        if (cxl_nvb->state == CXL_NVB_DEAD)
                return -ENXIO;

        if (cxl_nvb->state == CXL_NVB_NEW) {
                cxl_nvb->nd_desc = (struct nvdimm_bus_descriptor) {
                        .provider_name = "CXL",
                        .module = THIS_MODULE,
                        .ndctl = cxl_pmem_ctl,
                };

                INIT_WORK(&cxl_nvb->state_work, cxl_nvb_update_state);
        }

        cxl_nvb->state = CXL_NVB_ONLINE;
        cxl_nvdimm_bridge_state_work(cxl_nvb);

        return 0;
}

static struct cxl_driver cxl_nvdimm_bridge_driver = {
        .name = "cxl_nvdimm_bridge",
        .probe = cxl_nvdimm_bridge_probe,
        .remove = cxl_nvdimm_bridge_remove,
        .id = CXL_DEVICE_NVDIMM_BRIDGE,
};

static int match_cxl_nvdimm(struct device *dev, void *data)
{
        return is_cxl_nvdimm(dev);
}

static void unregister_nvdimm_region(void *nd_region)
{
        nvdimm_region_delete(nd_region);
}

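/*
 * Track the regions mapped by an nvdimm in cxl_nvd->pmem_regions so that
 * unregister_nvdimm() can unbind them even when they are torn down
 * out-of-order with respect to the nvdimm.
 */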
static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd,
                                 struct cxl_pmem_region *cxlr_pmem)
{
        int rc;

        rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem,
                       cxlr_pmem, GFP_KERNEL);
        if (rc)
                return rc;

        get_device(&cxlr_pmem->dev);
        return 0;
}

static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd,
                                  struct cxl_pmem_region *cxlr_pmem)
{
        /*
         * It is possible this is called without a corresponding
         * cxl_nvdimm_add_region for @cxlr_pmem
         */
        cxlr_pmem = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem);
        if (cxlr_pmem)
                put_device(&cxlr_pmem->dev);
}

static void release_mappings(void *data)
{
        int i;
        struct cxl_pmem_region *cxlr_pmem = data;
        struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge;

        device_lock(&cxl_nvb->dev);
        for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
                struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
                struct cxl_nvdimm *cxl_nvd = m->cxl_nvd;

                cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem);
        }
        device_unlock(&cxl_nvb->dev);
}

static void cxlr_pmem_remove_resource(void *res)
{
        remove_resource(res);
}

struct cxl_pmem_region_info {
        u64 offset;
        u64 serial;
};

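/*
 * Assemble an nd_region_desc from the CXL region's HPA range and its
 * per-memdev mappings, then register a libnvdimm pmem region on the
 * bridge's nvdimm_bus.
 */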
static int cxl_pmem_region_probe(struct device *dev)
{
        struct nd_mapping_desc mappings[CXL_DECODER_MAX_INTERLEAVE];
        struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
        struct cxl_region *cxlr = cxlr_pmem->cxlr;
        struct cxl_pmem_region_info *info = NULL;
        struct cxl_nvdimm_bridge *cxl_nvb;
        struct nd_interleave_set *nd_set;
        struct nd_region_desc ndr_desc;
        struct cxl_nvdimm *cxl_nvd;
        struct nvdimm *nvdimm;
        struct resource *res;
        int rc, i = 0;

        cxl_nvb = cxl_find_nvdimm_bridge(&cxlr_pmem->mapping[0].cxlmd->dev);
        if (!cxl_nvb) {
                dev_dbg(dev, "bridge not found\n");
                return -ENXIO;
        }
        cxlr_pmem->bridge = cxl_nvb;

        device_lock(&cxl_nvb->dev);
        if (!cxl_nvb->nvdimm_bus) {
                dev_dbg(dev, "nvdimm bus not found\n");
                rc = -ENXIO;
                goto out_nvb;
        }

        memset(&mappings, 0, sizeof(mappings));
        memset(&ndr_desc, 0, sizeof(ndr_desc));

        res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
        if (!res) {
                rc = -ENOMEM;
                goto out_nvb;
        }

        res->name = "Persistent Memory";
        res->start = cxlr_pmem->hpa_range.start;
        res->end = cxlr_pmem->hpa_range.end;
        res->flags = IORESOURCE_MEM;
        res->desc = IORES_DESC_PERSISTENT_MEMORY;

        rc = insert_resource(&iomem_resource, res);
        if (rc)
                goto out_nvb;

        rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res);
        if (rc)
                goto out_nvb;

        ndr_desc.res = res;
        ndr_desc.provider_data = cxlr_pmem;

        ndr_desc.numa_node = memory_add_physaddr_to_nid(res->start);
        ndr_desc.target_node = phys_to_target_node(res->start);
        if (ndr_desc.target_node == NUMA_NO_NODE) {
                ndr_desc.target_node = ndr_desc.numa_node;
                dev_dbg(&cxlr->dev, "changing target node from %d to %d",
                        NUMA_NO_NODE, ndr_desc.target_node);
        }

        nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
        if (!nd_set) {
                rc = -ENOMEM;
                goto out_nvb;
        }

        ndr_desc.memregion = cxlr->id;
        set_bit(ND_REGION_CXL, &ndr_desc.flags);
        set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);

        info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL);
        if (!info) {
                rc = -ENOMEM;
                goto out_nvb;
        }

        rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem);
        if (rc)
                goto out_nvd;

        for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
                struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
                struct cxl_memdev *cxlmd = m->cxlmd;
                struct cxl_dev_state *cxlds = cxlmd->cxlds;
                struct device *d;

                d = device_find_child(&cxlmd->dev, NULL, match_cxl_nvdimm);
                if (!d) {
                        dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i,
                                dev_name(&cxlmd->dev));
                        rc = -ENODEV;
                        goto out_nvd;
                }

                /* safe to drop ref now with bridge lock held */
                put_device(d);

                cxl_nvd = to_cxl_nvdimm(d);
                nvdimm = dev_get_drvdata(&cxl_nvd->dev);
                if (!nvdimm) {
                        dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i,
                                dev_name(&cxlmd->dev));
                        rc = -ENODEV;
                        goto out_nvd;
                }

                /*
                 * Pin the region per nvdimm device as those may be released
                 * out-of-order with respect to the region, and a single nvdimm
                 * may be associated with multiple regions
                 */
                rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem);
                if (rc)
                        goto out_nvd;
                m->cxl_nvd = cxl_nvd;
                mappings[i] = (struct nd_mapping_desc) {
                        .nvdimm = nvdimm,
                        .start = m->start,
                        .size = m->size,
                        .position = i,
                };
                info[i].offset = m->start;
                info[i].serial = cxlds->serial;
        }
        ndr_desc.num_mappings = cxlr_pmem->nr_mappings;
        ndr_desc.mapping = mappings;

        /*
         * TODO enable CXL labels which skip the need for 'interleave-set cookie'
         */
        nd_set->cookie1 =
                nd_fletcher64(info, sizeof(*info) * cxlr_pmem->nr_mappings, 0);
        nd_set->cookie2 = nd_set->cookie1;
        ndr_desc.nd_set = nd_set;

        cxlr_pmem->nd_region =
                nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc);
        if (!cxlr_pmem->nd_region) {
                rc = -ENOMEM;
                goto out_nvd;
        }

        rc = devm_add_action_or_reset(dev, unregister_nvdimm_region,
                                      cxlr_pmem->nd_region);
out_nvd:
        kfree(info);
out_nvb:
        device_unlock(&cxl_nvb->dev);
        put_device(&cxl_nvb->dev);

        return rc;
}

static struct cxl_driver cxl_pmem_region_driver = {
        .name = "cxl_pmem_region",
        .probe = cxl_pmem_region_probe,
        .id = CXL_DEVICE_PMEM_REGION,
};

/*
 * Return all bridges to the CXL_NVB_NEW state to invalidate any
 * ->state_work referring to the now destroyed cxl_pmem_wq.
 */
static int cxl_nvdimm_bridge_reset(struct device *dev, void *data)
{
        struct cxl_nvdimm_bridge *cxl_nvb;

        if (!is_cxl_nvdimm_bridge(dev))
                return 0;

        cxl_nvb = to_cxl_nvdimm_bridge(dev);
        device_lock(dev);
        cxl_nvb->state = CXL_NVB_NEW;
        device_unlock(dev);

        return 0;
}

static void destroy_cxl_pmem_wq(void)
{
        destroy_workqueue(cxl_pmem_wq);
        bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_nvdimm_bridge_reset);
}

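/*
 * Module init: populate the exclusive command bitmap (SET_SHUTDOWN_STATE,
 * SET_LSA), create the ordered workqueue, then register the bridge, nvdimm,
 * and region drivers, unwinding in reverse order on failure.
 */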
static __init int cxl_pmem_init(void)
{
        int rc;

        set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds);
        set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds);

        cxl_pmem_wq = alloc_ordered_workqueue("cxl_pmem", 0);
        if (!cxl_pmem_wq)
                return -ENXIO;

        rc = cxl_driver_register(&cxl_nvdimm_bridge_driver);
        if (rc)
                goto err_bridge;

        rc = cxl_driver_register(&cxl_nvdimm_driver);
        if (rc)
                goto err_nvdimm;

        rc = cxl_driver_register(&cxl_pmem_region_driver);
        if (rc)
                goto err_region;

        return 0;

err_region:
        cxl_driver_unregister(&cxl_nvdimm_driver);
err_nvdimm:
        cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
err_bridge:
        destroy_cxl_pmem_wq();
        return rc;
}

static __exit void cxl_pmem_exit(void)
{
        cxl_driver_unregister(&cxl_pmem_region_driver);
        cxl_driver_unregister(&cxl_nvdimm_driver);
        cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
        destroy_cxl_pmem_wq();
}

MODULE_LICENSE("GPL v2");
module_init(cxl_pmem_init);
module_exit(cxl_pmem_exit);
MODULE_IMPORT_NS(CXL);
MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM_BRIDGE);
MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM);
MODULE_ALIAS_CXL(CXL_DEVICE_PMEM_REGION);