1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2021 Intel Corporation. All rights reserved. */
3 #include <linux/libnvdimm.h>
4 #include <asm/unaligned.h>
5 #include <linux/device.h>
6 #include <linux/module.h>
7 #include <linux/ndctl.h>
8 #include <linux/async.h>
9 #include <linux/slab.h>
/*
 * File-scope state.  cxl_pmem_wq is allocated by cxl_pmem_init() with
 * alloc_ordered_workqueue() and torn down by destroy_cxl_pmem_wq().
 */
15 * Ordered workqueue for cxl nvdimm device arrival and departure
16 * to coordinate bus rescans when a bridge arrives and trigger remove
17 * operations when the bridge is removed.
19 static struct workqueue_struct *cxl_pmem_wq;
/*
 * Bitmap of CXL memory-device command ids, CXL_MEM_COMMAND_ID_MAX bits wide.
 * Bits are set in cxl_pmem_init(); the bitmap is handed to
 * set_exclusive_cxl_commands() and clear_exclusive_cxl_commands().
 */
21 static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
/*
 * Callback with the void-pointer signature used by
 * devm_add_action_or_reset(); cxl_nvdimm_probe() registers it right after
 * calling set_exclusive_cxl_commands().  Forwards @cxlds together with the
 * file-scope exclusive_cmds bitmap to clear_exclusive_cxl_commands().
 */
23 static void clear_exclusive(void *cxlds)
25 clear_exclusive_cxl_commands(cxlds, exclusive_cmds);
/*
 * Teardown callback registered by cxl_nvdimm_probe() through
 * devm_add_action_or_reset().  @nvdimm is the object returned by
 * nvdimm_create(); its provider data is the owning cxl_nvdimm.
 */
28 static void unregister_nvdimm(void *nvdimm)
30 struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
31 struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge;
32 struct cxl_pmem_region *cxlr_pmem;
/*
 * Under the bridge device lock: snapshot the attached region, then clear
 * both the drvdata pointer and the region back-pointer.
 */
34 device_lock(&cxl_nvb->dev);
35 cxlr_pmem = cxl_nvd->region;
36 dev_set_drvdata(&cxl_nvd->dev, NULL);
37 cxl_nvd->region = NULL;
38 device_unlock(&cxl_nvb->dev);
/*
 * Unbind the region's driver and drop a reference on the region device.
 * NOTE(review): cxlr_pmem can be NULL when no region was attached; any
 * guard around these two calls is not visible in this view -- confirm.
 */
41 device_release_driver(&cxlr_pmem->dev);
42 put_device(&cxlr_pmem->dev);
/* Delete the nvdimm, then forget the bridge association. */
45 nvdimm_delete(nvdimm);
46 cxl_nvd->bridge = NULL;
/*
 * Probe callback for cxl_nvdimm devices.  Looks up the nvdimm bridge for
 * @dev and, with the bridge device lock held, passes exclusive_cmds to
 * set_exclusive_cxl_commands() for the memdev's device state, creates an
 * nvdimm on the bridge's nvdimm_bus, and registers the devm actions
 * clear_exclusive and unregister_nvdimm.
 *
 * NOTE(review): the declaration of rc, the return statements and the
 * error-handling lines between the statements below are not visible in
 * this view; confirm the unwind paths against the full file.
 */
49 static int cxl_nvdimm_probe(struct device *dev)
51 struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
52 struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
53 unsigned long flags = 0, cmd_mask = 0;
54 struct cxl_dev_state *cxlds = cxlmd->cxlds;
55 struct cxl_nvdimm_bridge *cxl_nvb;
56 struct nvdimm *nvdimm;
59 cxl_nvb = cxl_find_nvdimm_bridge(dev);
/* The bridge's nvdimm_bus pointer is only inspected under its device lock. */
63 device_lock(&cxl_nvb->dev);
64 if (!cxl_nvb->nvdimm_bus) {
69 set_exclusive_cxl_commands(cxlds, exclusive_cmds);
70 rc = devm_add_action_or_reset(dev, clear_exclusive, cxlds);
/*
 * Set the NDD_LABELING flag and enable the three config commands that
 * cxl_pmem_nvdimm_ctl() dispatches on.
 */
74 set_bit(NDD_LABELING, &flags);
75 set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
76 set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
77 set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
78 nvdimm = nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, NULL, flags,
/*
 * Publish the nvdimm as drvdata (cxl_pmem_region_probe() reads it back with
 * dev_get_drvdata()) and record the bridge before arming the teardown action.
 */
85 dev_set_drvdata(dev, nvdimm);
86 cxl_nvd->bridge = cxl_nvb;
87 rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
/*
 * Release the bridge lock and drop a bridge device reference (presumably the
 * one taken by cxl_find_nvdimm_bridge() -- confirm).
 */
89 device_unlock(&cxl_nvb->dev);
90 put_device(&cxl_nvb->dev);
/*
 * cxl_driver instance with id CXL_DEVICE_NVDIMM and cxl_nvdimm_probe() as its
 * probe callback.  Registered in cxl_pmem_init(), unregistered in
 * cxl_pmem_exit().
 */
95 static struct cxl_driver cxl_nvdimm_driver = {
97 .probe = cxl_nvdimm_probe,
98 .id = CXL_DEVICE_NVDIMM,
/*
 * Handler for ND_CMD_GET_CONFIG_SIZE (called from cxl_pmem_nvdimm_ctl()).
 * @cxlds:   device state supplying lsa_size and payload_size
 * @cmd:     caller buffer, overwritten with the reply
 * @buf_len: size of the caller buffer in bytes
 *
 * Compares sizeof(*cmd) against @buf_len first, then fills the reply with
 * config_size = lsa_size and max_xfer = payload_size.
 * NOTE(review): the return values are not visible in this view.
 */
101 static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds,
102 struct nd_cmd_get_config_size *cmd,
103 unsigned int buf_len)
105 if (sizeof(*cmd) > buf_len)
108 *cmd = (struct nd_cmd_get_config_size) {
109 .config_size = cxlds->lsa_size,
110 .max_xfer = cxlds->payload_size,
/*
 * Handler for ND_CMD_GET_CONFIG_DATA (called from cxl_pmem_nvdimm_ctl()).
 * @cxlds:   device state the mailbox command is sent to
 * @cmd:     request header; in_offset/in_length select the range and the
 *           result is written to cmd->out_buf
 * @buf_len: size of the caller buffer in bytes
 *
 * NOTE(review): the declaration of rc and the return statements are not
 * visible in this view.
 */
116 static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds,
117 struct nd_cmd_get_config_data_hdr *cmd,
118 unsigned int buf_len)
120 struct cxl_mbox_get_lsa get_lsa;
/* The buffer must hold the header, and then header plus in_length bytes. */
123 if (sizeof(*cmd) > buf_len)
125 if (struct_size(cmd, out_buf, cmd->in_length) > buf_len)
/* The mailbox payload carries offset and length as little-endian 32-bit. */
128 get_lsa = (struct cxl_mbox_get_lsa) {
129 .offset = cpu_to_le32(cmd->in_offset),
130 .length = cpu_to_le32(cmd->in_length),
133 rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LSA, &get_lsa,
134 sizeof(get_lsa), cmd->out_buf, cmd->in_length);
/*
 * Handler for ND_CMD_SET_CONFIG_DATA (called from cxl_pmem_nvdimm_ctl()).
 * @cxlds:   device state the mailbox command is sent to
 * @cmd:     request header followed by in_length bytes of data in in_buf
 * @buf_len: size of the caller buffer in bytes
 *
 * Copies the caller's data into a kvzalloc()ed cxl_mbox_set_lsa, sends it
 * with CXL_MBOX_OP_SET_LSA, and writes a zero u32 right after the input
 * data in the caller buffer.
 * NOTE(review): the allocation-failure check, the release of set_lsa and
 * the return statements are not visible in this view -- confirm the buffer
 * is freed on every path.
 */
140 static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds,
141 struct nd_cmd_set_config_hdr *cmd,
142 unsigned int buf_len)
144 struct cxl_mbox_set_lsa *set_lsa;
147 if (sizeof(*cmd) > buf_len)
150 /* 4-byte status follows the input data in the payload */
151 if (struct_size(cmd, in_buf, cmd->in_length) + 4 > buf_len)
155 kvzalloc(struct_size(set_lsa, data, cmd->in_length), GFP_KERNEL);
159 *set_lsa = (struct cxl_mbox_set_lsa) {
160 .offset = cpu_to_le32(cmd->in_offset),
162 memcpy(set_lsa->data, cmd->in_buf, cmd->in_length);
164 rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_LSA, set_lsa,
165 struct_size(set_lsa, data, cmd->in_length),
169 * Set "firmware" status (4-packed bytes at the end of the input
/* put_unaligned(): the status slot at in_buf[in_length] may not be u32-aligned. */
172 put_unaligned(0, (u32 *) &cmd->in_buf[cmd->in_length]);
/*
 * Dispatch an nvdimm command to the matching cxl_pmem_* handler.
 * @nvdimm:  target nvdimm; its provider data is the cxl_nvdimm
 * @cmd:     ND_CMD_* number
 * @buf:     command buffer, passed through to the handler
 * @buf_len: size of @buf in bytes
 *
 * @cmd is first tested against the mask returned by nvdimm_cmd_mask().
 * NOTE(review): the result for a masked-out or unhandled command is not
 * visible in this view.
 */
178 static int cxl_pmem_nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd,
179 void *buf, unsigned int buf_len)
181 struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
182 unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
183 struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
184 struct cxl_dev_state *cxlds = cxlmd->cxlds;
186 if (!test_bit(cmd, &cmd_mask))
190 case ND_CMD_GET_CONFIG_SIZE:
191 return cxl_pmem_get_config_size(cxlds, buf, buf_len);
192 case ND_CMD_GET_CONFIG_DATA:
193 return cxl_pmem_get_config_data(cxlds, buf, buf_len);
194 case ND_CMD_SET_CONFIG_DATA:
195 return cxl_pmem_set_config_data(cxlds, buf, buf_len);
/*
 * ->ndctl callback installed in the bridge's nvdimm_bus_descriptor by
 * cxl_nvdimm_bridge_probe().  Forwards nvdimm-targeted commands to
 * cxl_pmem_nvdimm_ctl().
 * NOTE(review): how @cmd_rc is set and how a NULL @nvdimm is handled are
 * not visible in this view.
 */
201 static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc,
202 struct nvdimm *nvdimm, unsigned int cmd, void *buf,
203 unsigned int buf_len, int *cmd_rc)
206 * No firmware response to translate, let the transport error
207 * code take precedence.
213 return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
/*
 * Ensure @cxl_nvb has a registered nvdimm bus.  When none is set, registers
 * one with nvdimm_bus_register() using the bridge's nd_desc and reports
 * whether that produced a non-NULL bus.
 * NOTE(review): the value returned when a bus already exists is not visible
 * in this view (presumably true).
 */
216 static bool online_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb)
218 if (cxl_nvb->nvdimm_bus)
220 cxl_nvb->nvdimm_bus =
221 nvdimm_bus_register(&cxl_nvb->dev, &cxl_nvb->nd_desc);
222 return cxl_nvb->nvdimm_bus != NULL;
/*
 * bus_for_each_dev() callback used by offline_nvdimm_bus().  @cxl_nvb is the
 * bridge being taken offline.  Only cxl_nvdimm devices whose ->bridge equals
 * @cxl_nvb reach the device_release_driver() call.
 * NOTE(review): the return values are not visible in this view.
 */
225 static int cxl_nvdimm_release_driver(struct device *dev, void *cxl_nvb)
227 struct cxl_nvdimm *cxl_nvd;
229 if (!is_cxl_nvdimm(dev))
232 cxl_nvd = to_cxl_nvdimm(dev);
233 if (cxl_nvd->bridge != cxl_nvb)
236 device_release_driver(dev);
/*
 * bus_for_each_dev() callback used by offline_nvdimm_bus().  @cxl_nvb is the
 * bridge being taken offline.  Only cxl_pmem_region devices whose ->bridge
 * equals @cxl_nvb reach the device_release_driver() call.
 * NOTE(review): the return values are not visible in this view.
 */
240 static int cxl_pmem_region_release_driver(struct device *dev, void *cxl_nvb)
242 struct cxl_pmem_region *cxlr_pmem;
244 if (!is_cxl_pmem_region(dev))
247 cxlr_pmem = to_cxl_pmem_region(dev);
248 if (cxlr_pmem->bridge != cxl_nvb)
251 device_release_driver(dev);
/*
 * Take @nvdimm_bus offline for bridge @cxl_nvb.  Walks cxl_bus_type twice --
 * first unbinding pmem regions tied to the bridge, then cxl_nvdimm devices --
 * and only afterwards unregisters the nvdimm bus.
 * NOTE(review): any early-exit check on @nvdimm_bus is not visible in this
 * view.
 */
255 static void offline_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb,
256 struct nvdimm_bus *nvdimm_bus)
262 * Set the state of cxl_nvdimm devices to unbound / idle before
263 * nvdimm_bus_unregister() rips the nvdimm objects out from
266 bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
267 cxl_pmem_region_release_driver);
268 bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
269 cxl_nvdimm_release_driver);
270 nvdimm_bus_unregister(nvdimm_bus);
/*
 * Work handler for a bridge's state_work (installed with INIT_WORK() in
 * cxl_nvdimm_bridge_probe()).  Reads cxl_nvb->state under the bridge device
 * lock and records what to do; the follow-up calls run after the lock is
 * dropped.  Ends by dropping the device reference that
 * cxl_nvdimm_bridge_state_work() took when queueing the work.
 *
 * NOTE(review): several case labels and the conditions guarding the calls
 * after device_unlock() are not visible in this view; the release, rescan
 * and victim_bus locals presumably carry those decisions -- confirm.
 */
273 static void cxl_nvb_update_state(struct work_struct *work)
275 struct cxl_nvdimm_bridge *cxl_nvb =
276 container_of(work, typeof(*cxl_nvb), state_work);
277 struct nvdimm_bus *victim_bus = NULL;
278 bool release = false, rescan = false;
280 device_lock(&cxl_nvb->dev);
281 switch (cxl_nvb->state) {
283 if (!online_nvdimm_bus(cxl_nvb)) {
284 dev_err(&cxl_nvb->dev,
285 "failed to establish nvdimm bus\n");
290 case CXL_NVB_OFFLINE:
/* Detach the bus pointer under the lock; it is unregistered after unlock. */
292 victim_bus = cxl_nvb->nvdimm_bus;
293 cxl_nvb->nvdimm_bus = NULL;
298 device_unlock(&cxl_nvb->dev);
301 device_release_driver(&cxl_nvb->dev);
303 int rc = bus_rescan_devices(&cxl_bus_type);
305 dev_dbg(&cxl_nvb->dev, "rescan: %d\n", rc);
307 offline_nvdimm_bus(cxl_nvb, victim_bus);
309 put_device(&cxl_nvb->dev);
/*
 * Queue @cxl_nvb's state_work on cxl_pmem_wq.  A device reference is taken
 * before queueing; cxl_nvb_update_state() drops it when the work runs.  If
 * queue_work() returns false the reference is dropped here instead.
 */
312 static void cxl_nvdimm_bridge_state_work(struct cxl_nvdimm_bridge *cxl_nvb)
315 * Take a reference that the workqueue will drop if new work
318 get_device(&cxl_nvb->dev);
319 if (!queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
320 put_device(&cxl_nvb->dev);
/*
 * ->remove callback of cxl_nvdimm_bridge_driver.  Moves an ONLINE bridge to
 * CXL_NVB_OFFLINE and schedules the state work, which performs the actual
 * offlining in cxl_nvb_update_state().
 */
323 static void cxl_nvdimm_bridge_remove(struct device *dev)
325 struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
327 if (cxl_nvb->state == CXL_NVB_ONLINE)
328 cxl_nvb->state = CXL_NVB_OFFLINE;
329 cxl_nvdimm_bridge_state_work(cxl_nvb);
/*
 * ->probe callback of cxl_nvdimm_bridge_driver.  For a bridge in
 * CXL_NVB_NEW state, initialises its nvdimm_bus_descriptor and state_work;
 * then marks the bridge CXL_NVB_ONLINE and schedules the state work.
 * NOTE(review): the result for a CXL_NVB_DEAD bridge and the final return
 * value are not visible in this view.
 */
332 static int cxl_nvdimm_bridge_probe(struct device *dev)
334 struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
336 if (cxl_nvb->state == CXL_NVB_DEAD)
/* One-time setup, skipped when the bridge has already left the NEW state. */
339 if (cxl_nvb->state == CXL_NVB_NEW) {
340 cxl_nvb->nd_desc = (struct nvdimm_bus_descriptor) {
341 .provider_name = "CXL",
342 .module = THIS_MODULE,
343 .ndctl = cxl_pmem_ctl,
346 INIT_WORK(&cxl_nvb->state_work, cxl_nvb_update_state);
349 cxl_nvb->state = CXL_NVB_ONLINE;
350 cxl_nvdimm_bridge_state_work(cxl_nvb);
/*
 * cxl_driver instance with id CXL_DEVICE_NVDIMM_BRIDGE.  Registered first in
 * cxl_pmem_init() and unregistered last in cxl_pmem_exit().
 */
355 static struct cxl_driver cxl_nvdimm_bridge_driver = {
356 .name = "cxl_nvdimm_bridge",
357 .probe = cxl_nvdimm_bridge_probe,
358 .remove = cxl_nvdimm_bridge_remove,
359 .id = CXL_DEVICE_NVDIMM_BRIDGE,
/*
 * device_find_child() match callback used by cxl_pmem_region_probe().
 * Returns the result of is_cxl_nvdimm() for @dev; @data is unused.
 */
362 static int match_cxl_nvdimm(struct device *dev, void *data)
364 return is_cxl_nvdimm(dev);
/*
 * Teardown callback registered by cxl_pmem_region_probe() through
 * devm_add_action_or_reset().  @nd_region is the region returned by
 * nvdimm_pmem_region_create(); its provider data is the cxl_pmem_region.
 *
 * Under the bridge device lock, walks every mapping and, for each
 * cxl_nvdimm that still has ->region set, drops one reference on the
 * region device (matching the get_device() taken per mapping in
 * cxl_pmem_region_probe()) and clears the back-pointer.  The nd_region is
 * deleted after the lock is released.
 */
367 static void unregister_nvdimm_region(void *nd_region)
369 struct cxl_nvdimm_bridge *cxl_nvb;
370 struct cxl_pmem_region *cxlr_pmem;
373 cxlr_pmem = nd_region_provider_data(nd_region);
374 cxl_nvb = cxlr_pmem->bridge;
375 device_lock(&cxl_nvb->dev);
376 for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
377 struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
378 struct cxl_nvdimm *cxl_nvd = m->cxl_nvd;
380 if (cxl_nvd->region) {
381 put_device(&cxlr_pmem->dev);
382 cxl_nvd->region = NULL;
385 device_unlock(&cxl_nvb->dev);
387 nvdimm_region_delete(nd_region);
/*
 * devm action registered by cxl_pmem_region_probe() after insert_resource();
 * passes the struct resource given as @res to remove_resource().
 */
390 static void cxlr_pmem_remove_resource(void *res)
392 remove_resource(res);
/*
 * Per-mapping record built by cxl_pmem_region_probe(), which fills the
 * .offset and .serial members and checksums the array with nd_fletcher64().
 * NOTE(review): the member declarations are not visible in this view.
 */
395 struct cxl_pmem_region_info {
/*
 * ->probe callback of cxl_pmem_region_driver.  Steps visible here:
 *   1. find the nvdimm bridge via the first mapping's memdev and take the
 *      bridge device lock;
 *   2. insert an IORESOURCE_MEM resource covering the region's hpa_range
 *      and register cxlr_pmem_remove_resource() to undo it;
 *   3. fill in an nd_region_desc (NUMA nodes, flags, memregion id);
 *   4. for each mapping, locate the cxl_nvdimm child of the memdev, fetch
 *      its nvdimm from drvdata, link it to this region (taking a region
 *      device reference) and record offset/serial in info[];
 *   5. checksum info[] with nd_fletcher64() for the interleave-set cookie,
 *      create the nvdimm pmem region and register
 *      unregister_nvdimm_region() for teardown.
 *
 * NOTE(review): the declarations of rc and i, the NULL/error checks, the
 * goto labels, the release of info and the return statements are not
 * visible in this view; confirm the unwind paths against the full file.
 */
400 static int cxl_pmem_region_probe(struct device *dev)
402 struct nd_mapping_desc mappings[CXL_DECODER_MAX_INTERLEAVE];
403 struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
404 struct cxl_region *cxlr = cxlr_pmem->cxlr;
405 struct cxl_pmem_region_info *info = NULL;
406 struct cxl_nvdimm_bridge *cxl_nvb;
407 struct nd_interleave_set *nd_set;
408 struct nd_region_desc ndr_desc;
409 struct cxl_nvdimm *cxl_nvd;
410 struct nvdimm *nvdimm;
411 struct resource *res;
414 cxl_nvb = cxl_find_nvdimm_bridge(&cxlr_pmem->mapping[0].cxlmd->dev);
416 dev_dbg(dev, "bridge not found\n");
/* Remembered so the release/teardown callbacks can match on this bridge. */
419 cxlr_pmem->bridge = cxl_nvb;
421 device_lock(&cxl_nvb->dev);
422 if (!cxl_nvb->nvdimm_bus) {
423 dev_dbg(dev, "nvdimm bus not found\n");
428 memset(&mappings, 0, sizeof(mappings));
429 memset(&ndr_desc, 0, sizeof(ndr_desc));
/* devm allocation: the resource is released along with @dev's devres. */
431 res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
437 res->name = "Persistent Memory";
438 res->start = cxlr_pmem->hpa_range.start;
439 res->end = cxlr_pmem->hpa_range.end;
440 res->flags = IORESOURCE_MEM;
441 res->desc = IORES_DESC_PERSISTENT_MEMORY;
443 rc = insert_resource(&iomem_resource, res);
447 rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res);
452 ndr_desc.provider_data = cxlr_pmem;
/* Fall back to the numa_node when no target node is known for the range. */
454 ndr_desc.numa_node = memory_add_physaddr_to_nid(res->start);
455 ndr_desc.target_node = phys_to_target_node(res->start);
456 if (ndr_desc.target_node == NUMA_NO_NODE) {
457 ndr_desc.target_node = ndr_desc.numa_node;
458 dev_dbg(&cxlr->dev, "changing target node from %d to %d",
459 NUMA_NO_NODE, ndr_desc.target_node);
462 nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
468 ndr_desc.memregion = cxlr->id;
469 set_bit(ND_REGION_CXL, &ndr_desc.flags);
470 set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
/* Not devm-managed: info comes from kmalloc_array() and needs an explicit free. */
472 info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL);
478 for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
479 struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
480 struct cxl_memdev *cxlmd = m->cxlmd;
481 struct cxl_dev_state *cxlds = cxlmd->cxlds;
484 d = device_find_child(&cxlmd->dev, NULL, match_cxl_nvdimm);
486 dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i,
487 dev_name(&cxlmd->dev));
492 /* safe to drop ref now with bridge lock held */
495 cxl_nvd = to_cxl_nvdimm(d);
496 nvdimm = dev_get_drvdata(&cxl_nvd->dev);
498 dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i,
499 dev_name(&cxlmd->dev));
/*
 * Link nvdimm and region in both directions; the region device reference
 * taken here is dropped in unregister_nvdimm_region() or in the unwind loop
 * at the end of this function.
 */
503 cxl_nvd->region = cxlr_pmem;
504 get_device(&cxlr_pmem->dev);
505 m->cxl_nvd = cxl_nvd;
506 mappings[i] = (struct nd_mapping_desc) {
512 info[i].offset = m->start;
513 info[i].serial = cxlds->serial;
515 ndr_desc.num_mappings = cxlr_pmem->nr_mappings;
516 ndr_desc.mapping = mappings;
519 * TODO enable CXL labels which skip the need for 'interleave-set cookie'
522 nd_fletcher64(info, sizeof(*info) * cxlr_pmem->nr_mappings, 0);
523 nd_set->cookie2 = nd_set->cookie1;
524 ndr_desc.nd_set = nd_set;
526 cxlr_pmem->nd_region =
527 nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc);
528 if (!cxlr_pmem->nd_region) {
533 rc = devm_add_action_or_reset(dev, unregister_nvdimm_region,
534 cxlr_pmem->nd_region);
537 device_unlock(&cxl_nvb->dev);
538 put_device(&cxl_nvb->dev);
543 dev_dbg(dev, "failed to create nvdimm region\n");
/*
 * Unwind: for every mapping linked before the failure (indices below i),
 * drop the region reference and clear the nvdimm's region back-pointer.
 */
544 for (i--; i >= 0; i--) {
545 nvdimm = mappings[i].nvdimm;
546 cxl_nvd = nvdimm_provider_data(nvdimm);
547 put_device(&cxl_nvd->region->dev);
548 cxl_nvd->region = NULL;
/*
 * cxl_driver instance with id CXL_DEVICE_PMEM_REGION.  Registered last in
 * cxl_pmem_init() and unregistered first in cxl_pmem_exit().  No ->remove
 * member is visible in this view.
 */
553 static struct cxl_driver cxl_pmem_region_driver = {
554 .name = "cxl_pmem_region",
555 .probe = cxl_pmem_region_probe,
556 .id = CXL_DEVICE_PMEM_REGION,
/*
 * bus_for_each_dev() callback used by destroy_cxl_pmem_wq(); @data is passed
 * as NULL by that caller.  Devices that are not nvdimm bridges are filtered
 * out by is_cxl_nvdimm_bridge().
 * NOTE(review): any state check or locking between the conversion and the
 * assignment, and the return values, are not visible in this view.
 */
560 * Return all bridges to the CXL_NVB_NEW state to invalidate any
561 * ->state_work referring to the now destroyed cxl_pmem_wq.
563 static int cxl_nvdimm_bridge_reset(struct device *dev, void *data)
565 struct cxl_nvdimm_bridge *cxl_nvb;
567 if (!is_cxl_nvdimm_bridge(dev))
570 cxl_nvb = to_cxl_nvdimm_bridge(dev);
572 cxl_nvb->state = CXL_NVB_NEW;
/*
 * Destroy cxl_pmem_wq, then walk cxl_bus_type with cxl_nvdimm_bridge_reset()
 * so bridges go back to CXL_NVB_NEW.  Called from cxl_pmem_exit() and from
 * the error path of cxl_pmem_init().
 */
578 static void destroy_cxl_pmem_wq(void)
580 destroy_workqueue(cxl_pmem_wq);
581 bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_nvdimm_bridge_reset);
/*
 * Module init.  Marks SET_SHUTDOWN_STATE and SET_LSA in exclusive_cmds,
 * allocates the ordered workqueue, then registers the bridge, nvdimm and
 * pmem-region drivers in that order.  The trailing calls unwind in reverse
 * order: unregister the nvdimm driver, then the bridge driver, then destroy
 * the workqueue.
 * NOTE(review): the declaration of rc, the failure checks, the goto labels
 * and the return statements are not visible in this view.
 */
584 static __init int cxl_pmem_init(void)
588 set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds);
589 set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds);
591 cxl_pmem_wq = alloc_ordered_workqueue("cxl_pmem", 0);
595 rc = cxl_driver_register(&cxl_nvdimm_bridge_driver);
599 rc = cxl_driver_register(&cxl_nvdimm_driver);
603 rc = cxl_driver_register(&cxl_pmem_region_driver);
610 cxl_driver_unregister(&cxl_nvdimm_driver);
612 cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
614 destroy_cxl_pmem_wq();
/*
 * Module exit.  Unregisters the three drivers in the reverse of their
 * registration order in cxl_pmem_init(), then destroys the workqueue.
 */
618 static __exit void cxl_pmem_exit(void)
620 cxl_driver_unregister(&cxl_pmem_region_driver);
621 cxl_driver_unregister(&cxl_nvdimm_driver);
622 cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
623 destroy_cxl_pmem_wq();
/*
 * Module metadata: license, init/exit entry points, the CXL symbol namespace
 * import, and one module alias per device id handled by the drivers above.
 */
626 MODULE_LICENSE("GPL v2");
627 module_init(cxl_pmem_init);
628 module_exit(cxl_pmem_exit);
629 MODULE_IMPORT_NS(CXL);
630 MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM_BRIDGE);
631 MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM);
632 MODULE_ALIAS_CXL(CXL_DEVICE_PMEM_REGION);