drivers/cxl/pmem.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
#include <linux/libnvdimm.h>
#include <asm/unaligned.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/ndctl.h>
#include <linux/async.h>
#include <linux/slab.h>
#include <linux/nd.h>
#include "cxlmem.h"
#include "cxl.h"

/*
 * Ordered workqueue for cxl nvdimm device arrival and departure
 * to coordinate bus rescans when a bridge arrives and trigger remove
 * operations when the bridge is removed.
 */
static struct workqueue_struct *cxl_pmem_wq;

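/*
 * Mailbox commands that are marked exclusive while a cxl_nvdimm is bound
 * (see set_exclusive_cxl_commands() in cxl_nvdimm_probe() below), meant to
 * keep ioctl-issued commands from updating the label area underneath
 * LIBNVDIMM.
 */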
static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);

static void clear_exclusive(void *cxlds)
{
	clear_exclusive_cxl_commands(cxlds, exclusive_cmds);
}

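/*
 * devm unwind action for cxl_nvdimm_probe(): detach any cxl_pmem_region
 * pointing at this nvdimm under the bridge lock (the same lock that
 * cxl_pmem_region_probe() holds while establishing that link), then delete
 * the nvdimm itself.
 */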
static void unregister_nvdimm(void *nvdimm)
{
	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
	struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge;
	struct cxl_pmem_region *cxlr_pmem;

	device_lock(&cxl_nvb->dev);
	cxlr_pmem = cxl_nvd->region;
	dev_set_drvdata(&cxl_nvd->dev, NULL);
	cxl_nvd->region = NULL;
	device_unlock(&cxl_nvb->dev);

	if (cxlr_pmem) {
		device_release_driver(&cxlr_pmem->dev);
		put_device(&cxlr_pmem->dev);
	}

	nvdimm_delete(nvdimm);
	cxl_nvd->bridge = NULL;
}

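/*
 * Bind a cxl_nvdimm to its bridge's nvdimm_bus: fence off the LSA-mutating
 * commands, then register an nvdimm that advertises labeling plus the
 * get/set config-data command set.  The bridge lock is held so the
 * nvdimm_bus cannot go away while the nvdimm is being created.
 */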
static int cxl_nvdimm_probe(struct device *dev)
{
	struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
	unsigned long flags = 0, cmd_mask = 0;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_nvdimm_bridge *cxl_nvb;
	struct nvdimm *nvdimm;
	int rc;

	cxl_nvb = cxl_find_nvdimm_bridge(dev);
	if (!cxl_nvb)
		return -ENXIO;

	device_lock(&cxl_nvb->dev);
	if (!cxl_nvb->nvdimm_bus) {
		rc = -ENXIO;
		goto out;
	}

	set_exclusive_cxl_commands(cxlds, exclusive_cmds);
	rc = devm_add_action_or_reset(dev, clear_exclusive, cxlds);
	if (rc)
		goto out;

	set_bit(NDD_LABELING, &flags);
	set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
	set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
	set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
	nvdimm = nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, NULL, flags,
			       cmd_mask, 0, NULL);
	if (!nvdimm) {
		rc = -ENOMEM;
		goto out;
	}

	dev_set_drvdata(dev, nvdimm);
	cxl_nvd->bridge = cxl_nvb;
	rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
out:
	device_unlock(&cxl_nvb->dev);
	put_device(&cxl_nvb->dev);

	return rc;
}

static struct cxl_driver cxl_nvdimm_driver = {
	.name = "cxl_nvdimm",
	.probe = cxl_nvdimm_probe,
	.id = CXL_DEVICE_NVDIMM,
};

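/*
 * The ND_CMD_{GET,SET}_CONFIG_* commands operate on the device's Label
 * Storage Area (LSA); the handlers below translate them into CXL Get/Set
 * LSA mailbox commands.
 */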
static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds,
				    struct nd_cmd_get_config_size *cmd,
				    unsigned int buf_len)
{
	if (sizeof(*cmd) > buf_len)
		return -EINVAL;

	*cmd = (struct nd_cmd_get_config_size) {
		 .config_size = cxlds->lsa_size,
		 .max_xfer = cxlds->payload_size,
	};

	return 0;
}

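/*
 * Validate that both the fixed header and the caller-specified 'in_length'
 * payload fit in the buffer before forwarding the read to the device.
 */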
static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds,
				    struct nd_cmd_get_config_data_hdr *cmd,
				    unsigned int buf_len)
{
	struct cxl_mbox_get_lsa get_lsa;
	int rc;

	if (sizeof(*cmd) > buf_len)
		return -EINVAL;
	if (struct_size(cmd, out_buf, cmd->in_length) > buf_len)
		return -EINVAL;

	get_lsa = (struct cxl_mbox_get_lsa) {
		.offset = cpu_to_le32(cmd->in_offset),
		.length = cpu_to_le32(cmd->in_length),
	};

	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LSA, &get_lsa,
			       sizeof(get_lsa), cmd->out_buf, cmd->in_length);
	cmd->status = 0;

	return rc;
}

static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds,
				    struct nd_cmd_set_config_hdr *cmd,
				    unsigned int buf_len)
{
	struct cxl_mbox_set_lsa *set_lsa;
	int rc;

	if (sizeof(*cmd) > buf_len)
		return -EINVAL;

	/* 4-byte status follows the input data in the payload */
	if (struct_size(cmd, in_buf, cmd->in_length) + 4 > buf_len)
		return -EINVAL;

	set_lsa =
		kvzalloc(struct_size(set_lsa, data, cmd->in_length), GFP_KERNEL);
	if (!set_lsa)
		return -ENOMEM;

	*set_lsa = (struct cxl_mbox_set_lsa) {
		.offset = cpu_to_le32(cmd->in_offset),
	};
	memcpy(set_lsa->data, cmd->in_buf, cmd->in_length);

	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_LSA, set_lsa,
			       struct_size(set_lsa, data, cmd->in_length),
			       NULL, 0);

	/*
	 * Set "firmware" status (4 packed bytes at the end of the input
	 * payload).
	 */
	put_unaligned(0, (u32 *) &cmd->in_buf[cmd->in_length]);
	kvfree(set_lsa);

	return rc;
}

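/*
 * Per-nvdimm command dispatch.  Only commands present in the cmd_mask
 * established at nvdimm_create() time (the three config-data commands)
 * make it past the test_bit() check.
 */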
static int cxl_pmem_nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd,
			       void *buf, unsigned int buf_len)
{
	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
	unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (!test_bit(cmd, &cmd_mask))
		return -ENOTTY;

	switch (cmd) {
	case ND_CMD_GET_CONFIG_SIZE:
		return cxl_pmem_get_config_size(cxlds, buf, buf_len);
	case ND_CMD_GET_CONFIG_DATA:
		return cxl_pmem_get_config_data(cxlds, buf, buf_len);
	case ND_CMD_SET_CONFIG_DATA:
		return cxl_pmem_set_config_data(cxlds, buf, buf_len);
	default:
		return -ENOTTY;
	}
}

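/*
 * ->ndctl() entry point for the nvdimm_bus_descriptor registered in
 * cxl_nvdimm_bridge_probe().  Bus-scoped commands (nvdimm == NULL) are not
 * supported.
 */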
static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc,
			struct nvdimm *nvdimm, unsigned int cmd, void *buf,
			unsigned int buf_len, int *cmd_rc)
{
	/*
	 * No firmware response to translate, let the transport error
	 * code take precedence.
	 */
	*cmd_rc = 0;

	if (!nvdimm)
		return -ENOTTY;
	return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
}

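/*
 * Idempotently register the bridge's nvdimm_bus.  Called from
 * cxl_nvb_update_state() with the bridge device lock held.
 */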
static bool online_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb)
{
	if (cxl_nvb->nvdimm_bus)
		return true;
	cxl_nvb->nvdimm_bus =
		nvdimm_bus_register(&cxl_nvb->dev, &cxl_nvb->nd_desc);
	return cxl_nvb->nvdimm_bus != NULL;
}

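/*
 * bus_for_each_dev() callbacks used by offline_nvdimm_bus() to unbind
 * cxl_nvdimm and cxl_pmem_region devices that belong to the departing
 * bridge before the underlying nvdimm objects are unregistered.
 */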
static int cxl_nvdimm_release_driver(struct device *dev, void *cxl_nvb)
{
	struct cxl_nvdimm *cxl_nvd;

	if (!is_cxl_nvdimm(dev))
		return 0;

	cxl_nvd = to_cxl_nvdimm(dev);
	if (cxl_nvd->bridge != cxl_nvb)
		return 0;

	device_release_driver(dev);
	return 0;
}

static int cxl_pmem_region_release_driver(struct device *dev, void *cxl_nvb)
{
	struct cxl_pmem_region *cxlr_pmem;

	if (!is_cxl_pmem_region(dev))
		return 0;

	cxlr_pmem = to_cxl_pmem_region(dev);
	if (cxlr_pmem->bridge != cxl_nvb)
		return 0;

	device_release_driver(dev);
	return 0;
}

static void offline_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb,
			       struct nvdimm_bus *nvdimm_bus)
{
	if (!nvdimm_bus)
		return;

	/*
	 * Set the state of cxl_nvdimm devices to unbound / idle before
	 * nvdimm_bus_unregister() rips the nvdimm objects out from
	 * underneath them.
	 */
	bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
			 cxl_pmem_region_release_driver);
	bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
			 cxl_nvdimm_release_driver);
	nvdimm_bus_unregister(nvdimm_bus);
}

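/*
 * Bridge state machine: CXL_NVB_ONLINE registers the nvdimm_bus and rescans
 * the CXL bus so waiting cxl_nvdimm / cxl_pmem_region devices can attach;
 * CXL_NVB_OFFLINE and CXL_NVB_DEAD tear the bus down.  The teardown runs
 * outside the bridge lock since the unbind paths it triggers (e.g.
 * unregister_nvdimm()) take that lock themselves.
 */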
static void cxl_nvb_update_state(struct work_struct *work)
{
	struct cxl_nvdimm_bridge *cxl_nvb =
		container_of(work, typeof(*cxl_nvb), state_work);
	struct nvdimm_bus *victim_bus = NULL;
	bool release = false, rescan = false;

	device_lock(&cxl_nvb->dev);
	switch (cxl_nvb->state) {
	case CXL_NVB_ONLINE:
		if (!online_nvdimm_bus(cxl_nvb)) {
			dev_err(&cxl_nvb->dev,
				"failed to establish nvdimm bus\n");
			release = true;
		} else
			rescan = true;
		break;
	case CXL_NVB_OFFLINE:
	case CXL_NVB_DEAD:
		victim_bus = cxl_nvb->nvdimm_bus;
		cxl_nvb->nvdimm_bus = NULL;
		break;
	default:
		break;
	}
	device_unlock(&cxl_nvb->dev);

	if (release)
		device_release_driver(&cxl_nvb->dev);
	if (rescan) {
		int rc = bus_rescan_devices(&cxl_bus_type);

		dev_dbg(&cxl_nvb->dev, "rescan: %d\n", rc);
	}
	offline_nvdimm_bus(cxl_nvb, victim_bus);

	put_device(&cxl_nvb->dev);
}

static void cxl_nvdimm_bridge_state_work(struct cxl_nvdimm_bridge *cxl_nvb)
{
	/*
	 * Take a reference that the workqueue will drop if new work
	 * gets queued.
	 */
	get_device(&cxl_nvb->dev);
	if (!queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
		put_device(&cxl_nvb->dev);
}

static void cxl_nvdimm_bridge_remove(struct device *dev)
{
	struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);

	if (cxl_nvb->state == CXL_NVB_ONLINE)
		cxl_nvb->state = CXL_NVB_OFFLINE;
	cxl_nvdimm_bridge_state_work(cxl_nvb);
}

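/*
 * First probe of a bridge initializes the nvdimm_bus_descriptor and the
 * state work; every probe moves the bridge to CXL_NVB_ONLINE and kicks the
 * state work.  A bridge marked CXL_NVB_DEAD is never revived.
 */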
static int cxl_nvdimm_bridge_probe(struct device *dev)
{
	struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);

	if (cxl_nvb->state == CXL_NVB_DEAD)
		return -ENXIO;

	if (cxl_nvb->state == CXL_NVB_NEW) {
		cxl_nvb->nd_desc = (struct nvdimm_bus_descriptor) {
			.provider_name = "CXL",
			.module = THIS_MODULE,
			.ndctl = cxl_pmem_ctl,
		};

		INIT_WORK(&cxl_nvb->state_work, cxl_nvb_update_state);
	}

	cxl_nvb->state = CXL_NVB_ONLINE;
	cxl_nvdimm_bridge_state_work(cxl_nvb);

	return 0;
}

static struct cxl_driver cxl_nvdimm_bridge_driver = {
	.name = "cxl_nvdimm_bridge",
	.probe = cxl_nvdimm_bridge_probe,
	.remove = cxl_nvdimm_bridge_remove,
	.id = CXL_DEVICE_NVDIMM_BRIDGE,
};

static int match_cxl_nvdimm(struct device *dev, void *data)
{
	return is_cxl_nvdimm(dev);
}

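/*
 * devm unwind action for cxl_pmem_region_probe(): drop the region
 * references taken on each member cxl_nvdimm (under the bridge lock,
 * mirroring how they were acquired) before deleting the nd_region.
 */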
static void unregister_nvdimm_region(void *nd_region)
{
	struct cxl_nvdimm_bridge *cxl_nvb;
	struct cxl_pmem_region *cxlr_pmem;
	int i;

	cxlr_pmem = nd_region_provider_data(nd_region);
	cxl_nvb = cxlr_pmem->bridge;
	device_lock(&cxl_nvb->dev);
	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
		struct cxl_nvdimm *cxl_nvd = m->cxl_nvd;

		if (cxl_nvd->region) {
			put_device(&cxlr_pmem->dev);
			cxl_nvd->region = NULL;
		}
	}
	device_unlock(&cxl_nvb->dev);

	nvdimm_region_delete(nd_region);
}

static void cxlr_pmem_remove_resource(void *res)
{
	remove_resource(res);
}

struct cxl_pmem_region_info {
	u64 offset;
	u64 serial;
};

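/*
 * Assemble an nd_region from a CXL pmem region: claim the host physical
 * address range as an iomem resource, require every endpoint in the
 * interleave to have a bound cxl_nvdimm, and derive the interleave-set
 * cookie from a fletcher64 over each mapping's offset and device serial
 * number (a stand-in until CXL labels are enabled, per the TODO below).
 */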
static int cxl_pmem_region_probe(struct device *dev)
{
	struct nd_mapping_desc mappings[CXL_DECODER_MAX_INTERLEAVE];
	struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
	struct cxl_region *cxlr = cxlr_pmem->cxlr;
	struct cxl_pmem_region_info *info = NULL;
	struct cxl_nvdimm_bridge *cxl_nvb;
	struct nd_interleave_set *nd_set;
	struct nd_region_desc ndr_desc;
	struct cxl_nvdimm *cxl_nvd;
	struct nvdimm *nvdimm;
	struct resource *res;
	int rc, i = 0;

	cxl_nvb = cxl_find_nvdimm_bridge(&cxlr_pmem->mapping[0].cxlmd->dev);
	if (!cxl_nvb) {
		dev_dbg(dev, "bridge not found\n");
		return -ENXIO;
	}
	cxlr_pmem->bridge = cxl_nvb;

	device_lock(&cxl_nvb->dev);
	if (!cxl_nvb->nvdimm_bus) {
		dev_dbg(dev, "nvdimm bus not found\n");
		rc = -ENXIO;
		goto err;
	}

	memset(&mappings, 0, sizeof(mappings));
	memset(&ndr_desc, 0, sizeof(ndr_desc));

	res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
	if (!res) {
		rc = -ENOMEM;
		goto err;
	}

	res->name = "Persistent Memory";
	res->start = cxlr_pmem->hpa_range.start;
	res->end = cxlr_pmem->hpa_range.end;
	res->flags = IORESOURCE_MEM;
	res->desc = IORES_DESC_PERSISTENT_MEMORY;

	rc = insert_resource(&iomem_resource, res);
	if (rc)
		goto err;

	rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res);
	if (rc)
		goto err;

	ndr_desc.res = res;
	ndr_desc.provider_data = cxlr_pmem;

	ndr_desc.numa_node = memory_add_physaddr_to_nid(res->start);
	ndr_desc.target_node = phys_to_target_node(res->start);
	if (ndr_desc.target_node == NUMA_NO_NODE) {
		ndr_desc.target_node = ndr_desc.numa_node;
		dev_dbg(&cxlr->dev, "changing target node from %d to %d",
			NUMA_NO_NODE, ndr_desc.target_node);
	}

	nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
	if (!nd_set) {
		rc = -ENOMEM;
		goto err;
	}

	ndr_desc.memregion = cxlr->id;
	set_bit(ND_REGION_CXL, &ndr_desc.flags);
	set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);

	info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL);
	if (!info) {
		rc = -ENOMEM;
		goto err;
	}

	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
		struct cxl_memdev *cxlmd = m->cxlmd;
		struct cxl_dev_state *cxlds = cxlmd->cxlds;
		struct device *d;

		d = device_find_child(&cxlmd->dev, NULL, match_cxl_nvdimm);
		if (!d) {
			dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i,
				dev_name(&cxlmd->dev));
			rc = -ENODEV;
			goto err;
		}

		/* safe to drop ref now with bridge lock held */
		put_device(d);

		cxl_nvd = to_cxl_nvdimm(d);
		nvdimm = dev_get_drvdata(&cxl_nvd->dev);
		if (!nvdimm) {
			dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i,
				dev_name(&cxlmd->dev));
			rc = -ENODEV;
			goto err;
		}
		cxl_nvd->region = cxlr_pmem;
		get_device(&cxlr_pmem->dev);
		m->cxl_nvd = cxl_nvd;
		mappings[i] = (struct nd_mapping_desc) {
			.nvdimm = nvdimm,
			.start = m->start,
			.size = m->size,
			.position = i,
		};
		info[i].offset = m->start;
		info[i].serial = cxlds->serial;
	}
	ndr_desc.num_mappings = cxlr_pmem->nr_mappings;
	ndr_desc.mapping = mappings;

	/*
	 * TODO enable CXL labels which skip the need for 'interleave-set cookie'
	 */
	nd_set->cookie1 =
		nd_fletcher64(info, sizeof(*info) * cxlr_pmem->nr_mappings, 0);
	nd_set->cookie2 = nd_set->cookie1;
	ndr_desc.nd_set = nd_set;

	cxlr_pmem->nd_region =
		nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc);
	if (!cxlr_pmem->nd_region) {
		rc = -ENOMEM;
		goto err;
	}

	rc = devm_add_action_or_reset(dev, unregister_nvdimm_region,
				      cxlr_pmem->nd_region);
out:
	kfree(info);
	device_unlock(&cxl_nvb->dev);
	put_device(&cxl_nvb->dev);

	return rc;

err:
	dev_dbg(dev, "failed to create nvdimm region\n");
	for (i--; i >= 0; i--) {
		nvdimm = mappings[i].nvdimm;
		cxl_nvd = nvdimm_provider_data(nvdimm);
		put_device(&cxl_nvd->region->dev);
		cxl_nvd->region = NULL;
	}
	goto out;
}

static struct cxl_driver cxl_pmem_region_driver = {
	.name = "cxl_pmem_region",
	.probe = cxl_pmem_region_probe,
	.id = CXL_DEVICE_PMEM_REGION,
};

/*
 * Return all bridges to the CXL_NVB_NEW state to invalidate any
 * ->state_work referring to the now destroyed cxl_pmem_wq.
 */
static int cxl_nvdimm_bridge_reset(struct device *dev, void *data)
{
	struct cxl_nvdimm_bridge *cxl_nvb;

	if (!is_cxl_nvdimm_bridge(dev))
		return 0;

	cxl_nvb = to_cxl_nvdimm_bridge(dev);
	device_lock(dev);
	cxl_nvb->state = CXL_NVB_NEW;
	device_unlock(dev);

	return 0;
}

static void destroy_cxl_pmem_wq(void)
{
	destroy_workqueue(cxl_pmem_wq);
	bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_nvdimm_bridge_reset);
}

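/*
 * Module load: mark the LSA-mutating commands for exclusive-access
 * enforcement, create the ordered workqueue used by the bridge state
 * machine, then register the bridge, nvdimm, and region drivers (unwound
 * in reverse order on failure).
 */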
static __init int cxl_pmem_init(void)
{
	int rc;

	set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds);
	set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds);

	cxl_pmem_wq = alloc_ordered_workqueue("cxl_pmem", 0);
	if (!cxl_pmem_wq)
		return -ENXIO;

	rc = cxl_driver_register(&cxl_nvdimm_bridge_driver);
	if (rc)
		goto err_bridge;

	rc = cxl_driver_register(&cxl_nvdimm_driver);
	if (rc)
		goto err_nvdimm;

	rc = cxl_driver_register(&cxl_pmem_region_driver);
	if (rc)
		goto err_region;

	return 0;

err_region:
	cxl_driver_unregister(&cxl_nvdimm_driver);
err_nvdimm:
	cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
err_bridge:
	destroy_cxl_pmem_wq();
	return rc;
}

static __exit void cxl_pmem_exit(void)
{
	cxl_driver_unregister(&cxl_pmem_region_driver);
	cxl_driver_unregister(&cxl_nvdimm_driver);
	cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
	destroy_cxl_pmem_wq();
}

MODULE_LICENSE("GPL v2");
module_init(cxl_pmem_init);
module_exit(cxl_pmem_exit);
MODULE_IMPORT_NS(CXL);
MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM_BRIDGE);
MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM);
MODULE_ALIAS_CXL(CXL_DEVICE_PMEM_REGION);