cxl/region: Fix region setup/teardown for RCDs
authorDan Williams <dan.j.williams@intel.com>
Mon, 3 Apr 2023 21:44:41 +0000 (14:44 -0700)
committerDan Williams <dan.j.williams@intel.com>
Tue, 4 Apr 2023 22:34:34 +0000 (15:34 -0700)
RCDs (CXL memory devices that link train without VH capability and show
up as root complex integrated endpoints), hide the presence of the link
between the endpoint and the host-bridge. The CXL region setup/teardown
paths assume that a link hop is present and go looking for at least one
'struct cxl_port' instance between the CXL root port-object and an
endpoint port-object leading to crashes of the form:

    BUG: kernel NULL pointer dereference, address: 0000000000000008
    [..]
    RIP: 0010:cxl_region_setup_targets+0x3e9/0xae0 [cxl_core]
    [..]
    Call Trace:
     <TASK>
     cxl_region_attach+0x46c/0x7a0 [cxl_core]
     cxl_create_region+0x20b/0x270 [cxl_core]
     cxl_mock_mem_probe+0x641/0x800 [cxl_mock_mem]
     platform_probe+0x5b/0xb0

Detect RCDs explicitly and skip walking the non-existent port hierarchy
between root and endpoint in that case.

While this has been a problem since:

commit 0a19bfc8de93 ("cxl/port: Add RCD endpoint port enumeration")

...it becomes a more reliable crash scenario with the new autodiscovery
implementation.

Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery")
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/168002858268.50647.728091521032131326.stgit@dwillia2-xfh.jf.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
drivers/cxl/core/region.c

index 808f23ec4e2bc18f7931193f85b198d272fb43d1..52bbf6268d5fb8b004aadb846bac7a12ecfaac5b 100644 (file)
@@ -134,9 +134,13 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
                struct cxl_endpoint_decoder *cxled = p->targets[i];
                struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
                struct cxl_port *iter = cxled_to_port(cxled);
+               struct cxl_dev_state *cxlds = cxlmd->cxlds;
                struct cxl_ep *ep;
                int rc = 0;
 
+               if (cxlds->rcd)
+                       goto endpoint_reset;
+
                while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
                        iter = to_cxl_port(iter->dev.parent);
 
@@ -153,6 +157,7 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
                                return rc;
                }
 
+endpoint_reset:
                rc = cxled->cxld.reset(&cxled->cxld);
                if (rc)
                        return rc;
@@ -1199,6 +1204,7 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr)
 {
        struct cxl_region_params *p = &cxlr->params;
        struct cxl_endpoint_decoder *cxled;
+       struct cxl_dev_state *cxlds;
        struct cxl_memdev *cxlmd;
        struct cxl_port *iter;
        struct cxl_ep *ep;
@@ -1214,6 +1220,10 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr)
        for (i = 0; i < p->nr_targets; i++) {
                cxled = p->targets[i];
                cxlmd = cxled_to_memdev(cxled);
+               cxlds = cxlmd->cxlds;
+
+               if (cxlds->rcd)
+                       continue;
 
                iter = cxled_to_port(cxled);
                while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
@@ -1229,14 +1239,24 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
 {
        struct cxl_region_params *p = &cxlr->params;
        struct cxl_endpoint_decoder *cxled;
+       struct cxl_dev_state *cxlds;
+       int i, rc, rch = 0, vh = 0;
        struct cxl_memdev *cxlmd;
        struct cxl_port *iter;
        struct cxl_ep *ep;
-       int i, rc;
 
        for (i = 0; i < p->nr_targets; i++) {
                cxled = p->targets[i];
                cxlmd = cxled_to_memdev(cxled);
+               cxlds = cxlmd->cxlds;
+
+               /* validate that all targets agree on topology */
+               if (!cxlds->rcd) {
+                       vh++;
+               } else {
+                       rch++;
+                       continue;
+               }
 
                iter = cxled_to_port(cxled);
                while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
@@ -1256,6 +1276,12 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
                }
        }
 
+       if (rch && vh) {
+               dev_err(&cxlr->dev, "mismatched CXL topologies detected\n");
+               cxl_region_teardown_targets(cxlr);
+               return -ENXIO;
+       }
+
        return 0;
 }