drm/amdgpu: move xgmi init/fini to xgmi_add/remove_device call (v2)
author Hawking Zhang <Hawking.Zhang@amd.com>
Mon, 23 Dec 2019 08:51:42 +0000 (16:51 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 6 Feb 2020 20:04:36 +0000 (15:04 -0500)
For sriov, the psp ip block has to be initialized before
the ih block for the dynamic register programming interface
that is needed for the vf ih ring buffer. On the other hand,
the current psp ip block hw_init function will initialize
the xgmi session, which actually depends on an interrupt to
return the session context. This results in an empty xgmi ta
session id and later failures on all the xgmi ta cmds
invoked from the vf. xgmi ta session initialization has to
be done after the ih ip block hw_init call.

To unify xgmi session init/fini for both the bare-metal and
sriov virtualization use scenarios, move xgmi ta init
to the xgmi_add_device call, and accordingly terminate the xgmi
ta session in the xgmi_remove_device call.

The existing suspend/resume sequence will not be changed.

v2: squash in return fix from Nirmoy

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Frank Min <Frank.Min@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h

index 3a1570dafe3482ac93992c0e76e1777d185721d6..939a114605c09a3ead6e5be447a910856ee6fbb7 100644 (file)
@@ -558,7 +558,7 @@ int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
        return psp_ta_invoke(psp, ta_cmd_id, psp->xgmi_context.session_id);
 }
 
-static int psp_xgmi_terminate(struct psp_context *psp)
+int psp_xgmi_terminate(struct psp_context *psp)
 {
        int ret;
 
@@ -579,7 +579,7 @@ static int psp_xgmi_terminate(struct psp_context *psp)
        return 0;
 }
 
-static int psp_xgmi_initialize(struct psp_context *psp)
+int psp_xgmi_initialize(struct psp_context *psp)
 {
        struct ta_xgmi_shared_memory *xgmi_cmd;
        int ret;
@@ -1420,16 +1420,6 @@ skip_memalloc:
                return ret;
        }
 
-       if (adev->gmc.xgmi.num_physical_nodes > 1) {
-               ret = psp_xgmi_initialize(psp);
-               /* Warning the XGMI seesion initialize failure
-                * Instead of stop driver initialization
-                */
-               if (ret)
-                       dev_err(psp->adev->dev,
-                               "XGMI: Failed to initialize XGMI session\n");
-       }
-
        if (psp->adev->psp.ta_fw) {
                ret = psp_ras_initialize(psp);
                if (ret)
@@ -1494,10 +1484,6 @@ static int psp_hw_fini(void *handle)
        void *tmr_buf;
        void **pptr;
 
-       if (adev->gmc.xgmi.num_physical_nodes > 1 &&
-           psp->xgmi_context.initialized == 1)
-                psp_xgmi_terminate(psp);
-
        if (psp->adev->psp.ta_fw) {
                psp_ras_terminate(psp);
                psp_dtm_terminate(psp);
index 611021514c5250d4f07e2a507c69abebeda8a6c7..c77e1abb538aae686dcb5d191b0d410f527e86fe 100644 (file)
@@ -362,6 +362,8 @@ int psp_gpu_reset(struct amdgpu_device *adev);
 int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
                        uint64_t cmd_gpu_addr, int cmd_size);
 
+int psp_xgmi_initialize(struct psp_context *psp);
+int psp_xgmi_terminate(struct psp_context *psp);
 int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
 
 int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
index a97af422575a319e7488bb7d16273e46ae5bb079..490f57d6704c2adf800448567d24376196423acc 100644 (file)
@@ -365,6 +365,13 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
                return 0;
 
        if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
+               ret = psp_xgmi_initialize(&adev->psp);
+               if (ret) {
+                       dev_err(adev->dev,
+                               "XGMI: Failed to initialize xgmi session\n");
+                       return ret;
+               }
+
                ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id);
                if (ret) {
                        dev_err(adev->dev,
@@ -451,16 +458,16 @@ exit:
        return ret;
 }
 
-void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
+int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
 {
        struct amdgpu_hive_info *hive;
 
        if (!adev->gmc.xgmi.supported)
-               return;
+               return -EINVAL;
 
        hive = amdgpu_get_xgmi_hive(adev, 1);
        if (!hive)
-               return;
+               return -EINVAL;
 
        if (!(hive->number_devices--)) {
                amdgpu_xgmi_sysfs_destroy(adev, hive);
@@ -471,6 +478,8 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
                amdgpu_xgmi_sysfs_rem_dev_info(adev, hive);
                mutex_unlock(&hive->hive_lock);
        }
+
+       return psp_xgmi_terminate(&adev->psp);
 }
 
 int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
index 74011fbc225190cbbb8f3c11b08270b8cbeca691..c62a4acf4c1475075891a12cf995278fc312ab1e 100644 (file)
@@ -40,7 +40,7 @@ struct amdgpu_hive_info {
 struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock);
 int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
-void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
+int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
 int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
 int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
                struct amdgpu_device *peer_adev);