drm/amdgpu/vg20: Restructure uvd.inst to support multiple instances
Author:     James Zhu <James.Zhu@amd.com>
AuthorDate: Tue, 15 May 2018 19:31:24 +0000 (14:31 -0500)
Commit:     Alex Deucher <alexander.deucher@amd.com>
CommitDate: Fri, 18 May 2018 21:08:12 +0000 (16:08 -0500)
Vega20 has dual UVD instances, so multiple-instance support needs to be
added for uvd. Restructure uvd.inst so that indexed accesses
(uvd.inst[i]) replace the implicit single-instance uvd.inst->
dereferences. Repurpose amdgpu_ring::me as the instance index and
initialize it to 0. There are no logical changes here.
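
A minimal sketch of the access-pattern change (identifiers are taken
from the diff below; the loop bound assumes the num_uvd_inst field used
throughout this patch):

	/* before: implicit single instance */
	ring = &adev->uvd.inst->ring;

	/* after: either iterate over every instance ... */
	for (j = 0; j < adev->uvd.num_uvd_inst; ++j)
		ring = &adev->uvd.inst[j].ring;

	/* ... or recover the instance from the ring via amdgpu_ring::me */
	ring = &adev->uvd.inst[ring->me].ring;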

Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 1070f40..39ec6b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -376,14 +376,14 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
        struct amdgpu_device *adev = ring->adev;
        uint64_t index;
 
-       if (ring != &adev->uvd.inst->ring) {
+       if (ring != &adev->uvd.inst[ring->me].ring) {
                ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
                ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
        } else {
                /* put fence directly behind firmware */
                index = ALIGN(adev->uvd.fw->size, 8);
-               ring->fence_drv.cpu_addr = adev->uvd.inst->cpu_addr + index;
-               ring->fence_drv.gpu_addr = adev->uvd.inst->gpu_addr + index;
+               ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index;
+               ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
        }
        amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
        amdgpu_irq_get(adev, irq_src, irq_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 5620ed2..91517b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -286,7 +286,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
        struct drm_crtc *crtc;
        uint32_t ui32 = 0;
        uint64_t ui64 = 0;
-       int i, found;
+       int i, j, found;
        int ui32_size = sizeof(ui32);
 
        if (!info->return_size || !info->return_pointer)
@@ -348,7 +348,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
                        break;
                case AMDGPU_HW_IP_UVD:
                        type = AMD_IP_BLOCK_TYPE_UVD;
-                       ring_mask = adev->uvd.inst->ring.ready ? 1 : 0;
+                       for (i = 0; i < adev->uvd.num_uvd_inst; i++)
+                               ring_mask |= ((adev->uvd.inst[i].ring.ready ? 1 : 0) << i);
                        ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
                        ib_size_alignment = 16;
                        break;
@@ -361,8 +362,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
                        break;
                case AMDGPU_HW_IP_UVD_ENC:
                        type = AMD_IP_BLOCK_TYPE_UVD;
-                       for (i = 0; i < adev->uvd.num_enc_rings; i++)
-                               ring_mask |= ((adev->uvd.inst->ring_enc[i].ready ? 1 : 0) << i);
+                       for (i = 0; i < adev->uvd.num_uvd_inst; i++)
+                               for (j = 0; j < adev->uvd.num_enc_rings; j++)
+                                       ring_mask |=
+                                       ((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) <<
+                                       (j + i * adev->uvd.num_enc_rings));
                        ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
                        ib_size_alignment = 1;
                        break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 49cad08..c6850b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -362,6 +362,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 
        dma_fence_put(ring->vmid_wait);
        ring->vmid_wait = NULL;
+       ring->me = 0;
 
        ring->adev->rings[ring->idx] = NULL;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 02683a0..e961492 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -127,7 +127,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
        const char *fw_name;
        const struct common_firmware_header *hdr;
        unsigned version_major, version_minor, family_id;
-       int i, r;
+       int i, j, r;
 
        INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler);
 
@@ -236,28 +236,30 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
                bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
 
-       r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
-                                   AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst->vcpu_bo,
-                                   &adev->uvd.inst->gpu_addr, &adev->uvd.inst->cpu_addr);
-       if (r) {
-               dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
-               return r;
-       }
+       for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
 
-       ring = &adev->uvd.inst->ring;
-       rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-       r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity,
-                                 rq, NULL);
-       if (r != 0) {
-               DRM_ERROR("Failed setting up UVD run queue.\n");
-               return r;
-       }
+               r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+                                           AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
+                                           &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
+               if (r) {
+                       dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
+                       return r;
+               }
 
-       for (i = 0; i < adev->uvd.max_handles; ++i) {
-               atomic_set(&adev->uvd.inst->handles[i], 0);
-               adev->uvd.inst->filp[i] = NULL;
-       }
+               ring = &adev->uvd.inst[j].ring;
+               rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+               r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity,
+                                         rq, NULL);
+               if (r != 0) {
+                       DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j);
+                       return r;
+               }
 
+               for (i = 0; i < adev->uvd.max_handles; ++i) {
+                       atomic_set(&adev->uvd.inst[j].handles[i], 0);
+                       adev->uvd.inst[j].filp[i] = NULL;
+               }
+       }
        /* from uvd v5.0 HW addressing capacity increased to 64 bits */
        if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
                adev->uvd.address_64_bit = true;
@@ -284,20 +286,22 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 
 int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 {
-       int i;
-       kfree(adev->uvd.inst->saved_bo);
+       int i, j;
 
-       drm_sched_entity_fini(&adev->uvd.inst->ring.sched, &adev->uvd.inst->entity);
+       for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+               kfree(adev->uvd.inst[j].saved_bo);
 
-       amdgpu_bo_free_kernel(&adev->uvd.inst->vcpu_bo,
-                             &adev->uvd.inst->gpu_addr,
-                             (void **)&adev->uvd.inst->cpu_addr);
+               drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity);
 
-       amdgpu_ring_fini(&adev->uvd.inst->ring);
+               amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
+                                     &adev->uvd.inst[j].gpu_addr,
+                                     (void **)&adev->uvd.inst[j].cpu_addr);
 
-       for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
-               amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]);
+               amdgpu_ring_fini(&adev->uvd.inst[j].ring);
 
+               for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
+                       amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
+       }
        release_firmware(adev->uvd.fw);
 
        return 0;
@@ -307,32 +311,33 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 {
        unsigned size;
        void *ptr;
-       int i;
+       int i, j;
 
-       if (adev->uvd.inst->vcpu_bo == NULL)
-               return 0;
+       for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+               if (adev->uvd.inst[j].vcpu_bo == NULL)
+                       continue;
 
-       cancel_delayed_work_sync(&adev->uvd.inst->idle_work);
+               cancel_delayed_work_sync(&adev->uvd.inst[j].idle_work);
 
-       /* only valid for physical mode */
-       if (adev->asic_type < CHIP_POLARIS10) {
-               for (i = 0; i < adev->uvd.max_handles; ++i)
-                       if (atomic_read(&adev->uvd.inst->handles[i]))
-                               break;
+               /* only valid for physical mode */
+               if (adev->asic_type < CHIP_POLARIS10) {
+                       for (i = 0; i < adev->uvd.max_handles; ++i)
+                               if (atomic_read(&adev->uvd.inst[j].handles[i]))
+                                       break;
 
-               if (i == adev->uvd.max_handles)
-                       return 0;
-       }
-
-       size = amdgpu_bo_size(adev->uvd.inst->vcpu_bo);
-       ptr = adev->uvd.inst->cpu_addr;
+                       if (i == adev->uvd.max_handles)
+                               continue;
+               }
 
-       adev->uvd.inst->saved_bo = kmalloc(size, GFP_KERNEL);
-       if (!adev->uvd.inst->saved_bo)
-               return -ENOMEM;
+               size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
+               ptr = adev->uvd.inst[j].cpu_addr;
 
-       memcpy_fromio(adev->uvd.inst->saved_bo, ptr, size);
+               adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL);
+               if (!adev->uvd.inst[j].saved_bo)
+                       return -ENOMEM;
 
+               memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
+       }
        return 0;
 }
 
@@ -340,59 +345,65 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
 {
        unsigned size;
        void *ptr;
+       int i;
 
-       if (adev->uvd.inst->vcpu_bo == NULL)
-               return -EINVAL;
+       for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+               if (adev->uvd.inst[i].vcpu_bo == NULL)
+                       return -EINVAL;
 
-       size = amdgpu_bo_size(adev->uvd.inst->vcpu_bo);
-       ptr = adev->uvd.inst->cpu_addr;
+               size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo);
+               ptr = adev->uvd.inst[i].cpu_addr;
 
-       if (adev->uvd.inst->saved_bo != NULL) {
-               memcpy_toio(ptr, adev->uvd.inst->saved_bo, size);
-               kfree(adev->uvd.inst->saved_bo);
-               adev->uvd.inst->saved_bo = NULL;
-       } else {
-               const struct common_firmware_header *hdr;
-               unsigned offset;
-
-               hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
-               if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
-                       offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
-                       memcpy_toio(adev->uvd.inst->cpu_addr, adev->uvd.fw->data + offset,
-                                   le32_to_cpu(hdr->ucode_size_bytes));
-                       size -= le32_to_cpu(hdr->ucode_size_bytes);
-                       ptr += le32_to_cpu(hdr->ucode_size_bytes);
+               if (adev->uvd.inst[i].saved_bo != NULL) {
+                       memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
+                       kfree(adev->uvd.inst[i].saved_bo);
+                       adev->uvd.inst[i].saved_bo = NULL;
+               } else {
+                       const struct common_firmware_header *hdr;
+                       unsigned offset;
+
+                       hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+                       if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+                               offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+                               memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
+                                           le32_to_cpu(hdr->ucode_size_bytes));
+                               size -= le32_to_cpu(hdr->ucode_size_bytes);
+                               ptr += le32_to_cpu(hdr->ucode_size_bytes);
+                       }
+                       memset_io(ptr, 0, size);
+                       /* to restore uvd fence seq */
+                       amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring);
                }
-               memset_io(ptr, 0, size);
-               /* to restore uvd fence seq */
-               amdgpu_fence_driver_force_completion(&adev->uvd.inst->ring);
        }
-
        return 0;
 }
 
 void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 {
-       struct amdgpu_ring *ring = &adev->uvd.inst->ring;
-       int i, r;
+       struct amdgpu_ring *ring;
+       int i, j, r;
 
-       for (i = 0; i < adev->uvd.max_handles; ++i) {
-               uint32_t handle = atomic_read(&adev->uvd.inst->handles[i]);
-               if (handle != 0 && adev->uvd.inst->filp[i] == filp) {
-                       struct dma_fence *fence;
-
-                       r = amdgpu_uvd_get_destroy_msg(ring, handle,
-                                                      false, &fence);
-                       if (r) {
-                               DRM_ERROR("Error destroying UVD (%d)!\n", r);
-                               continue;
-                       }
+       for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+               ring = &adev->uvd.inst[j].ring;
 
-                       dma_fence_wait(fence, false);
-                       dma_fence_put(fence);
+               for (i = 0; i < adev->uvd.max_handles; ++i) {
+                       uint32_t handle = atomic_read(&adev->uvd.inst[j].handles[i]);
+                       if (handle != 0 && adev->uvd.inst[j].filp[i] == filp) {
+                               struct dma_fence *fence;
+
+                               r = amdgpu_uvd_get_destroy_msg(ring, handle,
+                                                              false, &fence);
+                               if (r) {
+                                       DRM_ERROR("Error destroying UVD(%d) %d!\n", j, r);
+                                       continue;
+                               }
 
-                       adev->uvd.inst->filp[i] = NULL;
-                       atomic_set(&adev->uvd.inst->handles[i], 0);
+                               dma_fence_wait(fence, false);
+                               dma_fence_put(fence);
+
+                               adev->uvd.inst[j].filp[i] = NULL;
+                               atomic_set(&adev->uvd.inst[j].handles[i], 0);
+                       }
                }
        }
 }
@@ -667,15 +678,16 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
        void *ptr;
        long r;
        int i;
+       uint32_t ip_instance = ctx->parser->job->ring->me;
 
        if (offset & 0x3F) {
-               DRM_ERROR("UVD messages must be 64 byte aligned!\n");
+               DRM_ERROR("UVD(%d) messages must be 64 byte aligned!\n", ip_instance);
                return -EINVAL;
        }
 
        r = amdgpu_bo_kmap(bo, &ptr);
        if (r) {
-               DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
+               DRM_ERROR("Failed mapping the UVD(%d) message (%ld)!\n", ip_instance, r);
                return r;
        }
 
@@ -685,7 +697,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
        handle = msg[2];
 
        if (handle == 0) {
-               DRM_ERROR("Invalid UVD handle!\n");
+               DRM_ERROR("Invalid UVD(%d) handle!\n", ip_instance);
                return -EINVAL;
        }
 
@@ -696,18 +708,18 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
 
                /* try to alloc a new handle */
                for (i = 0; i < adev->uvd.max_handles; ++i) {
-                       if (atomic_read(&adev->uvd.inst->handles[i]) == handle) {
-                               DRM_ERROR("Handle 0x%x already in use!\n", handle);
+                       if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
+                               DRM_ERROR("(%d)Handle 0x%x already in use!\n", ip_instance, handle);
                                return -EINVAL;
                        }
 
-                       if (!atomic_cmpxchg(&adev->uvd.inst->handles[i], 0, handle)) {
-                               adev->uvd.inst->filp[i] = ctx->parser->filp;
+                       if (!atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], 0, handle)) {
+                               adev->uvd.inst[ip_instance].filp[i] = ctx->parser->filp;
                                return 0;
                        }
                }
 
-               DRM_ERROR("No more free UVD handles!\n");
+               DRM_ERROR("No more free UVD(%d) handles!\n", ip_instance);
                return -ENOSPC;
 
        case 1:
@@ -719,27 +731,27 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
 
                /* validate the handle */
                for (i = 0; i < adev->uvd.max_handles; ++i) {
-                       if (atomic_read(&adev->uvd.inst->handles[i]) == handle) {
-                               if (adev->uvd.inst->filp[i] != ctx->parser->filp) {
-                                       DRM_ERROR("UVD handle collision detected!\n");
+                       if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
+                               if (adev->uvd.inst[ip_instance].filp[i] != ctx->parser->filp) {
+                                       DRM_ERROR("UVD(%d) handle collision detected!\n", ip_instance);
                                        return -EINVAL;
                                }
                                return 0;
                        }
                }
 
-               DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
+               DRM_ERROR("Invalid UVD(%d) handle 0x%x!\n", ip_instance, handle);
                return -ENOENT;
 
        case 2:
                /* it's a destroy msg, free the handle */
                for (i = 0; i < adev->uvd.max_handles; ++i)
-                       atomic_cmpxchg(&adev->uvd.inst->handles[i], handle, 0);
+                       atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], handle, 0);
                amdgpu_bo_kunmap(bo);
                return 0;
 
        default:
-               DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
+               DRM_ERROR("Illegal UVD(%d) message type (%d)!\n", ip_instance, msg_type);
                return -EINVAL;
        }
        BUG();
@@ -1043,7 +1055,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
                if (r)
                        goto err_free;
 
-               r = amdgpu_job_submit(job, ring, &adev->uvd.inst->entity,
+               r = amdgpu_job_submit(job, ring, &adev->uvd.inst[ring->me].entity,
                                      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
                if (r)
                        goto err_free;
@@ -1189,27 +1201,28 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
        struct dma_fence *fence;
        long r;
+       uint32_t ip_instance = ring->me;
 
        r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
        if (r) {
-               DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+               DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r);
                goto error;
        }
 
        r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
        if (r) {
-               DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+               DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r);
                goto error;
        }
 
        r = dma_fence_wait_timeout(fence, false, timeout);
        if (r == 0) {
-               DRM_ERROR("amdgpu: IB test timed out.\n");
+               DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance);
                r = -ETIMEDOUT;
        } else if (r < 0) {
-               DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+               DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r);
        } else {
-               DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+               DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx);
                r = 0;
        }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 66d4bea..08f3b6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -58,7 +58,7 @@ static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
 
-       return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+       return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR);
 }
 
 /**
@@ -72,10 +72,10 @@ static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
 
-       if (ring == &adev->uvd.inst->ring_enc[0])
-               return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
+       if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
+               return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR);
        else
-               return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
+               return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2);
 }
 
 /**
@@ -89,7 +89,7 @@ static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
 
-       return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
+       return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR);
 }
 
 /**
@@ -106,10 +106,10 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
        if (ring->use_doorbell)
                return adev->wb.wb[ring->wptr_offs];
 
-       if (ring == &adev->uvd.inst->ring_enc[0])
-               return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
+       if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
+               return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR);
        else
-               return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
+               return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2);
 }
 
 /**
@@ -123,7 +123,7 @@ static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
 
-       WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+       WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
 }
 
 /**
@@ -144,11 +144,11 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
                return;
        }
 
-       if (ring == &adev->uvd.inst->ring_enc[0])
-               WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR,
+       if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
+               WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR,
                        lower_32_bits(ring->wptr));
        else
-               WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2,
+               WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2,
                        lower_32_bits(ring->wptr));
 }
 
@@ -387,19 +387,21 @@ static int uvd_v7_0_sw_init(void *handle)
 {
        struct amdgpu_ring *ring;
        struct drm_sched_rq *rq;
-       int i, r;
+       int i, j, r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       /* UVD TRAP */
-       r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.inst->irq);
-       if (r)
-               return r;
-
-       /* UVD ENC TRAP */
-       for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
-               r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.inst->irq);
+       for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+               /* UVD TRAP */
+               r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.inst[j].irq);
                if (r)
                        return r;
+
+               /* UVD ENC TRAP */
+               for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+                       r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.inst[j].irq);
+                       if (r)
+                               return r;
+               }
        }
 
        r = amdgpu_uvd_sw_init(adev);
@@ -416,43 +418,48 @@ static int uvd_v7_0_sw_init(void *handle)
                DRM_INFO("PSP loading UVD firmware\n");
        }
 
-       ring = &adev->uvd.inst->ring_enc[0];
-       rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-       r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity_enc,
-                                 rq, NULL);
-       if (r) {
-               DRM_ERROR("Failed setting up UVD ENC run queue.\n");
-               return r;
+       for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+               ring = &adev->uvd.inst[j].ring_enc[0];
+               rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+               r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity_enc,
+                                         rq, NULL);
+               if (r) {
+                       DRM_ERROR("(%d)Failed setting up UVD ENC run queue.\n", j);
+                       return r;
+               }
        }
 
        r = amdgpu_uvd_resume(adev);
        if (r)
                return r;
-       if (!amdgpu_sriov_vf(adev)) {
-               ring = &adev->uvd.inst->ring;
-               sprintf(ring->name, "uvd");
-               r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
-               if (r)
-                       return r;
-       }
 
-       for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
-               ring = &adev->uvd.inst->ring_enc[i];
-               sprintf(ring->name, "uvd_enc%d", i);
-               if (amdgpu_sriov_vf(adev)) {
-                       ring->use_doorbell = true;
-
-                       /* currently only use the first enconding ring for
-                        * sriov, so set unused location for other unused rings.
-                        */
-                       if (i == 0)
-                               ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
-                       else
-                               ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1;
+       for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+               if (!amdgpu_sriov_vf(adev)) {
+                       ring = &adev->uvd.inst[j].ring;
+                       sprintf(ring->name, "uvd<%d>", j);
+                       r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
+                       if (r)
+                               return r;
+               }
+
+               for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+                       ring = &adev->uvd.inst[j].ring_enc[i];
+                       sprintf(ring->name, "uvd_enc%d<%d>", i, j);
+                       if (amdgpu_sriov_vf(adev)) {
+                               ring->use_doorbell = true;
+
+                               /* currently only use the first enconding ring for
+                                * sriov, so set unused location for other unused rings.
+                                */
+                               if (i == 0)
+                                       ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
+                               else
+                                       ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1;
+                       }
+                       r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
+                       if (r)
+                               return r;
                }
-               r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
-               if (r)
-                       return r;
        }
 
        r = amdgpu_virt_alloc_mm_table(adev);
@@ -464,7 +471,7 @@ static int uvd_v7_0_sw_init(void *handle)
 
 static int uvd_v7_0_sw_fini(void *handle)
 {
-       int i, r;
+       int i, j, r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
        amdgpu_virt_free_mm_table(adev);
@@ -473,11 +480,12 @@ static int uvd_v7_0_sw_fini(void *handle)
        if (r)
                return r;
 
-       drm_sched_entity_fini(&adev->uvd.inst->ring_enc[0].sched, &adev->uvd.inst->entity_enc);
-
-       for (i = 0; i < adev->uvd.num_enc_rings; ++i)
-               amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]);
+       for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+               drm_sched_entity_fini(&adev->uvd.inst[j].ring_enc[0].sched, &adev->uvd.inst[j].entity_enc);
 
+               for (i = 0; i < adev->uvd.num_enc_rings; ++i)
+                       amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
+       }
        return amdgpu_uvd_sw_fini(adev);
 }
 
@@ -491,9 +499,9 @@ static int uvd_v7_0_sw_fini(void *handle)
 static int uvd_v7_0_hw_init(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-       struct amdgpu_ring *ring = &adev->uvd.inst->ring;
+       struct amdgpu_ring *ring;
        uint32_t tmp;
-       int i, r;
+       int i, j, r;
 
        if (amdgpu_sriov_vf(adev))
                r = uvd_v7_0_sriov_start(adev);
@@ -502,57 +510,60 @@ static int uvd_v7_0_hw_init(void *handle)
        if (r)
                goto done;
 
-       if (!amdgpu_sriov_vf(adev)) {
-               ring->ready = true;
-               r = amdgpu_ring_test_ring(ring);
-               if (r) {
-                       ring->ready = false;
-                       goto done;
+       for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+               ring = &adev->uvd.inst[j].ring;
+
+               if (!amdgpu_sriov_vf(adev)) {
+                       ring->ready = true;
+                       r = amdgpu_ring_test_ring(ring);
+                       if (r) {
+                               ring->ready = false;
+                               goto done;
+                       }
+
+                       r = amdgpu_ring_alloc(ring, 10);
+                       if (r) {
+                               DRM_ERROR("amdgpu: (%d)ring failed to lock UVD ring (%d).\n", j, r);
+                               goto done;
+                       }
+
+                       tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
+                               mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
+                       amdgpu_ring_write(ring, tmp);
+                       amdgpu_ring_write(ring, 0xFFFFF);
+
+                       tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
+                               mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
+                       amdgpu_ring_write(ring, tmp);
+                       amdgpu_ring_write(ring, 0xFFFFF);
+
+                       tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
+                               mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
+                       amdgpu_ring_write(ring, tmp);
+                       amdgpu_ring_write(ring, 0xFFFFF);
+
+                       /* Clear timeout status bits */
+                       amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
+                               mmUVD_SEMA_TIMEOUT_STATUS), 0));
+                       amdgpu_ring_write(ring, 0x8);
+
+                       amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
+                               mmUVD_SEMA_CNTL), 0));
+                       amdgpu_ring_write(ring, 3);
+
+                       amdgpu_ring_commit(ring);
                }
 
-               r = amdgpu_ring_alloc(ring, 10);
-               if (r) {
-                       DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
-                       goto done;
+               for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+                       ring = &adev->uvd.inst[j].ring_enc[i];
+                       ring->ready = true;
+                       r = amdgpu_ring_test_ring(ring);
+                       if (r) {
+                               ring->ready = false;
+                               goto done;
+                       }
                }
-
-               tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
-                       mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
-               amdgpu_ring_write(ring, tmp);
-               amdgpu_ring_write(ring, 0xFFFFF);
-
-               tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
-                       mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
-               amdgpu_ring_write(ring, tmp);
-               amdgpu_ring_write(ring, 0xFFFFF);
-
-               tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
-                       mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
-               amdgpu_ring_write(ring, tmp);
-               amdgpu_ring_write(ring, 0xFFFFF);
-
-               /* Clear timeout status bits */
-               amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
-                       mmUVD_SEMA_TIMEOUT_STATUS), 0));
-               amdgpu_ring_write(ring, 0x8);
-
-               amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
-                       mmUVD_SEMA_CNTL), 0));
-               amdgpu_ring_write(ring, 3);
-
-               amdgpu_ring_commit(ring);
        }
-
-       for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
-               ring = &adev->uvd.inst->ring_enc[i];
-               ring->ready = true;
-               r = amdgpu_ring_test_ring(ring);
-               if (r) {
-                       ring->ready = false;
-                       goto done;
-               }
-       }
-
 done:
        if (!r)
                DRM_INFO("UVD and UVD ENC initialized successfully.\n");
@@ -570,7 +581,7 @@ done:
 static int uvd_v7_0_hw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-       struct amdgpu_ring *ring = &adev->uvd.inst->ring;
+       int i;
 
        if (!amdgpu_sriov_vf(adev))
                uvd_v7_0_stop(adev);
@@ -579,7 +590,8 @@ static int uvd_v7_0_hw_fini(void *handle)
                DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
        }
 
-       ring->ready = false;
+       for (i = 0; i < adev->uvd.num_uvd_inst; ++i)
+               adev->uvd.inst[i].ring.ready = false;
 
        return 0;
 }
@@ -619,48 +631,51 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
 {
        uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
        uint32_t offset;
+       int i;
 
-       if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
-               WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
-                       lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
-               WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
-                       upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
-               offset = 0;
-       } else {
-               WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
-                       lower_32_bits(adev->uvd.inst->gpu_addr));
-               WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
-                       upper_32_bits(adev->uvd.inst->gpu_addr));
-               offset = size;
-       }
+       for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+               if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+                       WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+                               lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+                       WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+                               upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+                       offset = 0;
+               } else {
+                       WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+                               lower_32_bits(adev->uvd.inst[i].gpu_addr));
+                       WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+                               upper_32_bits(adev->uvd.inst[i].gpu_addr));
+                       offset = size;
+               }
 
-       WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
-                               AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
-       WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
-
-       WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
-                       lower_32_bits(adev->uvd.inst->gpu_addr + offset));
-       WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
-                       upper_32_bits(adev->uvd.inst->gpu_addr + offset));
-       WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21));
-       WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE);
-
-       WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
-                       lower_32_bits(adev->uvd.inst->gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
-       WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
-                       upper_32_bits(adev->uvd.inst->gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
-       WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21));
-       WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2,
-                       AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
-
-       WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
-                       adev->gfx.config.gb_addr_config);
-       WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG,
-                       adev->gfx.config.gb_addr_config);
-       WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
-                       adev->gfx.config.gb_addr_config);
-
-       WREG32_SOC15(UVD, 0, mmUVD_GP_SCRATCH4, adev->uvd.max_handles);
+               WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0,
+                                       AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+               WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size);
+
+               WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+                               lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+               WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+                               upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+               WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21));
+               WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE);
+
+               WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+                               lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+               WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+                               upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+               WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21));
+               WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2,
+                               AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
+
+               WREG32_SOC15(UVD, i, mmUVD_UDEC_ADDR_CONFIG,
+                               adev->gfx.config.gb_addr_config);
+               WREG32_SOC15(UVD, i, mmUVD_UDEC_DB_ADDR_CONFIG,
+                               adev->gfx.config.gb_addr_config);
+               WREG32_SOC15(UVD, i, mmUVD_UDEC_DBW_ADDR_CONFIG,
+                               adev->gfx.config.gb_addr_config);
+
+               WREG32_SOC15(UVD, i, mmUVD_GP_SCRATCH4, adev->uvd.max_handles);
+       }
 }
 
 static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
@@ -670,6 +685,7 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
        uint64_t addr = table->gpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
        uint32_t size;
+       int i;
 
        size = header->header_size + header->vce_table_size + header->uvd_table_size;
 
@@ -689,11 +705,12 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
        /* 4, set resp to zero */
        WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0);
 
-       WDOORBELL32(adev->uvd.inst->ring_enc[0].doorbell_index, 0);
-       adev->wb.wb[adev->uvd.inst->ring_enc[0].wptr_offs] = 0;
-       adev->uvd.inst->ring_enc[0].wptr = 0;
-       adev->uvd.inst->ring_enc[0].wptr_old = 0;
-
+       for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+               WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0);
+               adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0;
+               adev->uvd.inst[i].ring_enc[0].wptr = 0;
+               adev->uvd.inst[i].ring_enc[0].wptr_old = 0;
+       }
        /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
        WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001);
 
@@ -726,6 +743,7 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
        struct mmsch_v1_0_cmd_end end = { {0} };
        uint32_t *init_table = adev->virt.mm_table.cpu_addr;
        struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
+       uint8_t i = 0;
 
        direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
        direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
@@ -743,120 +761,121 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
 
                init_table += header->uvd_table_offset;
 
-               ring = &adev->uvd.inst->ring;
-               ring->wptr = 0;
-               size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
-
-               MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
-                                                  0xFFFFFFFF, 0x00000004);
-               /* mc resume*/
-               if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
-                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
-                                                   lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
-                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
-                                                   upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
-                       offset = 0;
-               } else {
-                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
-                                                   lower_32_bits(adev->uvd.inst->gpu_addr));
-                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
-                                                   upper_32_bits(adev->uvd.inst->gpu_addr));
-                       offset = size;
+               for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+                       ring = &adev->uvd.inst[i].ring;
+                       ring->wptr = 0;
+                       size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
+
+                       MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+                                                          0xFFFFFFFF, 0x00000004);
+                       /* mc resume*/
+                       if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+                               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+                                                           lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+                               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+                                                           upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+                               offset = 0;
+                       } else {
+                               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+                                                           lower_32_bits(adev->uvd.inst[i].gpu_addr));
+                               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+                                                           upper_32_bits(adev->uvd.inst[i].gpu_addr));
+                               offset = size;
+                       }
+
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
+                                                   AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size);
+
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+                                                   lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+                                                   upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
+
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+                                                   lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+                                                   upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
+                                                   AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
+
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
+                       /* mc resume end*/
+
+                       /* disable clock gating */
+                       MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_CGC_CTRL),
+                                                          ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
+
+                       /* disable interupt */
+                       MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
+                                                          ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
+
+                       /* stall UMC and register bus before resetting VCPU */
+                       MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
+                                                          ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+                                                          UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+                       /* put LMI, VCPU, RBC etc... into reset */
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
+                                                   (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+                                                              UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+                                                              UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+                                                              UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+                                                              UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+                                                              UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+                                                              UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+                                                              UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
+
+                       /* initialize UVD memory controller */
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL),
+                                                   (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+                                                              UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+                                                              UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+                                                              UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+                                                              UVD_LMI_CTRL__REQ_MODE_MASK |
+                                                              0x00100000L));
+
+                       /* take all subblocks out of reset, except VCPU */
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
+                                                   UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+                       /* enable VCPU clock */
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
+                                                   UVD_VCPU_CNTL__CLK_EN_MASK);
+
+                       /* enable master interrupt */
+                       MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
+                                                          ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+                                                          (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
+
+                       /* clear the bit 4 of UVD_STATUS */
+                       MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+                                                          ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
+
+                       /* force RBC into idle state */
+                       size = order_base_2(ring->ring_size);
+                       tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
+                       tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
+
+                       ring = &adev->uvd.inst[i].ring_enc[0];
+                       ring->wptr = 0;
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO), ring->gpu_addr);
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE), ring->ring_size / 4);
+
+                       /* boot up the VCPU */
+                       MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), 0);
+
+                       /* enable UMC */
+                       MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
+                                                                                          ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
+
+                       MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0x02, 0x02);
                }
-
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
-                                           AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size);
-
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
-                                           lower_32_bits(adev->uvd.inst->gpu_addr + offset));
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
-                                           upper_32_bits(adev->uvd.inst->gpu_addr + offset));
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
-
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
-                                           lower_32_bits(adev->uvd.inst->gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
-                                           upper_32_bits(adev->uvd.inst->gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2),
-                                           AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
-
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
-               /* mc resume end*/
-
-               /* disable clock gating */
-               MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL),
-                                                  ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
-
-               /* disable interupt */
-               MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
-                                                  ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
-
-               /* stall UMC and register bus before resetting VCPU */
-               MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
-                                                  ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
-                                                  UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-
-               /* put LMI, VCPU, RBC etc... into reset */
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
-                                           (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
-                                                      UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
-                                                      UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
-                                                      UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
-                                                      UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
-                                                      UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
-                                                      UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
-                                                      UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
-
-               /* initialize UVD memory controller */
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL),
-                                           (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
-                                                      UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
-                                                      UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
-                                                      UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
-                                                      UVD_LMI_CTRL__REQ_MODE_MASK |
-                                                      0x00100000L));
-
-               /* take all subblocks out of reset, except VCPU */
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
-                                           UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
-
-               /* enable VCPU clock */
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
-                                           UVD_VCPU_CNTL__CLK_EN_MASK);
-
-               /* enable master interrupt */
-               MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
-                                                  ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
-                                                  (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
-
-               /* clear the bit 4 of UVD_STATUS */
-               MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
-                                                  ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
-
-               /* force RBC into idle state */
-               size = order_base_2(ring->ring_size);
-               tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
-               tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp);
-
-               ring = &adev->uvd.inst->ring_enc[0];
-               ring->wptr = 0;
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr);
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4);
-
-               /* boot up the VCPU */
-               MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0);
-
-               /* enable UMC */
-               MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
-                                                                                  ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
-
-               MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02);
-
                /* add end packet */
                memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
                table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
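For context: under SR-IOV the driver cannot program UVD registers directly, so each MMSCH_V1_0_INSERT_DIRECT_* macro above appends a command to an init table that the MMSCH firmware later replays on the VF's behalf. A minimal sketch of a direct-write entry; the struct layout, command id, and names are illustrative, not the real mmsch_v1_0.h definitions:

    #include <stdint.h>
    #include <string.h>

    /* illustrative only; see mmsch_v1_0.h for the real layout */
    struct direct_wt_cmd {
            uint32_t cmd_type;      /* direct register write */
            uint32_t reg_offset;    /* dword offset of the target register */
            uint32_t value;         /* value MMSCH writes on our behalf */
    };

    /* append one command; return dwords consumed so the caller can
     * advance init_table and grow table_size, as the macros do */
    static uint32_t insert_direct_wt(uint32_t *table, uint32_t reg,
                                     uint32_t value)
    {
            struct direct_wt_cmd cmd = {
                    .cmd_type = 1,  /* assumed command id */
                    .reg_offset = reg,
                    .value = value,
            };

            memcpy(table, &cmd, sizeof(cmd));
            return sizeof(cmd) / 4;
    }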
@@ -875,15 +894,17 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
  */
 static int uvd_v7_0_start(struct amdgpu_device *adev)
 {
-       struct amdgpu_ring *ring = &adev->uvd.inst->ring;
+       struct amdgpu_ring *ring;
        uint32_t rb_bufsz, tmp;
        uint32_t lmi_swap_cntl;
        uint32_t mp_swap_cntl;
-       int i, j, r;
+       int i, j, k, r;
 
-       /* disable DPG */
-       WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
-                       ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+       for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
+               /* disable DPG */
+               WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_POWER_STATUS), 0,
+                               ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+       }
 
        /* disable byte swapping */
        lmi_swap_cntl = 0;
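The conversion leans on SOC15_REG_OFFSET() and the RREG32/WREG32_SOC15 wrappers already taking an instance argument, so replacing the hard-coded 0 with the loop index k (or ring->me further down) is enough to steer every access to the right instance's register aperture. Conceptually the lookup is a per-IP, per-instance base table; a simplified sketch (the real macro indexes adev->reg_offset in soc15_common.h, and the array sizes here are placeholders):

    #include <stdint.h>

    #define MAX_HWIP          16    /* placeholder sizes */
    #define HWIP_MAX_INSTANCE  8

    /* per-device register bases, filled in during early init */
    struct reg_bases {
            uint32_t base[MAX_HWIP][HWIP_MAX_INSTANCE];
    };

    /* resolve an instance-relative register to an absolute offset */
    static inline uint32_t soc15_offset(const struct reg_bases *b,
                                        unsigned int hwip,
                                        unsigned int inst, uint32_t reg)
    {
            return b->base[hwip][inst] + reg;
    }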
@@ -891,157 +912,159 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
 
        uvd_v7_0_mc_resume(adev);
 
-       /* disable clock gating */
-       WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), 0,
-                       ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK);
-
-       /* disable interupt */
-       WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
-                       ~UVD_MASTINT_EN__VCPU_EN_MASK);
-
-       /* stall UMC and register bus before resetting VCPU */
-       WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
-                       UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
-                       ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-       mdelay(1);
-
-       /* put LMI, VCPU, RBC etc... into reset */
-       WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
-               UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
-               UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
-               UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
-               UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
-               UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
-               UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
-               UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
-               UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
-       mdelay(5);
+       for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
+               ring = &adev->uvd.inst[k].ring;
+               /* disable clock gating */
+               WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_CGC_CTRL), 0,
+                               ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK);
 
-       /* initialize UVD memory controller */
-       WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL,
-               (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
-               UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
-               UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
-               UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
-               UVD_LMI_CTRL__REQ_MODE_MASK |
-               0x00100000L);
+               /* disable interrupt */
+               WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), 0,
+                               ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+               /* stall UMC and register bus before resetting VCPU */
+               WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2),
+                               UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+                               ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+               mdelay(1);
+
+               /* put LMI, VCPU, RBC etc... into reset */
+               WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
+                       UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+                       UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+                       UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+                       UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+                       UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+                       UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+                       UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+                       UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+               mdelay(5);
+
+               /* initialize UVD memory controller */
+               WREG32_SOC15(UVD, k, mmUVD_LMI_CTRL,
+                       (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+                       UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+                       UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+                       UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+                       UVD_LMI_CTRL__REQ_MODE_MASK |
+                       0x00100000L);
 
 #ifdef __BIG_ENDIAN
-       /* swap (8 in 32) RB and IB */
-       lmi_swap_cntl = 0xa;
-       mp_swap_cntl = 0;
+               /* swap (8 in 32) RB and IB */
+               lmi_swap_cntl = 0xa;
+               mp_swap_cntl = 0;
 #endif
-       WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
-       WREG32_SOC15(UVD, 0, mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
-
-       WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040);
-       WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0);
-       WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040);
-       WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0);
-       WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0);
-       WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88);
-
-       /* take all subblocks out of reset, except VCPU */
-       WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
-                       UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
-       mdelay(5);
+               WREG32_SOC15(UVD, k, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+               WREG32_SOC15(UVD, k, mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
 
-       /* enable VCPU clock */
-       WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL,
-                       UVD_VCPU_CNTL__CLK_EN_MASK);
+               WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA0, 0x40c2040);
+               WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA1, 0x0);
+               WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB0, 0x40c2040);
+               WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB1, 0x0);
+               WREG32_SOC15(UVD, k, mmUVD_MPC_SET_ALU, 0);
+               WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUX, 0x88);
 
-       /* enable UMC */
-       WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
-                       ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+               /* take all subblocks out of reset, except VCPU */
+               WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
+                               UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+               mdelay(5);
 
-       /* boot up the VCPU */
-       WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0);
-       mdelay(10);
+               /* enable VCPU clock */
+               WREG32_SOC15(UVD, k, mmUVD_VCPU_CNTL,
+                               UVD_VCPU_CNTL__CLK_EN_MASK);
 
-       for (i = 0; i < 10; ++i) {
-               uint32_t status;
+               /* enable UMC */
+               WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), 0,
+                               ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
 
-               for (j = 0; j < 100; ++j) {
-                       status = RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+               /* boot up the VCPU */
+               WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, 0);
+               mdelay(10);
+
+               for (i = 0; i < 10; ++i) {
+                       uint32_t status;
+
+                       for (j = 0; j < 100; ++j) {
+                               status = RREG32_SOC15(UVD, k, mmUVD_STATUS);
+                               if (status & 2)
+                                       break;
+                               mdelay(10);
+                       }
+                       r = 0;
                        if (status & 2)
                                break;
+
+                       DRM_ERROR("UVD(%d) not responding, trying to reset the VCPU!!!\n", k);
+                       WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET),
+                                       UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+                                       ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+                       mdelay(10);
+                       WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), 0,
+                                       ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
                        mdelay(10);
+                       r = -1;
                }
-               r = 0;
-               if (status & 2)
-                       break;
 
-               DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
-               WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
-                               UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
-                               ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
-               mdelay(10);
-               WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
-                               ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
-               mdelay(10);
-               r = -1;
-       }
-
-       if (r) {
-               DRM_ERROR("UVD not responding, giving up!!!\n");
-               return r;
-       }
-       /* enable master interrupt */
-       WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
-               (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
-               ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
-
-       /* clear the bit 4 of UVD_STATUS */
-       WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0,
-                       ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
-
-       /* force RBC into idle state */
-       rb_bufsz = order_base_2(ring->ring_size);
-       tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
-       tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
-       tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
-       tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
-       tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
-       tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
-       WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
-
-       /* set the write pointer delay */
-       WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
-
-       /* set the wb address */
-       WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR,
-                       (upper_32_bits(ring->gpu_addr) >> 2));
-
-       /* programm the RB_BASE for ring buffer */
-       WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
-                       lower_32_bits(ring->gpu_addr));
-       WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
-                       upper_32_bits(ring->gpu_addr));
-
-       /* Initialize the ring buffer's read and write pointers */
-       WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
-
-       ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
-       WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
-                       lower_32_bits(ring->wptr));
-
-       WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
-                       ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
-
-       ring = &adev->uvd.inst->ring_enc[0];
-       WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
-       WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
-       WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
-       WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
-       WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+               if (r) {
+                       DRM_ERROR("UVD(%d) not responding, giving up!!!\n", k);
+                       return r;
+               }
+               /* enable master interrupt */
+               WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN),
+                       (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+                       ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
 
-       ring = &adev->uvd.inst->ring_enc[1];
-       WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
-       WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
-       WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
-       WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
-       WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+               /* clear bit 4 of UVD_STATUS */
+               WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_STATUS), 0,
+                               ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
 
+               /* force RBC into idle state */
+               rb_bufsz = order_base_2(ring->ring_size);
+               tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+               tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+               tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+               tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
+               tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+               tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+               WREG32_SOC15(UVD, k, mmUVD_RBC_RB_CNTL, tmp);
+
+               /* set the write pointer delay */
+               WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+               /* set the wb address */
+               WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR_ADDR,
+                               (upper_32_bits(ring->gpu_addr) >> 2));
+
+               /* program the RB_BASE for the ring buffer */
+               WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+                               lower_32_bits(ring->gpu_addr));
+               WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+                               upper_32_bits(ring->gpu_addr));
+
+               /* Initialize the ring buffer's read and write pointers */
+               WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR, 0);
+
+               ring->wptr = RREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR);
+               WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR,
+                               lower_32_bits(ring->wptr));
+
+               WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_RBC_RB_CNTL), 0,
+                               ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
+
+               ring = &adev->uvd.inst[k].ring_enc[0];
+               WREG32_SOC15(UVD, k, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+               WREG32_SOC15(UVD, k, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+               WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO, ring->gpu_addr);
+               WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+               WREG32_SOC15(UVD, k, mmUVD_RB_SIZE, ring->ring_size / 4);
+
+               ring = &adev->uvd.inst[k].ring_enc[1];
+               WREG32_SOC15(UVD, k, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+               WREG32_SOC15(UVD, k, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+               WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+               WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+               WREG32_SOC15(UVD, k, mmUVD_RB_SIZE2, ring->ring_size / 4);
+       }
        return 0;
 }
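The VCPU bring-up retry above keeps its old shape, now run once per instance: poll UVD_STATUS for the ready bit up to 100 times with 10 ms delays, and if the VCPU never responds, pulse VCPU_SOFT_RESET and try again, giving up after 10 attempts. Factored out, the pattern is the following sketch (the driver open-codes it with mdelay()):

    #include <stdbool.h>

    /* generic boot-with-retries: poll ready(), reset between attempts */
    static int boot_with_retries(bool (*ready)(void *ctx),
                                 void (*reset)(void *ctx), void *ctx,
                                 int attempts, int polls)
    {
            for (int i = 0; i < attempts; i++) {
                    for (int j = 0; j < polls; j++) {
                            if (ready(ctx)) /* UVD_STATUS & 2 */
                                    return 0;
                            /* mdelay(10) between polls */
                    }
                    reset(ctx);     /* pulse VCPU soft reset */
            }
            return -1;              /* instance never came up */
    }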
 
@@ -1054,26 +1077,30 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
  */
 static void uvd_v7_0_stop(struct amdgpu_device *adev)
 {
-       /* force RBC into idle state */
-       WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101);
-
-       /* Stall UMC and register bus before resetting VCPU */
-       WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
-                       UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
-                       ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-       mdelay(1);
-
-       /* put VCPU into reset */
-       WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
-                       UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
-       mdelay(5);
+       uint8_t i = 0;
+
+       for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+               /* force RBC into idle state */
+               WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, 0x11010101);
 
-       /* disable VCPU clock */
-       WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0);
+               /* Stall UMC and register bus before resetting VCPU */
+               WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
+                               UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+                               ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+               mdelay(1);
 
-       /* Unstall UMC and register bus */
-       WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
-                       ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+               /* put VCPU into reset */
+               WREG32_SOC15(UVD, i, mmUVD_SOFT_RESET,
+                               UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+               mdelay(5);
+
+               /* disable VCPU clock */
+               WREG32_SOC15(UVD, i, mmUVD_VCPU_CNTL, 0x0);
+
+               /* Unstall UMC and register bus */
+               WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0,
+                               ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+       }
 }
 
 /**
@@ -1092,26 +1119,26 @@ static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
 
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
        amdgpu_ring_write(ring, seq);
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
        amdgpu_ring_write(ring, addr & 0xffffffff);
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
        amdgpu_ring_write(ring, 0);
 
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
        amdgpu_ring_write(ring, 2);
 }
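Every amdgpu_ring_write() pair above is a type-0 packet: PACKET0(reg, n) names a register (now resolved through ring->me, so one emit path serves both instances) and the dwords that follow carry the values. A simplified encoder; the bit layout is quoted from soc15d.h from memory, so treat it as illustrative:

    #include <stdint.h>

    #define PACKET_TYPE0 0u

    /* type-0 packet header: write (n + 1) dwords starting at reg */
    static inline uint32_t packet0(uint32_t reg, uint32_t n)
    {
            return (PACKET_TYPE0 << 30) | (reg & 0xffff) |
                   ((n & 0x3fff) << 16);
    }

The fence emit then reduces to header/value pairs, e.g. packet0(ctx_id_reg, 0) followed by the sequence number.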
 
@@ -1160,7 +1187,7 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
        unsigned i;
        int r;
 
-       WREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
+       WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r) {
                DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n",
@@ -1168,11 +1195,11 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
                return r;
        }
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);
        for (i = 0; i < adev->usec_timeout; i++) {
-               tmp = RREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID);
+               tmp = RREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
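The decode ring test is a scratch-register round trip, unchanged except that it now targets the ring's own instance: the CPU seeds UVD_CONTEXT_ID with a poison value, the ring overwrites it, and the CPU polls for the new value. Stripped to its essentials (a sketch, with the scratch register modeled as a volatile word):

    #include <stdbool.h>
    #include <stdint.h>

    /* scratch round trip: CPU seeds a poison value, a submitted
     * packet overwrites it, the CPU polls for the new value */
    static bool scratch_test(volatile uint32_t *scratch,
                             void (*submit)(volatile uint32_t *, uint32_t),
                             int timeout)
    {
            *scratch = 0xCAFEDEAD;
            submit(scratch, 0xDEADBEEF);
            while (timeout-- > 0 && *scratch != 0xDEADBEEF)
                    ;       /* DRM_UDELAY(1) in the real loop */
            return *scratch == 0xDEADBEEF;
    }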
@@ -1204,17 +1231,17 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
        struct amdgpu_device *adev = ring->adev;
 
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0));
        amdgpu_ring_write(ring, vmid);
 
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_RBC_IB_SIZE), 0));
        amdgpu_ring_write(ring, ib->length_dw);
 }
 
@@ -1242,13 +1269,13 @@ static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
        struct amdgpu_device *adev = ring->adev;
 
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
        amdgpu_ring_write(ring, val);
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
        amdgpu_ring_write(ring, 8);
 }
 
@@ -1258,16 +1285,16 @@ static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
        struct amdgpu_device *adev = ring->adev;
 
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
        amdgpu_ring_write(ring, val);
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GP_SCRATCH8), 0));
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring,
-               PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+               PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
        amdgpu_ring_write(ring, 12);
 }
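emit_reg_wait() stages the register (as a byte offset, hence reg << 2), the expected value, and the mask in GPCOM_VCPU_DATA0/DATA1 and GP_SCRATCH8, then issues command 12, which the VCPU firmware is expected to treat as "stall until the masked read matches". The host-side equivalent of that assumed semantics would be:

    #include <stdbool.h>
    #include <stdint.h>

    /* host-side analogue of the emitted wait: spin until the masked
     * register value matches (semantics assumed from the helper) */
    static bool reg_wait(uint32_t (*rd32)(uint32_t reg), uint32_t reg,
                         uint32_t val, uint32_t mask, int timeout)
    {
            while (timeout-- > 0) {
                    if ((rd32(reg) & mask) == (val & mask))
                            return true;
                    /* udelay(1) between reads in kernel code */
            }
            return false;
    }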
 
@@ -1292,7 +1319,7 @@ static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
        struct amdgpu_device *adev = ring->adev;
 
        for (i = 0; i < count; i++)
-               amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
+               amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_NO_OP), 0));
 
 }
 
@@ -1360,16 +1387,16 @@ static bool uvd_v7_0_check_soft_reset(void *handle)
 
        if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) ||
            REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) ||
-           (RREG32_SOC15(UVD, 0, mmUVD_STATUS) &
+           (RREG32_SOC15(UVD, ring->me, mmUVD_STATUS) &
                    AMDGPU_UVD_STATUS_BUSY_MASK))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
 
        if (srbm_soft_reset) {
-               adev->uvd.inst->srbm_soft_reset = srbm_soft_reset;
+               adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
-               adev->uvd.inst->srbm_soft_reset = 0;
+               adev->uvd.inst[ring->me].srbm_soft_reset = 0;
                return false;
        }
 }
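A scope note for this and the following soft-reset hunks: ring is not a parameter of these handlers, so adev->uvd.inst[ring->me] would not compile in live code; these helpers appear to sit inside an #if 0 block in uvd_v7_0.c with their ip_funcs entries commented out, which is presumably why the conversion could be written this way.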
@@ -1378,7 +1405,7 @@ static int uvd_v7_0_pre_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (!adev->uvd.inst->srbm_soft_reset)
+       if (!adev->uvd.inst[ring->me].srbm_soft_reset)
                return 0;
 
        uvd_v7_0_stop(adev);
@@ -1390,9 +1417,9 @@ static int uvd_v7_0_soft_reset(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;
 
-       if (!adev->uvd.inst->srbm_soft_reset)
+       if (!adev->uvd.inst[ring->me].srbm_soft_reset)
                return 0;
-       srbm_soft_reset = adev->uvd.inst->srbm_soft_reset;
+       srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset;
 
        if (srbm_soft_reset) {
                u32 tmp;
@@ -1420,7 +1447,7 @@ static int uvd_v7_0_post_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (!adev->uvd.inst->srbm_soft_reset)
+       if (!adev->uvd.inst[ring->me].srbm_soft_reset)
                return 0;
 
        mdelay(5);
@@ -1442,17 +1469,29 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
 {
+       uint32_t ip_instance;
+
+       switch (entry->client_id) {
+       case SOC15_IH_CLIENTID_UVD:
+               ip_instance = 0;
+               break;
+       default:
+               DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+               return 0;
+       }
+
        DRM_DEBUG("IH: UVD TRAP\n");
+
        switch (entry->src_id) {
        case 124:
-               amdgpu_fence_process(&adev->uvd.inst->ring);
+               amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring);
                break;
        case 119:
-               amdgpu_fence_process(&adev->uvd.inst->ring_enc[0]);
+               amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[0]);
                break;
        case 120:
                if (!amdgpu_sriov_vf(adev))
-                       amdgpu_fence_process(&adev->uvd.inst->ring_enc[1]);
+                       amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[1]);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",
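entry->client_id now selects the UVD instance before src_id routes the fence to the matching ring; this patch only wires up instance 0. Once the second Vega20 instance is enabled, its IH client id (presumably SOC15_IH_CLIENTID_UVD1) would map to ip_instance 1, along these lines (the client-id values below are placeholders; the real ones live in soc15_ih_clientid.h):

    #include <stdint.h>

    #define CLIENTID_UVD   0x10     /* placeholder values */
    #define CLIENTID_UVD1  0x11

    /* map an IH client id to a UVD instance, or -1 if not UVD */
    static int uvd_instance_from_client(uint32_t client_id)
    {
            switch (client_id) {
            case CLIENTID_UVD:
                    return 0;
            case CLIENTID_UVD1:     /* assumed follow-up mapping */
                    return 1;
            default:
                    return -1;
            }
    }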
@@ -1468,9 +1507,9 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
 {
        uint32_t data, data1, data2, suvd_flags;
 
-       data = RREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL);
-       data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE);
-       data2 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL);
+       data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL);
+       data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
+       data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL);
 
        data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
                  UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
@@ -1514,18 +1553,18 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
                        UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
        data1 |= suvd_flags;
 
-       WREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL, data);
-       WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, 0);
-       WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1);
-       WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL, data2);
+       WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data);
+       WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0);
+       WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
+       WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2);
 }
 
 static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
 {
        uint32_t data, data1, cgc_flags, suvd_flags;
 
-       data = RREG32_SOC15(UVD, 0, mmUVD_CGC_GATE);
-       data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE);
+       data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE);
+       data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
 
        cgc_flags = UVD_CGC_GATE__SYS_MASK |
                UVD_CGC_GATE__UDEC_MASK |
@@ -1557,8 +1596,8 @@ static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
        data |= cgc_flags;
        data1 |= suvd_flags;
 
-       WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, data);
-       WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1);
+       WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data);
+       WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
 }
 
 static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
@@ -1617,7 +1656,7 @@ static int uvd_v7_0_set_powergating_state(void *handle,
        if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
                return 0;
 
-       WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
+       WREG32_SOC15(UVD, ring->me, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
 
        if (state == AMD_PG_STATE_GATE) {
                uvd_v7_0_stop(adev);
@@ -1720,18 +1759,27 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
 
 static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev)
 {
-       adev->uvd.inst->ring.funcs = &uvd_v7_0_ring_vm_funcs;
-       DRM_INFO("UVD is enabled in VM mode\n");
+       int i;
+
+       for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+               adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs;
+               adev->uvd.inst[i].ring.me = i;
+               DRM_INFO("UVD(%d) is enabled in VM mode\n", i);
+       }
 }
 
 static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev)
 {
-       int i;
+       int i, j;
 
-       for (i = 0; i < adev->uvd.num_enc_rings; ++i)
-               adev->uvd.inst->ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs;
+       for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+               for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+                       adev->uvd.inst[j].ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs;
+                       adev->uvd.inst[j].ring_enc[i].me = j;
+               }
 
-       DRM_INFO("UVD ENC is enabled in VM mode\n");
+               DRM_INFO("UVD(%d) ENC is enabled in VM mode\n", j);
+       }
 }
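These init paths are where the repurposed amdgpu_ring::me gets stamped with the owning instance index; every emit and test helper earlier in the file then recovers the instance from the ring instead of assuming 0. Reduced to its essentials, the contract is:

    #include <stdint.h>

    struct ring {
            uint32_t me;    /* owning UVD instance, set at init */
    };

    struct uvd_inst {
            struct ring ring;
    };

    /* init time: stamp each ring with its instance index */
    static void stamp_rings(struct uvd_inst *inst, int num_inst)
    {
            for (int i = 0; i < num_inst; i++)
                    inst[i].ring.me = i;
    }

    /* run time: helpers derive the instance from the ring alone */
    static inline uint32_t ring_instance(const struct ring *r)
    {
            return r->me;
    }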
 
 static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = {
@@ -1741,8 +1789,12 @@ static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = {
 
 static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev)
 {
-       adev->uvd.inst->irq.num_types = adev->uvd.num_enc_rings + 1;
-       adev->uvd.inst->irq.funcs = &uvd_v7_0_irq_funcs;
+       int i;
+
+       for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+               adev->uvd.inst[i].irq.num_types = adev->uvd.num_enc_rings + 1;
+               adev->uvd.inst[i].irq.funcs = &uvd_v7_0_irq_funcs;
+       }
 }
 
 const struct amdgpu_ip_block_version uvd_v7_0_ip_block =