drm/radeon: UVD bringup v8
author    Christian König <deathsimple@vodafone.de>
          Mon, 8 Apr 2013 10:41:29 +0000 (12:41 +0200)
committer Alex Deucher <alexander.deucher@amd.com>
          Tue, 9 Apr 2013 14:31:33 +0000 (10:31 -0400)
Everything needed to decode videos using UVD.

v6: all the bugfixes and R7xx-SI support merged into one patch
v7: UVD_CGC_GATE is a write-only register, lockup detection fix
v8: split out VRAM fallback changes, remove support for RV770,
    add support for HEMLOCK, add buffer size checks
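
For reference, userspace reaches the new ring through the existing CS
ioctl by putting RADEON_CS_RING_UVD into the second dword of the flags
chunk; the CS parser then forces the first reloc (the UVD message) into
VRAM. A minimal sketch, illustrative only (the helper name is invented
and the IB/reloc chunk setup is omitted):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <drm/radeon_drm.h>

    /* submit previously prepared IB and reloc chunks to the UVD ring */
    static int submit_to_uvd_ring(int fd, struct drm_radeon_cs_chunk *ib,
                                  struct drm_radeon_cs_chunk *relocs)
    {
            /* dword 0: CS flags, dword 1: ring selection */
            uint32_t flags[2] = { 0, RADEON_CS_RING_UVD };
            struct drm_radeon_cs_chunk fl = {
                    .chunk_id   = RADEON_CHUNK_ID_FLAGS,
                    .length_dw  = 2,
                    .chunk_data = (uintptr_t)flags,
            };
            uint64_t chunks[3] = {
                    (uintptr_t)ib, (uintptr_t)relocs, (uintptr_t)&fl
            };
            struct drm_radeon_cs cs = {
                    .num_chunks = 3,
                    .chunks     = (uintptr_t)chunks,
            };

            return ioctl(fd, DRM_IOCTL_RADEON_CS, &cs);
    }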

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
23 files changed:
drivers/gpu/drm/radeon/Makefile
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/evergreend.h
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/nid.h
drivers/gpu/drm/radeon/r600.c
drivers/gpu/drm/radeon/r600d.h
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_asic.c
drivers/gpu/drm/radeon/radeon_asic.h
drivers/gpu/drm/radeon/radeon_cs.c
drivers/gpu/drm/radeon/radeon_fence.c
drivers/gpu/drm/radeon/radeon_kms.c
drivers/gpu/drm/radeon/radeon_object.c
drivers/gpu/drm/radeon/radeon_object.h
drivers/gpu/drm/radeon/radeon_ring.c
drivers/gpu/drm/radeon/radeon_test.c
drivers/gpu/drm/radeon/radeon_uvd.c [new file with mode: 0644]
drivers/gpu/drm/radeon/rv770.c
drivers/gpu/drm/radeon/rv770d.h
drivers/gpu/drm/radeon/si.c
drivers/gpu/drm/radeon/sid.h
include/uapi/drm/radeon_drm.h

diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index bf17252..86c5e36 100644
@@ -76,7 +76,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
        evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \
        evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \
        atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \
-       si_blit_shaders.o radeon_prime.o
+       si_blit_shaders.o radeon_prime.o radeon_uvd.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
 radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 305a657..18b66ff 100644
@@ -3360,6 +3360,9 @@ restart_ih:
                                DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
                                break;
                        }
+               case 124: /* UVD */
+                       DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
+                       radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
                        break;
                case 146:
                case 147:
@@ -3571,7 +3574,7 @@ int evergreen_copy_dma(struct radeon_device *rdev,
 
 static int evergreen_startup(struct radeon_device *rdev)
 {
-       struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+       struct radeon_ring *ring;
        int r;
 
        /* enable pcie gen2 link */
@@ -3638,6 +3641,17 @@ static int evergreen_startup(struct radeon_device *rdev)
                return r;
        }
 
+       r = rv770_uvd_resume(rdev);
+       if (!r) {
+               r = radeon_fence_driver_start_ring(rdev,
+                                                  R600_RING_TYPE_UVD_INDEX);
+               if (r)
+                       dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+       }
+
+       if (r)
+               rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+
        /* Enable IRQ */
        r = r600_irq_init(rdev);
        if (r) {
@@ -3647,6 +3661,7 @@ static int evergreen_startup(struct radeon_device *rdev)
        }
        evergreen_irq_set(rdev);
 
+       ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
                             R600_CP_RB_RPTR, R600_CP_RB_WPTR,
                             0, 0xfffff, RADEON_CP_PACKET2);
@@ -3670,6 +3685,19 @@ static int evergreen_startup(struct radeon_device *rdev)
        if (r)
                return r;
 
+       ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+       if (ring->ring_size) {
+               r = radeon_ring_init(rdev, ring, ring->ring_size,
+                                    R600_WB_UVD_RPTR_OFFSET,
+                                    UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
+                                    0, 0xfffff, RADEON_CP_PACKET2);
+               if (!r)
+                       r = r600_uvd_init(rdev);
+
+               if (r)
+                       DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
+       }
+
        r = radeon_ib_pool_init(rdev);
        if (r) {
                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -3716,8 +3744,10 @@ int evergreen_resume(struct radeon_device *rdev)
 int evergreen_suspend(struct radeon_device *rdev)
 {
        r600_audio_fini(rdev);
+       radeon_uvd_suspend(rdev);
        r700_cp_stop(rdev);
        r600_dma_stop(rdev);
+       r600_uvd_rbc_stop(rdev);
        evergreen_irq_suspend(rdev);
        radeon_wb_disable(rdev);
        evergreen_pcie_gart_disable(rdev);
@@ -3797,6 +3827,13 @@ int evergreen_init(struct radeon_device *rdev)
        rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
        r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
 
+       r = radeon_uvd_init(rdev);
+       if (!r) {
+               rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
+               r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX],
+                              4096);
+       }
+
        rdev->ih.ring_obj = NULL;
        r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -3843,6 +3880,7 @@ void evergreen_fini(struct radeon_device *rdev)
        radeon_ib_pool_fini(rdev);
        radeon_irq_kms_fini(rdev);
        evergreen_pcie_gart_fini(rdev);
+       radeon_uvd_fini(rdev);
        r600_vram_scratch_fini(rdev);
        radeon_gem_fini(rdev);
        radeon_fence_driver_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 982d25a..c5d873e 100644
 #       define TARGET_LINK_SPEED_MASK                     (0xf << 0)
 #       define SELECTABLE_DEEMPHASIS                      (1 << 6)
 
+
+/*
+ * UVD
+ */
+#define UVD_RBC_RB_RPTR                                        0xf690
+#define UVD_RBC_RB_WPTR                                        0xf694
+
 /*
  * PM4
  */
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 02e9580..35d7caa 100644
@@ -933,6 +933,23 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
        radeon_ring_write(ring, 10); /* poll interval */
 }
 
+void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
+                              struct radeon_ring *ring,
+                              struct radeon_semaphore *semaphore,
+                              bool emit_wait)
+{
+       uint64_t addr = semaphore->gpu_addr;
+
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
+       radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
+
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
+       radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
+
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
+       radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
+}
+
 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
 {
        if (enable)
@@ -1684,6 +1701,16 @@ static int cayman_startup(struct radeon_device *rdev)
                return r;
        }
 
+       r = rv770_uvd_resume(rdev);
+       if (!r) {
+               r = radeon_fence_driver_start_ring(rdev,
+                                                  R600_RING_TYPE_UVD_INDEX);
+               if (r)
+                       dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+       }
+       if (r)
+               rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+
        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
@@ -1750,6 +1777,18 @@ static int cayman_startup(struct radeon_device *rdev)
        if (r)
                return r;
 
+       ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+       if (ring->ring_size) {
+               r = radeon_ring_init(rdev, ring, ring->ring_size,
+                                    R600_WB_UVD_RPTR_OFFSET,
+                                    UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
+                                    0, 0xfffff, RADEON_CP_PACKET2);
+               if (!r)
+                       r = r600_uvd_init(rdev);
+               if (r)
+                       DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
+       }
+
        r = radeon_ib_pool_init(rdev);
        if (r) {
                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -1796,6 +1835,8 @@ int cayman_suspend(struct radeon_device *rdev)
        radeon_vm_manager_fini(rdev);
        cayman_cp_enable(rdev, false);
        cayman_dma_stop(rdev);
+       r600_uvd_rbc_stop(rdev);
+       radeon_uvd_suspend(rdev);
        evergreen_irq_suspend(rdev);
        radeon_wb_disable(rdev);
        cayman_pcie_gart_disable(rdev);
@@ -1870,6 +1911,13 @@ int cayman_init(struct radeon_device *rdev)
        ring->ring_obj = NULL;
        r600_ring_init(rdev, ring, 64 * 1024);
 
+       r = radeon_uvd_init(rdev);
+       if (!r) {
+               ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+               ring->ring_obj = NULL;
+               r600_ring_init(rdev, ring, 4096);
+       }
+
        rdev->ih.ring_obj = NULL;
        r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -1921,6 +1969,7 @@ void cayman_fini(struct radeon_device *rdev)
        radeon_vm_manager_fini(rdev);
        radeon_ib_pool_fini(rdev);
        radeon_irq_kms_fini(rdev);
+       radeon_uvd_fini(rdev);
        cayman_pcie_gart_fini(rdev);
        r600_vram_scratch_fini(rdev);
        radeon_gem_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index 445b235..f2555bc 100644
 #       define CACHE_FLUSH_AND_INV_EVENT                        (0x16 << 0)
 
 /*
+ * UVD
+ */
+#define UVD_SEMA_ADDR_LOW                              0xEF00
+#define UVD_SEMA_ADDR_HIGH                             0xEF04
+#define UVD_SEMA_CMD                                   0xEF08
+#define UVD_RBC_RB_RPTR                                        0xF690
+#define UVD_RBC_RB_WPTR                                        0xF694
+
+/*
  * PM4
  */
 #define PACKET0(reg, n)        ((RADEON_PACKET_TYPE0 << 30) |                  \
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 1c53087..7ce7b83 100644
@@ -2552,6 +2552,185 @@ void r600_dma_fini(struct radeon_device *rdev)
 }
 
 /*
+ * UVD
+ */
+int r600_uvd_rbc_start(struct radeon_device *rdev)
+{
+       struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+       uint64_t rptr_addr;
+       uint32_t rb_bufsz, tmp;
+       int r;
+
+       rptr_addr = rdev->wb.gpu_addr + R600_WB_UVD_RPTR_OFFSET;
+
+       if (upper_32_bits(rptr_addr) != upper_32_bits(ring->gpu_addr)) {
+               DRM_ERROR("UVD ring and rptr not in the same 4GB segment!\n");
+               return -EINVAL;
+       }
+
+       /* force RBC into idle state */
+       WREG32(UVD_RBC_RB_CNTL, 0x11010101);
+
+       /* Set the write pointer delay */
+       WREG32(UVD_RBC_RB_WPTR_CNTL, 0);
+
+       /* set the wb address */
+       WREG32(UVD_RBC_RB_RPTR_ADDR, rptr_addr >> 2);
+
+       /* program the 4GB memory segment for rptr and ring buffer */
+       WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(rptr_addr) |
+                                  (0x7 << 16) | (0x1 << 31));
+
+       /* Initialize the ring buffer's read and write pointers */
+       WREG32(UVD_RBC_RB_RPTR, 0x0);
+
+       ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR);
+       WREG32(UVD_RBC_RB_WPTR, ring->wptr);
+
+       /* set the ring address */
+       WREG32(UVD_RBC_RB_BASE, ring->gpu_addr);
+
+       /* Set ring buffer size */
+       rb_bufsz = drm_order(ring->ring_size);
+       rb_bufsz = (0x1 << 8) | rb_bufsz;
+       WREG32(UVD_RBC_RB_CNTL, rb_bufsz);
+
+       ring->ready = true;
+       r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring);
+       if (r) {
+               ring->ready = false;
+               return r;
+       }
+
+       r = radeon_ring_lock(rdev, ring, 10);
+       if (r) {
+               DRM_ERROR("radeon: failed to lock UVD ring (%d).\n", r);
+               return r;
+       }
+
+       tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
+       radeon_ring_write(ring, tmp);
+       radeon_ring_write(ring, 0xFFFFF);
+
+       tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
+       radeon_ring_write(ring, tmp);
+       radeon_ring_write(ring, 0xFFFFF);
+
+       tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
+       radeon_ring_write(ring, tmp);
+       radeon_ring_write(ring, 0xFFFFF);
+
+       /* Clear timeout status bits */
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0));
+       radeon_ring_write(ring, 0x8);
+
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0));
+       radeon_ring_write(ring, 1);
+
+       radeon_ring_unlock_commit(rdev, ring);
+
+       return 0;
+}
+
+void r600_uvd_rbc_stop(struct radeon_device *rdev)
+{
+       struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+
+       /* force RBC into idle state */
+       WREG32(UVD_RBC_RB_CNTL, 0x11010101);
+       ring->ready = false;
+}
+
+int r600_uvd_init(struct radeon_device *rdev)
+{
+       int i, j, r;
+
+       /* disable clock gating */
+       WREG32(UVD_CGC_GATE, 0);
+
+       /* disable interrupt */
+       WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1));
+
+       /* put LMI, VCPU, RBC etc... into reset */
+       WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET |
+              LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET |
+              CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET);
+       mdelay(5);
+
+       /* take UVD block out of reset */
+       WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD);
+       mdelay(5);
+
+       /* initialize UVD memory controller */
+       WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
+                            (1 << 21) | (1 << 9) | (1 << 20));
+
+       /* disable byte swapping */
+       WREG32(UVD_LMI_SWAP_CNTL, 0);
+       WREG32(UVD_MP_SWAP_CNTL, 0);
+
+       WREG32(UVD_MPC_SET_MUXA0, 0x40c2040);
+       WREG32(UVD_MPC_SET_MUXA1, 0x0);
+       WREG32(UVD_MPC_SET_MUXB0, 0x40c2040);
+       WREG32(UVD_MPC_SET_MUXB1, 0x0);
+       WREG32(UVD_MPC_SET_ALU, 0);
+       WREG32(UVD_MPC_SET_MUX, 0x88);
+
+       /* Stall UMC */
+       WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+       WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3));
+
+       /* take all subblocks out of reset, except VCPU */
+       WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET);
+       mdelay(5);
+
+       /* enable VCPU clock */
+       WREG32(UVD_VCPU_CNTL, 1 << 9);
+
+       /* enable UMC */
+       WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
+
+       /* boot up the VCPU */
+       WREG32(UVD_SOFT_RESET, 0);
+       mdelay(10);
+
+       WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
+
+       for (i = 0; i < 10; ++i) {
+               uint32_t status;
+               for (j = 0; j < 100; ++j) {
+                       status = RREG32(UVD_STATUS);
+                       if (status & 2)
+                               break;
+                       mdelay(10);
+               }
+               r = 0;
+               if (status & 2)
+                       break;
+
+               DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
+               WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET);
+               mdelay(10);
+               WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET);
+               mdelay(10);
+               r = -1;
+       }
+       if (r) {
+               DRM_ERROR("UVD not responding, giving up!!!\n");
+               return r;
+       }
+       /* enable interrupt */
+       WREG32_P(UVD_MASTINT_EN, 3 << 1, ~(3 << 1));
+
+       r = r600_uvd_rbc_start(rdev);
+       if (r)
+               return r;
+
+       DRM_INFO("UVD initialized successfully.\n");
+       return 0;
+}
+
+/*
  * GPU scratch registers helpers function.
  */
 void r600_scratch_init(struct radeon_device *rdev)
@@ -2660,6 +2839,40 @@ int r600_dma_ring_test(struct radeon_device *rdev,
        return r;
 }
 
+int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+       uint32_t tmp = 0;
+       unsigned i;
+       int r;
+
+       WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD);
+       r = radeon_ring_lock(rdev, ring, 3);
+       if (r) {
+               DRM_ERROR("radeon: failed to lock ring %d (%d).\n",
+                         ring->idx, r);
+               return r;
+       }
+       radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
+       radeon_ring_write(ring, 0xDEADBEEF);
+       radeon_ring_unlock_commit(rdev, ring);
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               tmp = RREG32(UVD_CONTEXT_ID);
+               if (tmp == 0xDEADBEEF)
+                       break;
+               DRM_UDELAY(1);
+       }
+
+       if (i < rdev->usec_timeout) {
+               DRM_INFO("ring test on %d succeeded in %d usecs\n",
+                        ring->idx, i);
+       } else {
+               DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+                         ring->idx, tmp);
+               r = -EINVAL;
+       }
+       return r;
+}
+
 /*
  * CP fences/semaphores
  */
@@ -2711,6 +2924,30 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
        }
 }
 
+void r600_uvd_fence_emit(struct radeon_device *rdev,
+                        struct radeon_fence *fence)
+{
+       struct radeon_ring *ring = &rdev->ring[fence->ring];
+       uint32_t addr = rdev->fence_drv[fence->ring].gpu_addr;
+
+       radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
+       radeon_ring_write(ring, fence->seq);
+       radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
+       radeon_ring_write(ring, addr & 0xffffffff);
+       radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
+       radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
+       radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
+       radeon_ring_write(ring, 0);
+
+       radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
+       radeon_ring_write(ring, 2);
+}
+
 void r600_semaphore_ring_emit(struct radeon_device *rdev,
                              struct radeon_ring *ring,
                              struct radeon_semaphore *semaphore,
@@ -2780,6 +3017,23 @@ void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
        radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
 }
 
+void r600_uvd_semaphore_emit(struct radeon_device *rdev,
+                            struct radeon_ring *ring,
+                            struct radeon_semaphore *semaphore,
+                            bool emit_wait)
+{
+       uint64_t addr = semaphore->gpu_addr;
+
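+       /* the 8-byte aligned address is split into two 20 bit fields:
+        * bits 3..22 go into ADDR_LOW, bits 23..42 into ADDR_HIGH */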
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
+       radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
+
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
+       radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
+
+       radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
+       radeon_ring_write(ring, emit_wait ? 1 : 0);
+}
+
 int r600_copy_blit(struct radeon_device *rdev,
                   uint64_t src_offset,
                   uint64_t dst_offset,
@@ -3183,6 +3437,16 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
        radeon_ring_write(ring, ib->length_dw);
 }
 
+void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+       struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+       radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0));
+       radeon_ring_write(ring, ib->gpu_addr);
+       radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0));
+       radeon_ring_write(ring, ib->length_dw);
+}
+
 int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 {
        struct radeon_ib ib;
@@ -3300,6 +3564,33 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
        return r;
 }
 
+int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+       struct radeon_fence *fence;
+       int r;
+
+       r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
+       if (r) {
+               DRM_ERROR("radeon: failed to get create msg (%d).\n", r);
+               return r;
+       }
+
+       r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence);
+       if (r) {
+               DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r);
+               return r;
+       }
+
+       r = radeon_fence_wait(fence, false);
+       if (r) {
+               DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+               return r;
+       }
+       DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+       radeon_fence_unref(&fence);
+       return r;
+}
+
 /**
  * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
  *
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index a42ba11..441bdb8 100644
 #define SRBM_SOFT_RESET                                   0xe60
 #       define SOFT_RESET_DMA                             (1 << 12)
 #       define SOFT_RESET_RLC                             (1 << 13)
+#       define SOFT_RESET_UVD                             (1 << 18)
 #       define RV770_SOFT_RESET_DMA                       (1 << 20)
 
 #define CP_INT_CNTL                                       0xc124
 #       define AFMT_AZ_AUDIO_ENABLE_CHG_ACK  (1 << 30)
 
 /*
+ * UVD
+ */
+#define UVD_SEMA_ADDR_LOW                              0xef00
+#define UVD_SEMA_ADDR_HIGH                             0xef04
+#define UVD_SEMA_CMD                                   0xef08
+
+#define UVD_GPCOM_VCPU_CMD                             0xef0c
+#define UVD_GPCOM_VCPU_DATA0                           0xef10
+#define UVD_GPCOM_VCPU_DATA1                           0xef14
+#define UVD_ENGINE_CNTL                                        0xef18
+
+#define UVD_SEMA_CNTL                                  0xf400
+#define UVD_RB_ARB_CTRL                                        0xf480
+
+#define UVD_LMI_EXT40_ADDR                             0xf498
+#define UVD_CGC_GATE                                   0xf4a8
+#define UVD_LMI_CTRL2                                  0xf4f4
+#define UVD_MASTINT_EN                                 0xf500
+#define UVD_LMI_ADDR_EXT                               0xf594
+#define UVD_LMI_CTRL                                   0xf598
+#define UVD_LMI_SWAP_CNTL                              0xf5b4
+#define UVD_MP_SWAP_CNTL                               0xf5bc
+#define UVD_MPC_CNTL                                   0xf5dc
+#define UVD_MPC_SET_MUXA0                              0xf5e4
+#define UVD_MPC_SET_MUXA1                              0xf5e8
+#define UVD_MPC_SET_MUXB0                              0xf5ec
+#define UVD_MPC_SET_MUXB1                              0xf5f0
+#define UVD_MPC_SET_MUX                                        0xf5f4
+#define UVD_MPC_SET_ALU                                        0xf5f8
+
+#define UVD_VCPU_CNTL                                  0xf660
+#define UVD_SOFT_RESET                                 0xf680
+#define                RBC_SOFT_RESET                                  (1 << 0)
+#define                LBSI_SOFT_RESET                                 (1 << 1)
+#define                LMI_SOFT_RESET                                  (1 << 2)
+#define                VCPU_SOFT_RESET                                 (1 << 3)
+#define                CSM_SOFT_RESET                                  (1 << 5)
+#define                CXW_SOFT_RESET                                  (1 << 6)
+#define                TAP_SOFT_RESET                                  (1 << 7)
+#define                LMI_UMC_SOFT_RESET                              (1 << 13)
+#define UVD_RBC_IB_BASE                                        0xf684
+#define UVD_RBC_IB_SIZE                                        0xf688
+#define UVD_RBC_RB_BASE                                        0xf68c
+#define UVD_RBC_RB_RPTR                                        0xf690
+#define UVD_RBC_RB_WPTR                                        0xf694
+#define UVD_RBC_RB_WPTR_CNTL                           0xf698
+
+#define UVD_STATUS                                     0xf6bc
+
+#define UVD_SEMA_TIMEOUT_STATUS                                0xf6c0
+#define UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL          0xf6c4
+#define UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL               0xf6c8
+#define UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL                0xf6cc
+
+#define UVD_RBC_RB_CNTL                                        0xf6a4
+#define UVD_RBC_RB_RPTR_ADDR                           0xf6a8
+
+#define UVD_CONTEXT_ID                                 0xf6f4
+
+/*
  * PM4
  */
 #define PACKET0(reg, n)        ((RADEON_PACKET_TYPE0 << 30) |                  \
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 3db6b02..66e68c1 100644
@@ -110,24 +110,27 @@ extern int radeon_fastfb;
 #define RADEON_BIOS_NUM_SCRATCH                        8
 
 /* max number of rings */
-#define RADEON_NUM_RINGS                       5
+#define RADEON_NUM_RINGS                       6
 
 /* fence seq are set to this number when signaled */
 #define RADEON_FENCE_SIGNALED_SEQ              0LL
 
 /* internal ring indices */
 /* r1xx+ has gfx CP ring */
-#define RADEON_RING_TYPE_GFX_INDEX             0
+#define RADEON_RING_TYPE_GFX_INDEX     0
 
 /* cayman has 2 compute CP rings */
-#define CAYMAN_RING_TYPE_CP1_INDEX             1
-#define CAYMAN_RING_TYPE_CP2_INDEX             2
+#define CAYMAN_RING_TYPE_CP1_INDEX     1
+#define CAYMAN_RING_TYPE_CP2_INDEX     2
 
 /* R600+ has an async dma ring */
 #define R600_RING_TYPE_DMA_INDEX               3
 /* cayman add a second async dma ring */
 #define CAYMAN_RING_TYPE_DMA1_INDEX            4
 
+/* R600+ */
+#define R600_RING_TYPE_UVD_INDEX       5
+
 /* hardcode those limit for now */
 #define RADEON_VA_IB_OFFSET                    (1 << 20)
 #define RADEON_VA_RESERVED_SIZE                        (8 << 20)
@@ -921,6 +924,7 @@ struct radeon_wb {
 #define R600_WB_DMA_RPTR_OFFSET   1792
 #define R600_WB_IH_WPTR_OFFSET   2048
 #define CAYMAN_WB_DMA1_RPTR_OFFSET   2304
+#define R600_WB_UVD_RPTR_OFFSET  2560
 #define R600_WB_EVENT_OFFSET     3072
 
 /**
@@ -1121,6 +1125,33 @@ struct radeon_pm {
 int radeon_pm_get_type_index(struct radeon_device *rdev,
                             enum radeon_pm_state_type ps_type,
                             int instance);
+/*
+ * UVD
+ */
+#define RADEON_MAX_UVD_HANDLES 10
+#define RADEON_UVD_STACK_SIZE  (1024*1024)
+#define RADEON_UVD_HEAP_SIZE   (1024*1024)
+
+struct radeon_uvd {
+       struct radeon_bo        *vcpu_bo;
+       void                    *cpu_addr;
+       uint64_t                gpu_addr;
+       atomic_t                handles[RADEON_MAX_UVD_HANDLES];
+       struct drm_file         *filp[RADEON_MAX_UVD_HANDLES];
+};
+
+int radeon_uvd_init(struct radeon_device *rdev);
+void radeon_uvd_fini(struct radeon_device *rdev);
+int radeon_uvd_suspend(struct radeon_device *rdev);
+int radeon_uvd_resume(struct radeon_device *rdev);
+int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
+                             uint32_t handle, struct radeon_fence **fence);
+int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
+                              uint32_t handle, struct radeon_fence **fence);
+void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo);
+void radeon_uvd_free_handles(struct radeon_device *rdev,
+                            struct drm_file *filp);
+int radeon_uvd_cs_parse(struct radeon_cs_parser *parser);
 
 struct r600_audio {
        int                     channels;
@@ -1611,6 +1642,7 @@ struct radeon_device {
        struct radeon_asic              *asic;
        struct radeon_gem               gem;
        struct radeon_pm                pm;
+       struct radeon_uvd               uvd;
        uint32_t                        bios_scratch[RADEON_BIOS_NUM_SCRATCH];
        struct radeon_wb                wb;
        struct radeon_dummy_page        dummy_page;
@@ -1625,6 +1657,7 @@ struct radeon_device {
        const struct firmware *rlc_fw;  /* r6/700 RLC firmware */
        const struct firmware *mc_fw;   /* NI MC firmware */
        const struct firmware *ce_fw;   /* SI CE firmware */
+       const struct firmware *uvd_fw;  /* UVD firmware */
        struct r600_blit r600_blit;
        struct r600_vram_scratch vram_scratch;
        int msi_enabled; /* msi enabled */
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index aba0a89..a7a7b2b 100644
@@ -1130,6 +1130,15 @@ static struct radeon_asic rv770_asic = {
                        .ring_test = &r600_dma_ring_test,
                        .ib_test = &r600_dma_ib_test,
                        .is_lockup = &r600_dma_is_lockup,
+               },
+               [R600_RING_TYPE_UVD_INDEX] = {
+                       .ib_execute = &r600_uvd_ib_execute,
+                       .emit_fence = &r600_uvd_fence_emit,
+                       .emit_semaphore = &r600_uvd_semaphore_emit,
+                       .cs_parse = &radeon_uvd_cs_parse,
+                       .ring_test = &r600_uvd_ring_test,
+                       .ib_test = &r600_uvd_ib_test,
+                       .is_lockup = &radeon_ring_test_lockup,
                }
        },
        .irq = {
@@ -1216,6 +1225,15 @@ static struct radeon_asic evergreen_asic = {
                        .ring_test = &r600_dma_ring_test,
                        .ib_test = &r600_dma_ib_test,
                        .is_lockup = &evergreen_dma_is_lockup,
+               },
+               [R600_RING_TYPE_UVD_INDEX] = {
+                       .ib_execute = &r600_uvd_ib_execute,
+                       .emit_fence = &r600_uvd_fence_emit,
+                       .emit_semaphore = &r600_uvd_semaphore_emit,
+                       .cs_parse = &radeon_uvd_cs_parse,
+                       .ring_test = &r600_uvd_ring_test,
+                       .ib_test = &r600_uvd_ib_test,
+                       .is_lockup = &radeon_ring_test_lockup,
                }
        },
        .irq = {
@@ -1302,6 +1320,15 @@ static struct radeon_asic sumo_asic = {
                        .ring_test = &r600_dma_ring_test,
                        .ib_test = &r600_dma_ib_test,
                        .is_lockup = &evergreen_dma_is_lockup,
+               },
+               [R600_RING_TYPE_UVD_INDEX] = {
+                       .ib_execute = &r600_uvd_ib_execute,
+                       .emit_fence = &r600_uvd_fence_emit,
+                       .emit_semaphore = &r600_uvd_semaphore_emit,
+                       .cs_parse = &radeon_uvd_cs_parse,
+                       .ring_test = &r600_uvd_ring_test,
+                       .ib_test = &r600_uvd_ib_test,
+                       .is_lockup = &radeon_ring_test_lockup,
                }
        },
        .irq = {
@@ -1388,6 +1415,15 @@ static struct radeon_asic btc_asic = {
                        .ring_test = &r600_dma_ring_test,
                        .ib_test = &r600_dma_ib_test,
                        .is_lockup = &evergreen_dma_is_lockup,
+               },
+               [R600_RING_TYPE_UVD_INDEX] = {
+                       .ib_execute = &r600_uvd_ib_execute,
+                       .emit_fence = &r600_uvd_fence_emit,
+                       .emit_semaphore = &r600_uvd_semaphore_emit,
+                       .cs_parse = &radeon_uvd_cs_parse,
+                       .ring_test = &r600_uvd_ring_test,
+                       .ib_test = &r600_uvd_ib_test,
+                       .is_lockup = &radeon_ring_test_lockup,
                }
        },
        .irq = {
@@ -1517,6 +1553,15 @@ static struct radeon_asic cayman_asic = {
                        .ib_test = &r600_dma_ib_test,
                        .is_lockup = &cayman_dma_is_lockup,
                        .vm_flush = &cayman_dma_vm_flush,
+               },
+               [R600_RING_TYPE_UVD_INDEX] = {
+                       .ib_execute = &r600_uvd_ib_execute,
+                       .emit_fence = &r600_uvd_fence_emit,
+                       .emit_semaphore = &cayman_uvd_semaphore_emit,
+                       .cs_parse = &radeon_uvd_cs_parse,
+                       .ring_test = &r600_uvd_ring_test,
+                       .ib_test = &r600_uvd_ib_test,
+                       .is_lockup = &radeon_ring_test_lockup,
                }
        },
        .irq = {
@@ -1646,6 +1691,15 @@ static struct radeon_asic trinity_asic = {
                        .ib_test = &r600_dma_ib_test,
                        .is_lockup = &cayman_dma_is_lockup,
                        .vm_flush = &cayman_dma_vm_flush,
+               },
+               [R600_RING_TYPE_UVD_INDEX] = {
+                       .ib_execute = &r600_uvd_ib_execute,
+                       .emit_fence = &r600_uvd_fence_emit,
+                       .emit_semaphore = &cayman_uvd_semaphore_emit,
+                       .cs_parse = &radeon_uvd_cs_parse,
+                       .ring_test = &r600_uvd_ring_test,
+                       .ib_test = &r600_uvd_ib_test,
+                       .is_lockup = &radeon_ring_test_lockup,
                }
        },
        .irq = {
@@ -1775,6 +1829,15 @@ static struct radeon_asic si_asic = {
                        .ib_test = &r600_dma_ib_test,
                        .is_lockup = &si_dma_is_lockup,
                        .vm_flush = &si_dma_vm_flush,
+               },
+               [R600_RING_TYPE_UVD_INDEX] = {
+                       .ib_execute = &r600_uvd_ib_execute,
+                       .emit_fence = &r600_uvd_fence_emit,
+                       .emit_semaphore = &cayman_uvd_semaphore_emit,
+                       .cs_parse = &radeon_uvd_cs_parse,
+                       .ring_test = &r600_uvd_ring_test,
+                       .ib_test = &r600_uvd_ib_test,
+                       .is_lockup = &radeon_ring_test_lockup,
                }
        },
        .irq = {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 3535f73..515db96 100644
@@ -330,6 +330,7 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
 void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
 int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
+int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
 int r600_copy_blit(struct radeon_device *rdev,
                   uint64_t src_offset, uint64_t dst_offset,
                   unsigned num_gpu_pages, struct radeon_fence **fence);
@@ -392,6 +393,19 @@ int r600_mc_wait_for_idle(struct radeon_device *rdev);
 u32 r600_get_xclk(struct radeon_device *rdev);
 uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev);
 
+/* uvd */
+int r600_uvd_init(struct radeon_device *rdev);
+int r600_uvd_rbc_start(struct radeon_device *rdev);
+void r600_uvd_rbc_stop(struct radeon_device *rdev);
+int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
+void r600_uvd_fence_emit(struct radeon_device *rdev,
+                        struct radeon_fence *fence);
+void r600_uvd_semaphore_emit(struct radeon_device *rdev,
+                            struct radeon_ring *ring,
+                            struct radeon_semaphore *semaphore,
+                            bool emit_wait);
+void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+
 /*
  * rv770,rv730,rv710,rv740
  */
@@ -409,6 +423,7 @@ int rv770_copy_dma(struct radeon_device *rdev,
                  unsigned num_gpu_pages,
                   struct radeon_fence **fence);
 u32 rv770_get_xclk(struct radeon_device *rdev);
+int rv770_uvd_resume(struct radeon_device *rdev);
 
 /*
  * evergreen
@@ -465,6 +480,10 @@ int evergreen_copy_dma(struct radeon_device *rdev,
  */
 void cayman_fence_ring_emit(struct radeon_device *rdev,
                            struct radeon_fence *fence);
+void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
+                              struct radeon_ring *ring,
+                              struct radeon_semaphore *semaphore,
+                              bool emit_wait);
 void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev);
 int cayman_init(struct radeon_device *rdev);
 void cayman_fini(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index c9ee4c0..c740707 100644
@@ -53,7 +53,6 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
        }
        for (i = 0; i < p->nrelocs; i++) {
                struct drm_radeon_cs_reloc *r;
-               uint32_t domain;
 
                duplicate = false;
                r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
@@ -81,11 +80,25 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
                p->relocs[i].lobj.bo = p->relocs[i].robj;
                p->relocs[i].lobj.written = !!r->write_domain;
 
-               domain = r->write_domain ? r->write_domain : r->read_domains;
-               p->relocs[i].lobj.domain = domain;
-               if (domain == RADEON_GEM_DOMAIN_VRAM)
-                       domain |= RADEON_GEM_DOMAIN_GTT;
-               p->relocs[i].lobj.alt_domain = domain;
+               /* the first reloc of a UVD job is the
+                  message and must be placed in VRAM */
+               if (p->ring == R600_RING_TYPE_UVD_INDEX && i == 0) {
+                       /* TODO: is this still needed for NI+ ? */
+                       p->relocs[i].lobj.domain =
+                               RADEON_GEM_DOMAIN_VRAM;
+
+                       p->relocs[i].lobj.alt_domain =
+                               RADEON_GEM_DOMAIN_VRAM;
+
+               } else {
+                       uint32_t domain = r->write_domain ?
+                               r->write_domain : r->read_domains;
+
+                       p->relocs[i].lobj.domain = domain;
+                       if (domain == RADEON_GEM_DOMAIN_VRAM)
+                               domain |= RADEON_GEM_DOMAIN_GTT;
+                       p->relocs[i].lobj.alt_domain = domain;
+               }
 
                p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
                p->relocs[i].handle = r->handle;
@@ -93,7 +106,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
                radeon_bo_list_add_object(&p->relocs[i].lobj,
                                          &p->validated);
        }
-       return radeon_bo_list_validate(&p->validated);
+       return radeon_bo_list_validate(&p->validated, p->ring);
 }
 
 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
@@ -128,6 +141,9 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
                        return -EINVAL;
                }
                break;
+       case RADEON_CS_RING_UVD:
+               p->ring = R600_RING_TYPE_UVD_INDEX;
+               break;
        }
        return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 3435625..82fe183 100644
@@ -31,9 +31,9 @@
 #include <linux/seq_file.h>
 #include <linux/atomic.h>
 #include <linux/wait.h>
-#include <linux/list.h>
 #include <linux/kref.h>
 #include <linux/slab.h>
+#include <linux/firmware.h>
 #include <drm/drmP.h>
 #include "radeon_reg.h"
 #include "radeon.h"
@@ -767,8 +767,21 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 
        radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
        if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
-               rdev->fence_drv[ring].scratch_reg = 0;
-               index = R600_WB_EVENT_OFFSET + ring * 4;
+               if (ring != R600_RING_TYPE_UVD_INDEX) {
+                       rdev->fence_drv[ring].scratch_reg = 0;
+                       index = R600_WB_EVENT_OFFSET + ring * 4;
+                       rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
+                       rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
+                                                        index;
+
+               } else {
+                       /* put fence directly behind firmware */
+                       rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr +
+                                                        rdev->uvd_fw->size;
+                       rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr +
+                                                        rdev->uvd_fw->size;
+               }
+
        } else {
                r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
                if (r) {
@@ -778,9 +791,9 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
                index = RADEON_WB_SCRATCH_OFFSET +
                        rdev->fence_drv[ring].scratch_reg -
                        rdev->scratch.reg_base;
+               rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
+               rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
        }
-       rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
-       rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
        radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
        rdev->fence_drv[ring].initialized = true;
        dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index f546448..8365c75 100644
@@ -516,6 +516,7 @@ void radeon_driver_preclose_kms(struct drm_device *dev,
                rdev->hyperz_filp = NULL;
        if (rdev->cmask_filp == file_priv)
                rdev->cmask_filp = NULL;
+       radeon_uvd_free_handles(rdev, file_priv);
 }
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 4466477..1424ccd 100644
@@ -348,7 +348,7 @@ void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
        }
 }
 
-int radeon_bo_list_validate(struct list_head *head)
+int radeon_bo_list_validate(struct list_head *head, int ring)
 {
        struct radeon_bo_list *lobj;
        struct radeon_bo *bo;
@@ -366,6 +366,8 @@ int radeon_bo_list_validate(struct list_head *head)
                        
                retry:
                        radeon_ttm_placement_from_domain(bo, domain);
+                       if (ring == R600_RING_TYPE_UVD_INDEX)
+                               radeon_uvd_force_into_uvd_segment(bo);
                        r = ttm_bo_validate(&bo->tbo, &bo->placement,
                                                true, false);
                        if (unlikely(r)) {
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 5fc86b0..e2cb80a 100644
@@ -128,7 +128,7 @@ extern int radeon_bo_init(struct radeon_device *rdev);
 extern void radeon_bo_fini(struct radeon_device *rdev);
 extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
                                struct list_head *head);
-extern int radeon_bo_list_validate(struct list_head *head);
+extern int radeon_bo_list_validate(struct list_head *head, int ring);
 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
                                struct vm_area_struct *vma);
 extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 8d58e26..31e47d8 100644
@@ -368,7 +368,7 @@ void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring)
 {
        u32 rptr;
 
-       if (rdev->wb.enabled)
+       if (rdev->wb.enabled && ring != &rdev->ring[R600_RING_TYPE_UVD_INDEX])
                rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
        else
                rptr = RREG32(ring->rptr_reg);
@@ -821,18 +821,20 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data)
        return 0;
 }
 
-static int radeon_ring_type_gfx_index = RADEON_RING_TYPE_GFX_INDEX;
-static int cayman_ring_type_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX;
-static int cayman_ring_type_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX;
-static int radeon_ring_type_dma1_index = R600_RING_TYPE_DMA_INDEX;
-static int radeon_ring_type_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX;
+static int radeon_gfx_index = RADEON_RING_TYPE_GFX_INDEX;
+static int cayman_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX;
+static int cayman_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX;
+static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX;
+static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX;
+static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX;
 
 static struct drm_info_list radeon_debugfs_ring_info_list[] = {
-       {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_ring_type_gfx_index},
-       {"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp1_index},
-       {"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp2_index},
-       {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma1_index},
-       {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma2_index},
+       {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index},
+       {"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_cp1_index},
+       {"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_cp2_index},
+       {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index},
+       {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index},
+       {"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index},
 };
 
 static int radeon_debugfs_sa_info(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index fda09c9..bbed4af 100644
@@ -252,6 +252,36 @@ void radeon_test_moves(struct radeon_device *rdev)
                radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT);
 }
 
+static int radeon_test_create_and_emit_fence(struct radeon_device *rdev,
+                                            struct radeon_ring *ring,
+                                            struct radeon_fence **fence)
+{
+       int r;
+
+       if (ring->idx == R600_RING_TYPE_UVD_INDEX) {
+               r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
+               if (r) {
+                       DRM_ERROR("Failed to get dummy create msg\n");
+                       return r;
+               }
+
+               r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence);
+               if (r) {
+                       DRM_ERROR("Failed to get dummy destroy msg\n");
+                       return r;
+               }
+       } else {
+               r = radeon_ring_lock(rdev, ring, 64);
+               if (r) {
+                       DRM_ERROR("Failed to lock ring %d\n", ring->idx);
+                       return r;
+               }
+               radeon_fence_emit(rdev, fence, ring->idx);
+               radeon_ring_unlock_commit(rdev, ring);
+       }
+       return 0;
+}
+
 void radeon_test_ring_sync(struct radeon_device *rdev,
                           struct radeon_ring *ringA,
                           struct radeon_ring *ringB)
@@ -272,21 +302,24 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
                goto out_cleanup;
        }
        radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
-       r = radeon_fence_emit(rdev, &fence1, ringA->idx);
-       if (r) {
-               DRM_ERROR("Failed to emit fence 1\n");
-               radeon_ring_unlock_undo(rdev, ringA);
+       radeon_ring_unlock_commit(rdev, ringA);
+
+       r = radeon_test_create_and_emit_fence(rdev, ringA, &fence1);
+       if (r)
                goto out_cleanup;
-       }
-       radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
-       r = radeon_fence_emit(rdev, &fence2, ringA->idx);
+
+       r = radeon_ring_lock(rdev, ringA, 64);
        if (r) {
-               DRM_ERROR("Failed to emit fence 2\n");
-               radeon_ring_unlock_undo(rdev, ringA);
+               DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
                goto out_cleanup;
        }
+       radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
        radeon_ring_unlock_commit(rdev, ringA);
 
+       r = radeon_test_create_and_emit_fence(rdev, ringA, &fence2);
+       if (r)
+               goto out_cleanup;
+
        mdelay(1000);
 
        if (radeon_fence_signaled(fence1)) {
@@ -364,27 +397,22 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev,
                goto out_cleanup;
        }
        radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
-       r = radeon_fence_emit(rdev, &fenceA, ringA->idx);
-       if (r) {
-               DRM_ERROR("Failed to emit sync fence 1\n");
-               radeon_ring_unlock_undo(rdev, ringA);
-               goto out_cleanup;
-       }
        radeon_ring_unlock_commit(rdev, ringA);
 
+       r = radeon_test_create_and_emit_fence(rdev, ringA, &fenceA);
+       if (r)
+               goto out_cleanup;
+
        r = radeon_ring_lock(rdev, ringB, 64);
        if (r) {
                DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
                goto out_cleanup;
        }
        radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore);
-       r = radeon_fence_emit(rdev, &fenceB, ringB->idx);
-       if (r) {
-               DRM_ERROR("Failed to create sync fence 2\n");
-               radeon_ring_unlock_undo(rdev, ringB);
-               goto out_cleanup;
-       }
        radeon_ring_unlock_commit(rdev, ringB);
+       r = radeon_test_create_and_emit_fence(rdev, ringB, &fenceB);
+       if (r)
+               goto out_cleanup;
 
        mdelay(1000);
 
@@ -393,7 +421,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev,
                goto out_cleanup;
        }
        if (radeon_fence_signaled(fenceB)) {
-               DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
+               DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
                goto out_cleanup;
        }
 
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
new file mode 100644
index 0000000..05a192e
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -0,0 +1,664 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Christian König <deathsimple@vodafone.de>
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm.h>
+
+#include "radeon.h"
+#include "r600d.h"
+
+/* Firmware Names */
+#define FIRMWARE_RV710         "radeon/RV710_uvd.bin"
+#define FIRMWARE_CYPRESS       "radeon/CYPRESS_uvd.bin"
+#define FIRMWARE_SUMO          "radeon/SUMO_uvd.bin"
+#define FIRMWARE_TAHITI                "radeon/TAHITI_uvd.bin"
+
+MODULE_FIRMWARE(FIRMWARE_RV710);
+MODULE_FIRMWARE(FIRMWARE_CYPRESS);
+MODULE_FIRMWARE(FIRMWARE_SUMO);
+MODULE_FIRMWARE(FIRMWARE_TAHITI);
+
+int radeon_uvd_init(struct radeon_device *rdev)
+{
+       struct platform_device *pdev;
+       unsigned long bo_size;
+       const char *fw_name;
+       int i, r;
+
+       pdev = platform_device_register_simple("radeon_uvd", 0, NULL, 0);
+       r = IS_ERR(pdev);
+       if (r) {
+               dev_err(rdev->dev, "radeon_uvd: Failed to register firmware\n");
+               return -EINVAL;
+       }
+
+       switch (rdev->family) {
+       case CHIP_RV710:
+       case CHIP_RV730:
+       case CHIP_RV740:
+               fw_name = FIRMWARE_RV710;
+               break;
+
+       case CHIP_CYPRESS:
+       case CHIP_HEMLOCK:
+       case CHIP_JUNIPER:
+       case CHIP_REDWOOD:
+       case CHIP_CEDAR:
+               fw_name = FIRMWARE_CYPRESS;
+               break;
+
+       case CHIP_SUMO:
+       case CHIP_SUMO2:
+       case CHIP_PALM:
+       case CHIP_CAYMAN:
+       case CHIP_BARTS:
+       case CHIP_TURKS:
+       case CHIP_CAICOS:
+               fw_name = FIRMWARE_SUMO;
+               break;
+
+       case CHIP_TAHITI:
+       case CHIP_VERDE:
+       case CHIP_PITCAIRN:
+       case CHIP_ARUBA:
+               fw_name = FIRMWARE_TAHITI;
+               break;
+
+       default:
+               platform_device_unregister(pdev);
+               return -EINVAL;
+       }
+
+       r = request_firmware(&rdev->uvd_fw, fw_name, &pdev->dev);
+       if (r) {
+               dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
+                       fw_name);
+               platform_device_unregister(pdev);
+               return r;
+       }
+
+       platform_device_unregister(pdev);
+
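+       /* firmware, plus 4 bytes for the fence written directly behind
+        * it (see radeon_fence_driver_start_ring), plus stack and heap */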
+       bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) +
+                 RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE;
+       r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
+                            RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo);
+       if (r) {
+               dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
+               return r;
+       }
+
+       r = radeon_uvd_resume(rdev);
+       if (r)
+               return r;
+
+       memset(rdev->uvd.cpu_addr, 0, bo_size);
+       memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);
+
+       r = radeon_uvd_suspend(rdev);
+       if (r)
+               return r;
+
+       for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+               atomic_set(&rdev->uvd.handles[i], 0);
+               rdev->uvd.filp[i] = NULL;
+       }
+
+       return 0;
+}
+
+void radeon_uvd_fini(struct radeon_device *rdev)
+{
+       radeon_uvd_suspend(rdev);
+       radeon_bo_unref(&rdev->uvd.vcpu_bo);
+}
+
+int radeon_uvd_suspend(struct radeon_device *rdev)
+{
+       int r;
+
+       if (rdev->uvd.vcpu_bo == NULL)
+               return 0;
+
+       r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
+       if (!r) {
+               radeon_bo_kunmap(rdev->uvd.vcpu_bo);
+               radeon_bo_unpin(rdev->uvd.vcpu_bo);
+               radeon_bo_unreserve(rdev->uvd.vcpu_bo);
+       }
+       return r;
+}
+
+int radeon_uvd_resume(struct radeon_device *rdev)
+{
+       int r;
+
+       if (rdev->uvd.vcpu_bo == NULL)
+               return -EINVAL;
+
+       r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
+       if (r) {
+               radeon_bo_unref(&rdev->uvd.vcpu_bo);
+               dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
+               return r;
+       }
+
+       r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
+                         &rdev->uvd.gpu_addr);
+       if (r) {
+               radeon_bo_unreserve(rdev->uvd.vcpu_bo);
+               radeon_bo_unref(&rdev->uvd.vcpu_bo);
+               dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
+               return r;
+       }
+
+       r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
+       if (r) {
+               dev_err(rdev->dev, "(%d) UVD map failed\n", r);
+               return r;
+       }
+
+       radeon_bo_unreserve(rdev->uvd.vcpu_bo);
+
+       return 0;
+}
+
+void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo)
+{
+       rbo->placement.fpfn = 0 >> PAGE_SHIFT;
+       rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
+}
+
+void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
+{
+       int i, r;
+       for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+               if (rdev->uvd.filp[i] == filp) {
+                       uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
+                       struct radeon_fence *fence;
+
+                       r = radeon_uvd_get_destroy_msg(rdev,
+                               R600_RING_TYPE_UVD_INDEX, handle, &fence);
+                       if (r) {
+                               DRM_ERROR("Error destroying UVD (%d)!\n", r);
+                               continue;
+                       }
+
+                       radeon_fence_wait(fence, false);
+                       radeon_fence_unref(&fence);
+
+                       rdev->uvd.filp[i] = NULL;
+                       atomic_set(&rdev->uvd.handles[i], 0);
+               }
+       }
+}
+
+static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
+{
+       unsigned stream_type = msg[4];
+       unsigned width = msg[6];
+       unsigned height = msg[7];
+       unsigned dpb_size = msg[9];
+       unsigned pitch = msg[28];
+
+       unsigned width_in_mb = width / 16;
+       unsigned height_in_mb = ALIGN(height / 16, 2);
+
+       unsigned image_size, tmp, min_dpb_size;
+
+       image_size = width * height;
+       image_size += image_size / 2;
+       image_size = ALIGN(image_size, 1024);
+
+       switch (stream_type) {
+       case 0: /* H264 */
+
+               /* reference picture buffer */
+               min_dpb_size = image_size * 17;
+
+               /* macroblock context buffer */
+               min_dpb_size += width_in_mb * height_in_mb * 17 * 192;
+
+               /* IT surface buffer */
+               min_dpb_size += width_in_mb * height_in_mb * 32;
+               break;
+
+       case 1: /* VC1 */
+
+               /* reference picture buffer */
+               min_dpb_size = image_size * 3;
+
+               /* CONTEXT_BUFFER */
+               min_dpb_size += width_in_mb * height_in_mb * 128;
+
+               /* IT surface buffer */
+               min_dpb_size += width_in_mb * 64;
+
+               /* DB surface buffer */
+               min_dpb_size += width_in_mb * 128;
+
+               /* BP */
+               tmp = max(width_in_mb, height_in_mb);
+               min_dpb_size += ALIGN(tmp * 7 * 16, 64);
+               break;
+
+       case 3: /* MPEG2 */
+
+               /* reference picture buffer */
+               min_dpb_size = image_size * 3;
+               break;
+
+       case 4: /* MPEG4 */
+
+               /* reference picture buffer */
+               min_dpb_size = image_size * 3;
+
+               /* CM */
+               min_dpb_size += width_in_mb * height_in_mb * 64;
+
+               /* IT surface buffer */
+               min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
+               break;
+
+       default:
+               DRM_ERROR("UVD codec not handled %d!\n", stream_type);
+               return -EINVAL;
+       }
+
+       if (width > pitch) {
+               DRM_ERROR("Invalid UVD decoding target pitch!\n");
+               return -EINVAL;
+       }
+
+       if (dpb_size < min_dpb_size) {
+               DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
+                         dpb_size, min_dpb_size);
+               return -EINVAL;
+       }
+
+       buf_sizes[0x1] = dpb_size;
+       buf_sizes[0x2] = image_size;
+       return 0;
+}
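
To make the numbers concrete: for a hypothetical 1920x1088 H264 stream the
minimum DPB comes out around 76 MiB. A stand-alone sketch mirroring the H264
branch above:

#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned width = 1920, height = 1088;		/* made-up stream */
	unsigned width_in_mb = width / 16;		/* 120 */
	unsigned height_in_mb = ALIGN(height / 16, 2);	/* 68 */

	/* 4:2:0 frame: luma plus half-size chroma, 1 KiB aligned */
	unsigned image_size = ALIGN(width * height * 3 / 2, 1024);

	unsigned min_dpb = image_size * 17			 /* 17 ref pics */
			 + width_in_mb * height_in_mb * 17 * 192 /* MB context */
			 + width_in_mb * height_in_mb * 32;	 /* IT surface */

	printf("min_dpb_size = %u bytes (~%u MiB)\n", min_dpb, min_dpb >> 20);
	return 0;
}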
+
+static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
+                            unsigned offset, unsigned buf_sizes[])
+{
+       int32_t *msg, msg_type, handle;
+       void *ptr;
+
+       int i, r;
+
+       if (offset & 0x3F) {
+               DRM_ERROR("UVD messages must be 64 byte aligned!\n");
+               return -EINVAL;
+       }
+
+       r = radeon_bo_kmap(bo, &ptr);
+       if (r)
+               return r;
+
+       msg = ptr + offset;
+
+       msg_type = msg[1];
+       handle = msg[2];
+
+       if (handle == 0) {
+               DRM_ERROR("Invalid UVD handle!\n");
+               radeon_bo_kunmap(bo);
+               return -EINVAL;
+       }
+
+       if (msg_type == 1) {
+               /* it's a decode msg, calc buffer sizes */
+               r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
+               radeon_bo_kunmap(bo);
+               if (r)
+                       return r;
+
+       } else if (msg_type == 2) {
+               /* it's a destroy msg, free the handle */
+               for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
+                       atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
+               radeon_bo_kunmap(bo);
+               return 0;
+       } else {
+               /* it's a create msg, no special handling needed */
+               radeon_bo_kunmap(bo);
+       }
+
+       /* create or decode, validate the handle */
+       for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+               if (atomic_read(&p->rdev->uvd.handles[i]) == handle)
+                       return 0;
+       }
+
+       /* handle not found, try to allocate a new one */
+       for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+               if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
+                       p->rdev->uvd.filp[i] = p->filp;
+                       return 0;
+               }
+       }
+
+       DRM_ERROR("No more free UVD handles!\n");
+       return -EINVAL;
+}
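
The handle table is lock-free: cmpxchg against zero claims a slot, so two
parallel submitters can never grab the same entry. A user-space analogue of
the pattern with C11 atomics (table size and names are illustrative):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_HANDLES 10

static atomic_uint handles[MAX_HANDLES];

/* claim a free slot for handle; returns the slot index or -1 */
static int claim(uint32_t handle)
{
	for (int i = 0; i < MAX_HANDLES; ++i) {
		unsigned int expected = 0;
		/* succeeds only if the slot still holds 0 */
		if (atomic_compare_exchange_strong(&handles[i],
						   &expected, handle))
			return i;
	}
	return -1;
}

int main(void)
{
	printf("0xdead -> slot %d\n", claim(0xdead));
	printf("0xbeef -> slot %d\n", claim(0xbeef));
	return 0;
}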
+
+static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
+                              int data0, int data1,
+                              unsigned buf_sizes[])
+{
+       struct radeon_cs_chunk *relocs_chunk;
+       struct radeon_cs_reloc *reloc;
+       unsigned idx, cmd, offset;
+       uint64_t start, end;
+       int r;
+
+       relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+       offset = radeon_get_ib_value(p, data0);
+       idx = radeon_get_ib_value(p, data1);
+       if (idx >= relocs_chunk->length_dw) {
+               DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+                         idx, relocs_chunk->length_dw);
+               return -EINVAL;
+       }
+
+       reloc = p->relocs_ptr[(idx / 4)];
+       start = reloc->lobj.gpu_offset;
+       end = start + radeon_bo_size(reloc->robj);
+       start += offset;
+
+       p->ib.ptr[data0] = start & 0xFFFFFFFF;
+       p->ib.ptr[data1] = start >> 32;
+
+       cmd = radeon_get_ib_value(p, p->idx) >> 1;
+
+       if (cmd < 0x4) {
+               if ((end - start) < buf_sizes[cmd]) {
+                       DRM_ERROR("buffer to small (%d / %d)!\n",
+                                 (unsigned)(end - start), buf_sizes[cmd]);
+                       return -EINVAL;
+               }
+
+       } else if (cmd != 0x100) {
+               DRM_ERROR("invalid UVD command %X!\n", cmd);
+               return -EINVAL;
+       }
+
+       if (cmd == 0) {
+               if (end & 0xFFFFFFFFF0000000) {
+                       DRM_ERROR("msg buffer %LX-%LX out of 256MB segment!\n",
+                                 start, end);
+                       return -EINVAL;
+               }
+
+               r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
+               if (r)
+                       return r;
+       }
+
+       if ((start & 0xFFFFFFFFF0000000) != (end & 0xFFFFFFFFF0000000)) {
+               DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
+                         start, end);
+               return -EINVAL;
+       }
+
+       return 0;
+}
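
The 0xFFFFFFFFF0000000 mask clears bits 0-27, everything inside one 256MB
window, so two addresses with equal masked values sit in the same segment.
A small demonstration with made-up addresses:

#include <stdio.h>
#include <stdint.h>

#define SEGMENT_MASK 0xFFFFFFFFF0000000ULL

static int same_segment(uint64_t start, uint64_t end)
{
	return (start & SEGMENT_MASK) == (end & SEGMENT_MASK);
}

int main(void)
{
	/* 0x10000000 is the 256MB line */
	printf("%d\n", same_segment(0x00100000, 0x00200000)); /* 1: fine */
	printf("%d\n", same_segment(0x0fff0000, 0x10010000)); /* 0: crosses */
	return 0;
}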
+
+static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
+                            struct radeon_cs_packet *pkt,
+                            int *data0, int *data1,
+                            unsigned buf_sizes[])
+{
+       int i, r;
+
+       p->idx++;
+       for (i = 0; i <= pkt->count; ++i) {
+               switch (pkt->reg + i*4) {
+               case UVD_GPCOM_VCPU_DATA0:
+                       *data0 = p->idx;
+                       break;
+               case UVD_GPCOM_VCPU_DATA1:
+                       *data1 = p->idx;
+                       break;
+               case UVD_GPCOM_VCPU_CMD:
+                       r = radeon_uvd_cs_reloc(p, *data0, *data1, buf_sizes);
+                       if (r)
+                               return r;
+                       break;
+               case UVD_ENGINE_CNTL:
+                       break;
+               default:
+                       DRM_ERROR("Invalid reg 0x%X!\n",
+                                 pkt->reg + i*4);
+                       return -EINVAL;
+               }
+               p->idx++;
+       }
+       return 0;
+}
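
The register walk above relies on the type-0 packet layout: one header, then
count+1 data dwords hitting consecutive registers. A decoding sketch; the
field positions are my reading of the PACKET0 macro (type in bits 31:30,
count in 29:16, register offset in dwords in 15:0), not taken from
documentation, and the register offset used is made up:

#include <stdio.h>
#include <stdint.h>

static void decode_type0(uint32_t header)
{
	unsigned count = (header >> 16) & 0x3fff;
	unsigned reg = (header & 0xffff) << 2;

	for (unsigned i = 0; i <= count; ++i)
		printf("data dword %u -> reg 0x%04x\n", i, reg + i * 4);
}

int main(void)
{
	/* type 0, count 3 (four data dwords), starting at reg 0xef10 */
	decode_type0((0u << 30) | (3u << 16) | (0xef10 >> 2));
	return 0;
}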
+
+int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
+{
+       struct radeon_cs_packet pkt;
+       int r, data0 = 0, data1 = 0;
+
+       /* minimum buffer sizes */
+       unsigned buf_sizes[] = {
+               [0x00000000]    =       2048,
+               [0x00000001]    =       32 * 1024 * 1024,
+               [0x00000002]    =       2048 * 1152 * 3,
+               [0x00000003]    =       2048,
+       };
+
+       if (p->chunks[p->chunk_ib_idx].length_dw % 16) {
+               DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
+                         p->chunks[p->chunk_ib_idx].length_dw);
+               return -EINVAL;
+       }
+
+       if (p->chunk_relocs_idx == -1) {
+               DRM_ERROR("No relocation chunk !\n");
+               return -EINVAL;
+       }
+
+       do {
+               r = radeon_cs_packet_parse(p, &pkt, p->idx);
+               if (r)
+                       return r;
+               switch (pkt.type) {
+               case RADEON_PACKET_TYPE0:
+                       r = radeon_uvd_cs_reg(p, &pkt, &data0,
+                                             &data1, buf_sizes);
+                       if (r)
+                               return r;
+                       break;
+               case RADEON_PACKET_TYPE2:
+                       p->idx += pkt.count + 2;
+                       break;
+               default:
+                       DRM_ERROR("Unknown packet type %d !\n", pkt.type);
+                       return -EINVAL;
+               }
+       } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+       return 0;
+}
+
+static int radeon_uvd_send_msg(struct radeon_device *rdev,
+                              int ring, struct radeon_bo *bo,
+                              struct radeon_fence **fence)
+{
+       struct ttm_validate_buffer tv;
+       struct list_head head;
+       struct radeon_ib ib;
+       uint64_t addr;
+       int i, r;
+
+       memset(&tv, 0, sizeof(tv));
+       tv.bo = &bo->tbo;
+
+       INIT_LIST_HEAD(&head);
+       list_add(&tv.head, &head);
+
+       r = ttm_eu_reserve_buffers(&head);
+       if (r)
+               return r;
+
+       radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM);
+       radeon_uvd_force_into_uvd_segment(bo);
+
+       r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+       if (r) {
+               ttm_eu_backoff_reservation(&head);
+               return r;
+       }
+
+       r = radeon_ib_get(rdev, ring, &ib, NULL, 16);
+       if (r) {
+               ttm_eu_backoff_reservation(&head);
+               return r;
+       }
+
+       addr = radeon_bo_gpu_offset(bo);
+       ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
+       ib.ptr[1] = addr;
+       ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
+       ib.ptr[3] = addr >> 32;
+       ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
+       ib.ptr[5] = 0;
+       for (i = 6; i < 16; ++i)
+               ib.ptr[i] = PACKET2(0);
+       ib.length_dw = 16;
+
+       r = radeon_ib_schedule(rdev, &ib, NULL);
+       if (r) {
+               ttm_eu_backoff_reservation(&head);
+               return r;
+       }
+       ttm_eu_fence_buffer_objects(&head, ib.fence);
+
+       if (fence)
+               *fence = radeon_fence_ref(ib.fence);
+
+       radeon_ib_free(rdev, &ib);
+       radeon_bo_unref(&bo);
+       return 0;
+}
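
What actually lands on the ring is always this fixed 16-dword IB: three
register writes carrying the message address and command, padded out with
type-2 NOPs. A user-space sketch with assumed packet encodings and made-up
UVD_GPCOM_* offsets (the real ones live in r600d.h, not shown in this
excerpt):

#include <stdio.h>
#include <stdint.h>

/* assumed encodings mirroring the radeon PACKET0/PACKET2 macros */
#define PKT0(reg, n) ((0u << 30) | ((reg) >> 2) | ((n) << 16))
#define PKT2(v)      ((2u << 30) | (v))

/* hypothetical register offsets */
#define GPCOM_DATA0 0xef10
#define GPCOM_DATA1 0xef14
#define GPCOM_CMD   0xef0c

int main(void)
{
	uint64_t addr = 0x12340000ULL;	/* made-up msg bo address */
	uint32_t ib[16];
	int i;

	ib[0] = PKT0(GPCOM_DATA0, 0);
	ib[1] = (uint32_t)addr;			/* low 32 bits */
	ib[2] = PKT0(GPCOM_DATA1, 0);
	ib[3] = (uint32_t)(addr >> 32);		/* high 32 bits */
	ib[4] = PKT0(GPCOM_CMD, 0);
	ib[5] = 0;				/* cmd 0: msg buffer */
	for (i = 6; i < 16; ++i)
		ib[i] = PKT2(0);		/* type-2 NOP padding */

	for (i = 0; i < 16; ++i)
		printf("dw%02d: 0x%08x\n", i, (unsigned)ib[i]);
	return 0;
}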
+
+/* multiple fence commands without any stream commands in between can
+ * crash the vcpu, so just try to emit a dummy create/destroy msg to
+ * avoid this */
+int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
+                             uint32_t handle, struct radeon_fence **fence)
+{
+       struct radeon_bo *bo;
+       uint32_t *msg;
+       int r, i;
+
+       r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true,
+                            RADEON_GEM_DOMAIN_VRAM, NULL, &bo);
+       if (r)
+               return r;
+
+       r = radeon_bo_reserve(bo, false);
+       if (r) {
+               radeon_bo_unref(&bo);
+               return r;
+       }
+
+       r = radeon_bo_kmap(bo, (void **)&msg);
+       if (r) {
+               radeon_bo_unreserve(bo);
+               radeon_bo_unref(&bo);
+               return r;
+       }
+
+       /* stitch together a UVD create msg */
+       msg[0] = 0x00000de4;
+       msg[1] = 0x00000000;
+       msg[2] = handle;
+       msg[3] = 0x00000000;
+       msg[4] = 0x00000000;
+       msg[5] = 0x00000000;
+       msg[6] = 0x00000000;
+       msg[7] = 0x00000780;
+       msg[8] = 0x00000440;
+       msg[9] = 0x00000000;
+       msg[10] = 0x01b37000;
+       for (i = 11; i < 1024; ++i)
+               msg[i] = 0x0;
+
+       radeon_bo_kunmap(bo);
+       radeon_bo_unreserve(bo);
+
+       return radeon_uvd_send_msg(rdev, ring, bo, fence);
+}
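
A plausible reading of the constants, not confirmed by documentation: msg[1]
= 0 marks a create message (radeon_uvd_cs_msg() treats anything other than
type 1/2 as create), and 0x780/0x440 convert to 1920/1088, so this looks
like a dummy full-HD session; 0xde4 would then be the message size in bytes.
A throwaway conversion:

#include <stdio.h>

int main(void)
{
	printf("msg[0] 0x00000de4 = %d\n", 0xde4); /* 3556, size? */
	printf("msg[7] 0x00000780 = %d\n", 0x780); /* 1920 */
	printf("msg[8] 0x00000440 = %d\n", 0x440); /* 1088 */
	return 0;
}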
+
+int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
+                              uint32_t handle, struct radeon_fence **fence)
+{
+       struct radeon_bo *bo;
+       uint32_t *msg;
+       int r, i;
+
+       r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true,
+                            RADEON_GEM_DOMAIN_VRAM, NULL, &bo);
+       if (r)
+               return r;
+
+       r = radeon_bo_reserve(bo, false);
+       if (r) {
+               radeon_bo_unref(&bo);
+               return r;
+       }
+
+       r = radeon_bo_kmap(bo, (void **)&msg);
+       if (r) {
+               radeon_bo_unreserve(bo);
+               radeon_bo_unref(&bo);
+               return r;
+       }
+
+       /* stitch together a UVD destroy msg */
+       msg[0] = 0x00000de4;
+       msg[1] = 0x00000002;
+       msg[2] = handle;
+       msg[3] = 0x00000000;
+       for (i = 4; i < 1024; ++i)
+               msg[i] = 0x0;
+
+       radeon_bo_kunmap(bo);
+       radeon_bo_unreserve(bo);
+
+       return radeon_uvd_send_msg(rdev, ring, bo, fence);
+}
index d4d9be1..a47e7b9 100644 (file)
@@ -68,6 +68,105 @@ u32 rv770_get_xclk(struct radeon_device *rdev)
        return reference_clock;
 }
 
+int rv770_uvd_resume(struct radeon_device *rdev)
+{
+       uint64_t addr;
+       uint32_t chip_id, size;
+       int r;
+
+       r = radeon_uvd_resume(rdev);
+       if (r)
+               return r;
+
+       /* program the VCPU memory controller bits 0-27 */
+       addr = rdev->uvd.gpu_addr >> 3;
+       size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
+       WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
+       WREG32(UVD_VCPU_CACHE_SIZE0, size);
+
+       addr += size;
+       size = RADEON_UVD_STACK_SIZE >> 3;
+       WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
+       WREG32(UVD_VCPU_CACHE_SIZE1, size);
+
+       addr += size;
+       size = RADEON_UVD_HEAP_SIZE >> 3;
+       WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
+       WREG32(UVD_VCPU_CACHE_SIZE2, size);
+
+       /* bits 28-31 */
+       addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
+       WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
+
+       /* bits 32-39 */
+       addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
+       WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+
+       /* tell firmware which hardware it is running on */
+       switch (rdev->family) {
+       default:
+               return -EINVAL;
+       case CHIP_RV710:
+               chip_id = 0x01000005;
+               break;
+       case CHIP_RV730:
+               chip_id = 0x01000006;
+               break;
+       case CHIP_RV740:
+               chip_id = 0x01000007;
+               break;
+       case CHIP_CYPRESS:
+       case CHIP_HEMLOCK:
+               chip_id = 0x01000008;
+               break;
+       case CHIP_JUNIPER:
+               chip_id = 0x01000009;
+               break;
+       case CHIP_REDWOOD:
+               chip_id = 0x0100000a;
+               break;
+       case CHIP_CEDAR:
+               chip_id = 0x0100000b;
+               break;
+       case CHIP_SUMO:
+               chip_id = 0x0100000c;
+               break;
+       case CHIP_SUMO2:
+               chip_id = 0x0100000d;
+               break;
+       case CHIP_PALM:
+               chip_id = 0x0100000e;
+               break;
+       case CHIP_CAYMAN:
+               chip_id = 0x0100000f;
+               break;
+       case CHIP_BARTS:
+               chip_id = 0x01000010;
+               break;
+       case CHIP_TURKS:
+               chip_id = 0x01000011;
+               break;
+       case CHIP_CAICOS:
+               chip_id = 0x01000012;
+               break;
+       case CHIP_TAHITI:
+               chip_id = 0x01000014;
+               break;
+       case CHIP_VERDE:
+               chip_id = 0x01000015;
+               break;
+       case CHIP_PITCAIRN:
+               chip_id = 0x01000016;
+               break;
+       case CHIP_ARUBA:
+               chip_id = 0x01000017;
+               break;
+       }
+       WREG32(UVD_VCPU_CHIP_ID, chip_id);
+
+       return 0;
+}
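
All the cache offsets and sizes above are in 8-byte units (hence the >> 3),
with address bits 28-31 and 32-39 split off into the extension registers.
A sketch of the computation for a hypothetical 36-bit address:

#include <stdio.h>
#include <stdint.h>

#define GPU_PAGE_ALIGN(x) (((x) + 4095ULL) & ~4095ULL)

int main(void)
{
	uint64_t gpu_addr = 0x123456000ULL;	/* made-up, page aligned */
	uint64_t fw_size = 256 * 1024;		/* made-up firmware size */

	uint64_t offset0 = gpu_addr >> 3;
	uint64_t size0 = GPU_PAGE_ALIGN(fw_size + 4) >> 3;
	uint32_t addr_ext = (gpu_addr >> 28) & 0xf;
	uint32_t ext40 = (gpu_addr >> 32) & 0xff;

	printf("OFFSET0=0x%llx SIZE0=0x%llx ADDR_EXT=0x%x EXT40=0x%x\n",
	       (unsigned long long)offset0, (unsigned long long)size0,
	       addr_ext, ext40);
	return 0;
}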
+
 u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
 {
        struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
@@ -1040,6 +1139,17 @@ static int rv770_startup(struct radeon_device *rdev)
                return r;
        }
 
+       r = rv770_uvd_resume(rdev);
+       if (!r) {
+               r = radeon_fence_driver_start_ring(rdev,
+                                                  R600_RING_TYPE_UVD_INDEX);
+               if (r)
+                       dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+       }
+
+       if (r)
+               rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+
        /* Enable IRQ */
        r = r600_irq_init(rdev);
        if (r) {
@@ -1074,6 +1184,19 @@ static int rv770_startup(struct radeon_device *rdev)
        if (r)
                return r;
 
+       ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+       if (ring->ring_size) {
+               r = radeon_ring_init(rdev, ring, ring->ring_size,
+                                    R600_WB_UVD_RPTR_OFFSET,
+                                    UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
+                                    0, 0xfffff, RADEON_CP_PACKET2);
+               if (!r)
+                       r = r600_uvd_init(rdev);
+
+               if (r)
+                       DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
+       }
+
        r = radeon_ib_pool_init(rdev);
        if (r) {
                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -1115,6 +1238,7 @@ int rv770_resume(struct radeon_device *rdev)
 int rv770_suspend(struct radeon_device *rdev)
 {
        r600_audio_fini(rdev);
+       radeon_uvd_suspend(rdev);
        r700_cp_stop(rdev);
        r600_dma_stop(rdev);
        r600_irq_suspend(rdev);
@@ -1190,6 +1314,13 @@ int rv770_init(struct radeon_device *rdev)
        rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
        r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
 
+       r = radeon_uvd_init(rdev);
+       if (!r) {
+               rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
+               r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX],
+                              4096);
+       }
+
        rdev->ih.ring_obj = NULL;
        r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -1224,6 +1355,7 @@ void rv770_fini(struct radeon_device *rdev)
        radeon_ib_pool_fini(rdev);
        radeon_irq_kms_fini(rdev);
        rv770_pcie_gart_fini(rdev);
+       radeon_uvd_fini(rdev);
        r600_vram_scratch_fini(rdev);
        radeon_gem_fini(rdev);
        radeon_fence_driver_fini(rdev);
index c55f950..da158b5 100644 (file)
 #       define TARGET_LINK_SPEED_MASK                     (0xf << 0)
 #       define SELECTABLE_DEEMPHASIS                      (1 << 6)
 
+/* UVD */
+#define UVD_LMI_EXT40_ADDR                             0xf498
+#define UVD_VCPU_CHIP_ID                               0xf4d4
+#define UVD_VCPU_CACHE_OFFSET0                         0xf4d8
+#define UVD_VCPU_CACHE_SIZE0                           0xf4dc
+#define UVD_VCPU_CACHE_OFFSET1                         0xf4e0
+#define UVD_VCPU_CACHE_SIZE1                           0xf4e4
+#define UVD_VCPU_CACHE_OFFSET2                         0xf4e8
+#define UVD_VCPU_CACHE_SIZE2                           0xf4ec
+#define UVD_LMI_ADDR_EXT                               0xf594
+
+#define UVD_RBC_RB_RPTR                                        0xf690
+#define UVD_RBC_RB_WPTR                                        0xf694
+
 #endif
index ace45da..3e9782d 100644 (file)
@@ -4333,6 +4333,16 @@ static int si_startup(struct radeon_device *rdev)
                return r;
        }
 
+       r = rv770_uvd_resume(rdev);
+       if (!r) {
+               r = radeon_fence_driver_start_ring(rdev,
+                                                  R600_RING_TYPE_UVD_INDEX);
+               if (r)
+                       dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+       }
+       if (r)
+               rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+
        /* Enable IRQ */
        r = si_irq_init(rdev);
        if (r) {
@@ -4390,6 +4400,18 @@ static int si_startup(struct radeon_device *rdev)
        if (r)
                return r;
 
+       ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+       if (ring->ring_size) {
+               r = radeon_ring_init(rdev, ring, ring->ring_size,
+                                    R600_WB_UVD_RPTR_OFFSET,
+                                    UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
+                                    0, 0xfffff, RADEON_CP_PACKET2);
+               if (!r)
+                       r = r600_uvd_init(rdev);
+               if (r)
+                       DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
+       }
+
        r = radeon_ib_pool_init(rdev);
        if (r) {
                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -4433,6 +4455,8 @@ int si_suspend(struct radeon_device *rdev)
        radeon_vm_manager_fini(rdev);
        si_cp_enable(rdev, false);
        cayman_dma_stop(rdev);
+       r600_uvd_rbc_stop(rdev);
+       radeon_uvd_suspend(rdev);
        si_irq_suspend(rdev);
        radeon_wb_disable(rdev);
        si_pcie_gart_disable(rdev);
@@ -4518,6 +4542,13 @@ int si_init(struct radeon_device *rdev)
        ring->ring_obj = NULL;
        r600_ring_init(rdev, ring, 64 * 1024);
 
+       r = radeon_uvd_init(rdev);
+       if (!r) {
+               ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+               ring->ring_obj = NULL;
+               r600_ring_init(rdev, ring, 4096);
+       }
+
        rdev->ih.ring_obj = NULL;
        r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -4566,6 +4597,7 @@ void si_fini(struct radeon_device *rdev)
        radeon_vm_manager_fini(rdev);
        radeon_ib_pool_fini(rdev);
        radeon_irq_kms_fini(rdev);
+       radeon_uvd_fini(rdev);
        si_pcie_gart_fini(rdev);
        r600_vram_scratch_fini(rdev);
        radeon_gem_fini(rdev);
index f84cff0..1fb8ee2 100644 (file)
 #       define THREAD_TRACE_FINISH                      (55 << 0)
 
 /*
+ * UVD
+ */
+#define UVD_RBC_RB_RPTR                                        0xF690
+#define UVD_RBC_RB_WPTR                                        0xF694
+
+/*
  * PM4
  */
 #define PACKET0(reg, n)        ((RADEON_PACKET_TYPE0 << 30) |                  \
index 6fd2556..b1c1a2a 100644 (file)
@@ -918,6 +918,7 @@ struct drm_radeon_gem_va {
 #define RADEON_CS_RING_GFX          0
 #define RADEON_CS_RING_COMPUTE      1
 #define RADEON_CS_RING_DMA          2
+#define RADEON_CS_RING_UVD          3
 /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
 /* 0 = normal, + = higher priority, - = lower priority */
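
From user space the new ring is selected through the RADEON_CHUNK_ID_FLAGS
chunk. A minimal sketch of that chunk's three dwords; placing the ring id in
the second dword is my reading of the CS code, only the priority dword is
documented by the comment above:

#include <stdint.h>

struct cs_flags_chunk {
	uint32_t flags;		/* e.g. 0 */
	uint32_t ring;		/* RADEON_CS_RING_UVD == 3 */
	int32_t  priority;	/* 0 = normal */
};

int main(void)
{
	struct cs_flags_chunk fc = { 0, 3 /* RADEON_CS_RING_UVD */, 0 };

	(void)fc; /* would be referenced from a drm_radeon_cs chunk array */
	return 0;
}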