media: rpivid: Map cmd buffer directly
authorJohn Cox <jc@kynesim.co.uk>
Mon, 29 Mar 2021 16:42:16 +0000 (17:42 +0100)
committerDom Cobley <popcornmix@gmail.com>
Mon, 21 Mar 2022 16:04:15 +0000 (16:04 +0000)
It is unnecessary to have a separate dmabuf to hold the cmd buffer.
Map it directly from the kmalloc.

Signed-off-by: John Cox <jc@kynesim.co.uk>
drivers/staging/media/rpivid/rpivid.h
drivers/staging/media/rpivid/rpivid_h265.c
drivers/staging/media/rpivid/rpivid_hw.c

index 9920980..ede92c9 100644 (file)
@@ -114,7 +114,6 @@ struct rpivid_ctx {
        unsigned int p1idx;
        atomic_t p1out;
        struct rpivid_gptr bitbufs[RPIVID_P1BUF_COUNT];
-       struct rpivid_gptr cmdbufs[RPIVID_P1BUF_COUNT];
 
        /* *** Should be in dev *** */
        unsigned int p2idx;
@@ -183,6 +182,8 @@ struct rpivid_dev {
        struct clk              *clock;
        struct clk_request      *hevc_req;
 
+       int                     cache_align;
+
        struct rpivid_hw_irq_ctrl ic_active1;
        struct rpivid_hw_irq_ctrl ic_active2;
 };
index a2cb2b5..9eb8704 100644 (file)
@@ -227,6 +227,9 @@ struct rpivid_dec_env {
        struct rpivid_q_aux *frame_aux;
        struct rpivid_q_aux *col_aux;
 
+       dma_addr_t cmd_addr;
+       size_t cmd_size;
+
        dma_addr_t pu_base_vc;
        dma_addr_t coeff_base_vc;
        u32 pu_stride;
@@ -234,7 +237,6 @@ struct rpivid_dec_env {
 
        struct rpivid_gptr *bit_copy_gptr;
        size_t bit_copy_len;
-       struct rpivid_gptr *cmd_copy_gptr;
 
 #define SLICE_MSGS_MAX (2 * HEVC_MAX_REFS * 8 + 3)
        u16 slice_msgs[SLICE_MSGS_MAX];
@@ -1499,22 +1501,17 @@ static int write_cmd_buffer(struct rpivid_dev *const dev,
                            struct rpivid_dec_env *const de,
                            const struct rpivid_dec_state *const s)
 {
-       // Copy commands out to dma buf
-       const size_t cmd_size = de->cmd_len * sizeof(de->cmd_fifo[0]);
-
-       if (!de->cmd_copy_gptr->ptr || cmd_size > de->cmd_copy_gptr->size) {
-               size_t cmd_alloc = round_up_size(cmd_size);
+       const size_t cmd_size = ALIGN(de->cmd_len * sizeof(de->cmd_fifo[0]),
+                                     dev->cache_align);
 
-               if (gptr_realloc_new(dev, de->cmd_copy_gptr, cmd_alloc)) {
-                       v4l2_err(&dev->v4l2_dev,
-                                "Alloc cmd buffer (%zu): FAILED\n", cmd_alloc);
-                       return -ENOMEM;
-               }
-               v4l2_info(&dev->v4l2_dev, "Alloc cmd buffer (%zu): OK\n",
-                         cmd_alloc);
+       de->cmd_addr = dma_map_single(dev->dev, de->cmd_fifo,
+                                     cmd_size, DMA_TO_DEVICE);
+       if (dma_mapping_error(dev->dev, de->cmd_addr)) {
+               v4l2_err(&dev->v4l2_dev,
+                        "Map cmd buffer (%zu): FAILED\n", cmd_size);
+               return -ENOMEM;
        }
-
-       memcpy(de->cmd_copy_gptr->ptr, de->cmd_fifo, cmd_size);
+       de->cmd_size = cmd_size;
        return 0;
 }
 
@@ -1551,6 +1548,12 @@ static void dec_env_delete(struct rpivid_dec_env *const de)
        struct rpivid_ctx * const ctx = de->ctx;
        unsigned long lock_flags;
 
+       if (de->cmd_size) {
+               dma_unmap_single(ctx->dev->dev, de->cmd_addr, de->cmd_size,
+                                DMA_TO_DEVICE);
+               de->cmd_size = 0;
+       }
+
        aux_q_release(ctx, &de->frame_aux);
        aux_q_release(ctx, &de->col_aux);
 
@@ -1603,7 +1606,8 @@ static int dec_env_init(struct rpivid_ctx *const ctx)
 
                de->ctx = ctx;
                de->decode_order = i;
-               de->cmd_max = 1024;
+//             de->cmd_max = 1024;
+               de->cmd_max = 8096;
                de->cmd_fifo = kmalloc_array(de->cmd_max,
                                             sizeof(struct rpi_cmd),
                                             GFP_KERNEL);
@@ -1748,7 +1752,6 @@ static void rpivid_h265_setup(struct rpivid_ctx *ctx, struct rpivid_run *run)
 
                de->bit_copy_gptr = ctx->bitbufs + ctx->p1idx;
                de->bit_copy_len = 0;
-               de->cmd_copy_gptr = ctx->cmdbufs + ctx->p1idx;
 
                de->frame_c_offset = ctx->dst_fmt.height * 128;
                de->frame_stride = ctx->dst_fmt.plane_fmt[0].bytesperline * 128;
@@ -2356,7 +2359,7 @@ static void phase1_claimed(struct rpivid_dev *const dev, void *v)
        rpivid_hw_irq_active1_irq(dev, &de->irq_ent, phase1_cb, de);
 
        // And start the h/w
-       apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_copy_gptr->addr);
+       apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_addr);
 
        xtrace_ok(dev, de);
        return;
@@ -2400,8 +2403,6 @@ static void rpivid_h265_stop(struct rpivid_ctx *ctx)
 
        for (i = 0; i != ARRAY_SIZE(ctx->bitbufs); ++i)
                gptr_free(dev, ctx->bitbufs + i);
-       for (i = 0; i != ARRAY_SIZE(ctx->cmdbufs); ++i)
-               gptr_free(dev, ctx->cmdbufs + i);
        for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i)
                gptr_free(dev, ctx->pu_bufs + i);
        for (i = 0; i != ARRAY_SIZE(ctx->coeff_bufs); ++i)
@@ -2451,13 +2452,6 @@ static int rpivid_h265_start(struct rpivid_ctx *ctx)
                goto fail;
        }
 
-       // 16k is plenty for most purposes but we will realloc if needed
-       for (i = 0; i != ARRAY_SIZE(ctx->cmdbufs); ++i) {
-               if (gptr_alloc(dev, ctx->cmdbufs + i, 0x4000,
-                              DMA_ATTR_FORCE_CONTIGUOUS))
-                       goto fail;
-       }
-
        // Finger in the air PU & Coeff alloc
        // Will be realloced if too small
        coeff_alloc = round_up_size(wxh);
index 396263b..2bb86d5 100644 (file)
@@ -331,6 +331,8 @@ int rpivid_hw_probe(struct rpivid_dev *dev)
        if (IS_ERR(dev->clock))
                return PTR_ERR(dev->clock);
 
+       dev->cache_align = dma_get_cache_alignment();
+
        // Disable IRQs & reset anything pending
        irq_write(dev, 0,
                  ARG_IC_ICTRL_ACTIVE1_EN_SET | ARG_IC_ICTRL_ACTIVE2_EN_SET);