media: hantro: output encoded JPEG content directly to capture buffers
author: Chen-Yu Tsai <wenst@chromium.org>
Fri, 7 Jan 2022 09:34:54 +0000 (10:34 +0100)
committer: Mauro Carvalho Chehab <mchehab@kernel.org>
Fri, 28 Jan 2022 12:27:12 +0000 (13:27 +0100)
Now that the JPEG header length is aligned with bus access boundaries,
the JPEG encoder can output to the capture buffers directly without
going through a bounce buffer.

Do just that, and get rid of all the bounce buffer related code.

Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
Reviewed-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
drivers/staging/media/hantro/TODO
drivers/staging/media/hantro/hantro.h
drivers/staging/media/hantro/hantro_drv.c
drivers/staging/media/hantro/hantro_h1_jpeg_enc.c
drivers/staging/media/hantro/hantro_hw.h
drivers/staging/media/hantro/hantro_jpeg.c
drivers/staging/media/hantro/rockchip_vpu2_hw_jpeg_enc.c
drivers/staging/media/hantro/rockchip_vpu_hw.c

index da181dc..1d7fed9 100644 (file)
@@ -4,7 +4,3 @@
   the uABI, it will be required to have the driver in staging.
 
   For this reason, we are keeping this driver in staging for now.
-
-* Instead of having a DMA bounce buffer, it could be possible to use a
-  normal buffer and memmove() the payload to make space for the header.
-  This might need to use extra JPEG markers for padding reasons.
index 06d0f35..357f83b 100644 (file)
@@ -259,7 +259,6 @@ struct hantro_ctx {
        /* Specific for particular codec modes. */
        union {
                struct hantro_h264_dec_hw_ctx h264_dec;
-               struct hantro_jpeg_enc_hw_ctx jpeg_enc;
                struct hantro_mpeg2_dec_hw_ctx mpeg2_dec;
                struct hantro_vp8_dec_hw_ctx vp8_dec;
                struct hantro_hevc_dec_hw_ctx hevc_dec;
index b376b9d..bc9bcb4 100644 (file)
@@ -219,21 +219,15 @@ queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
        if (ret)
                return ret;
 
+       dst_vq->bidirectional = true;
+       dst_vq->mem_ops = &vb2_dma_contig_memops;
+       dst_vq->dma_attrs = DMA_ATTR_ALLOC_SINGLE_PAGES;
        /*
-        * When encoding, the CAPTURE queue doesn't need dma memory,
-        * as the CPU needs to create the JPEG frames, from the
-        * hardware-produced JPEG payload.
-        *
-        * For the DMA destination buffer, we use a bounce buffer.
+        * The Kernel needs access to the JPEG destination buffer for the
+        * JPEG encoder to fill in the JPEG headers.
         */
-       if (ctx->is_encoder) {
-               dst_vq->mem_ops = &vb2_vmalloc_memops;
-       } else {
-               dst_vq->bidirectional = true;
-               dst_vq->mem_ops = &vb2_dma_contig_memops;
-               dst_vq->dma_attrs = DMA_ATTR_ALLOC_SINGLE_PAGES |
-                                   DMA_ATTR_NO_KERNEL_MAPPING;
-       }
+       if (!ctx->is_encoder)
+               dst_vq->dma_attrs |= DMA_ATTR_NO_KERNEL_MAPPING;
 
        dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
        dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
index a71f90a..12d6950 100644 (file)
@@ -39,17 +39,23 @@ static void hantro_h1_set_src_img_ctrl(struct hantro_dev *vpu,
 
 static void hantro_h1_jpeg_enc_set_buffers(struct hantro_dev *vpu,
                                           struct hantro_ctx *ctx,
-                                          struct vb2_buffer *src_buf)
+                                          struct vb2_buffer *src_buf,
+                                          struct vb2_buffer *dst_buf)
 {
        struct v4l2_pix_format_mplane *pix_fmt = &ctx->src_fmt;
        dma_addr_t src[3];
+       u32 size_left;
+
+       size_left = vb2_plane_size(dst_buf, 0) - ctx->vpu_dst_fmt->header_size;
+       if (WARN_ON(vb2_plane_size(dst_buf, 0) < ctx->vpu_dst_fmt->header_size))
+               size_left = 0;
 
        WARN_ON(pix_fmt->num_planes > 3);
 
-       vepu_write_relaxed(vpu, ctx->jpeg_enc.bounce_buffer.dma,
+       vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(dst_buf, 0) +
+                               ctx->vpu_dst_fmt->header_size,
                           H1_REG_ADDR_OUTPUT_STREAM);
-       vepu_write_relaxed(vpu, ctx->jpeg_enc.bounce_buffer.size,
-                          H1_REG_STR_BUF_LIMIT);
+       vepu_write_relaxed(vpu, size_left, H1_REG_STR_BUF_LIMIT);
 
        if (pix_fmt->num_planes == 1) {
                src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0);
@@ -121,7 +127,8 @@ int hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx)
                           H1_REG_ENC_CTRL);
 
        hantro_h1_set_src_img_ctrl(vpu, ctx);
-       hantro_h1_jpeg_enc_set_buffers(vpu, ctx, &src_buf->vb2_buf);
+       hantro_h1_jpeg_enc_set_buffers(vpu, ctx, &src_buf->vb2_buf,
+                                      &dst_buf->vb2_buf);
        hantro_h1_jpeg_enc_set_qtable(vpu, jpeg_ctx.hw_luma_qtable,
                                      jpeg_ctx.hw_chroma_qtable);
 
@@ -154,13 +161,6 @@ void hantro_h1_jpeg_enc_done(struct hantro_ctx *ctx)
        u32 bytesused = vepu_read(vpu, H1_REG_STR_BUF_LIMIT) / 8;
        struct vb2_v4l2_buffer *dst_buf = hantro_get_dst_buf(ctx);
 
-       /*
-        * TODO: Rework the JPEG encoder to eliminate the need
-        * for a bounce buffer.
-        */
-       memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0) +
-              ctx->vpu_dst_fmt->header_size,
-              ctx->jpeg_enc.bounce_buffer.cpu, bytesused);
        vb2_set_plane_payload(&dst_buf->vb2_buf, 0,
                              ctx->vpu_dst_fmt->header_size + bytesused);
 }
index 4a19ae8..c1fd807 100644 (file)
@@ -43,15 +43,6 @@ struct hantro_aux_buf {
        unsigned long attrs;
 };
 
-/**
- * struct hantro_jpeg_enc_hw_ctx
- *
- * @bounce_buffer:     Bounce buffer
- */
-struct hantro_jpeg_enc_hw_ctx {
-       struct hantro_aux_buf bounce_buffer;
-};
-
 /* Max. number of entries in the DPB (HW limitation). */
 #define HANTRO_H264_DPB_SIZE           16
 
@@ -327,8 +318,6 @@ void hantro_g1_reset(struct hantro_ctx *ctx);
 
 int hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx);
 int rockchip_vpu2_jpeg_enc_run(struct hantro_ctx *ctx);
-int hantro_jpeg_enc_init(struct hantro_ctx *ctx);
-void hantro_jpeg_enc_exit(struct hantro_ctx *ctx);
 void hantro_h1_jpeg_enc_done(struct hantro_ctx *ctx);
 void rockchip_vpu2_jpeg_enc_done(struct hantro_ctx *ctx);
 
index e63eeef..84d3f0b 100644 (file)
@@ -9,7 +9,6 @@
 
 #include <linux/align.h>
 #include <linux/build_bug.h>
-#include <linux/dma-mapping.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include "hantro_jpeg.h"
@@ -339,30 +338,3 @@ void hantro_jpeg_header_assemble(struct hantro_jpeg_ctx *ctx)
 
        jpeg_set_quality(ctx);
 }
-
-int hantro_jpeg_enc_init(struct hantro_ctx *ctx)
-{
-       ctx->jpeg_enc.bounce_buffer.size =
-               ctx->dst_fmt.plane_fmt[0].sizeimage -
-               ctx->vpu_dst_fmt->header_size;
-
-       ctx->jpeg_enc.bounce_buffer.cpu =
-               dma_alloc_attrs(ctx->dev->dev,
-                               ctx->jpeg_enc.bounce_buffer.size,
-                               &ctx->jpeg_enc.bounce_buffer.dma,
-                               GFP_KERNEL,
-                               DMA_ATTR_ALLOC_SINGLE_PAGES);
-       if (!ctx->jpeg_enc.bounce_buffer.cpu)
-               return -ENOMEM;
-
-       return 0;
-}
-
-void hantro_jpeg_enc_exit(struct hantro_ctx *ctx)
-{
-       dma_free_attrs(ctx->dev->dev,
-                      ctx->jpeg_enc.bounce_buffer.size,
-                      ctx->jpeg_enc.bounce_buffer.cpu,
-                      ctx->jpeg_enc.bounce_buffer.dma,
-                      DMA_ATTR_ALLOC_SINGLE_PAGES);
-}
index 41a418f..8395c4d 100644 (file)
@@ -66,17 +66,23 @@ static void rockchip_vpu2_set_src_img_ctrl(struct hantro_dev *vpu,
 
 static void rockchip_vpu2_jpeg_enc_set_buffers(struct hantro_dev *vpu,
                                               struct hantro_ctx *ctx,
-                                              struct vb2_buffer *src_buf)
+                                              struct vb2_buffer *src_buf,
+                                              struct vb2_buffer *dst_buf)
 {
        struct v4l2_pix_format_mplane *pix_fmt = &ctx->src_fmt;
        dma_addr_t src[3];
+       u32 size_left;
+
+       size_left = vb2_plane_size(dst_buf, 0) - ctx->vpu_dst_fmt->header_size;
+       if (WARN_ON(vb2_plane_size(dst_buf, 0) < ctx->vpu_dst_fmt->header_size))
+               size_left = 0;
 
        WARN_ON(pix_fmt->num_planes > 3);
 
-       vepu_write_relaxed(vpu, ctx->jpeg_enc.bounce_buffer.dma,
+       vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(dst_buf, 0) +
+                               ctx->vpu_dst_fmt->header_size,
                           VEPU_REG_ADDR_OUTPUT_STREAM);
-       vepu_write_relaxed(vpu, ctx->jpeg_enc.bounce_buffer.size,
-                          VEPU_REG_STR_BUF_LIMIT);
+       vepu_write_relaxed(vpu, size_left, VEPU_REG_STR_BUF_LIMIT);
 
        if (pix_fmt->num_planes == 1) {
                src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0);
@@ -137,6 +143,9 @@ int rockchip_vpu2_jpeg_enc_run(struct hantro_ctx *ctx)
 
        memset(&jpeg_ctx, 0, sizeof(jpeg_ctx));
        jpeg_ctx.buffer = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
+       if (!jpeg_ctx.buffer)
+               return -ENOMEM;
+
        jpeg_ctx.width = ctx->dst_fmt.width;
        jpeg_ctx.height = ctx->dst_fmt.height;
        jpeg_ctx.quality = ctx->jpeg_quality;
@@ -147,7 +156,8 @@ int rockchip_vpu2_jpeg_enc_run(struct hantro_ctx *ctx)
                           VEPU_REG_ENCODE_START);
 
        rockchip_vpu2_set_src_img_ctrl(vpu, ctx);
-       rockchip_vpu2_jpeg_enc_set_buffers(vpu, ctx, &src_buf->vb2_buf);
+       rockchip_vpu2_jpeg_enc_set_buffers(vpu, ctx, &src_buf->vb2_buf,
+                                          &dst_buf->vb2_buf);
        rockchip_vpu2_jpeg_enc_set_qtable(vpu, jpeg_ctx.hw_luma_qtable,
                                          jpeg_ctx.hw_chroma_qtable);
 
@@ -182,13 +192,6 @@ void rockchip_vpu2_jpeg_enc_done(struct hantro_ctx *ctx)
        u32 bytesused = vepu_read(vpu, VEPU_REG_STR_BUF_LIMIT) / 8;
        struct vb2_v4l2_buffer *dst_buf = hantro_get_dst_buf(ctx);
 
-       /*
-        * TODO: Rework the JPEG encoder to eliminate the need
-        * for a bounce buffer.
-        */
-       memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0) +
-              ctx->vpu_dst_fmt->header_size,
-              ctx->jpeg_enc.bounce_buffer.cpu, bytesused);
        vb2_set_plane_payload(&dst_buf->vb2_buf, 0,
                              ctx->vpu_dst_fmt->header_size + bytesused);
 }
index c203b60..163cf92 100644 (file)
@@ -343,9 +343,7 @@ static const struct hantro_codec_ops rk3066_vpu_codec_ops[] = {
        [HANTRO_MODE_JPEG_ENC] = {
                .run = hantro_h1_jpeg_enc_run,
                .reset = rockchip_vpu1_enc_reset,
-               .init = hantro_jpeg_enc_init,
                .done = hantro_h1_jpeg_enc_done,
-               .exit = hantro_jpeg_enc_exit,
        },
        [HANTRO_MODE_H264_DEC] = {
                .run = hantro_g1_h264_dec_run,
@@ -371,9 +369,7 @@ static const struct hantro_codec_ops rk3288_vpu_codec_ops[] = {
        [HANTRO_MODE_JPEG_ENC] = {
                .run = hantro_h1_jpeg_enc_run,
                .reset = rockchip_vpu1_enc_reset,
-               .init = hantro_jpeg_enc_init,
                .done = hantro_h1_jpeg_enc_done,
-               .exit = hantro_jpeg_enc_exit,
        },
        [HANTRO_MODE_H264_DEC] = {
                .run = hantro_g1_h264_dec_run,
@@ -399,9 +395,7 @@ static const struct hantro_codec_ops rk3399_vpu_codec_ops[] = {
        [HANTRO_MODE_JPEG_ENC] = {
                .run = rockchip_vpu2_jpeg_enc_run,
                .reset = rockchip_vpu2_enc_reset,
-               .init = hantro_jpeg_enc_init,
                .done = rockchip_vpu2_jpeg_enc_done,
-               .exit = hantro_jpeg_enc_exit,
        },
        [HANTRO_MODE_H264_DEC] = {
                .run = rockchip_vpu2_h264_dec_run,