From: Seungha Yang Date: Mon, 2 May 2022 13:59:59 +0000 (+0900) Subject: nvcodec: Add AV1 decoder X-Git-Tag: 1.22.0~1683 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=33e77782c6dbdb6271158053c8feb60458878fd7;p=platform%2Fupstream%2Fgstreamer.git nvcodec: Add AV1 decoder Adding GstCodecs based AV1 decoder element Part-of: --- diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvav1dec.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvav1dec.c new file mode 100644 index 0000000..0c6c8af --- /dev/null +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvav1dec.c @@ -0,0 +1,914 @@ +/* GStreamer + * Copyright (C) 2022 Seungha Yang + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "gstnvav1dec.h" +#include "gstnvdecoder.h" + +#include + +GST_DEBUG_CATEGORY_STATIC (gst_nv_av1_dec_debug); +#define GST_CAT_DEFAULT gst_nv_av1_dec_debug + +typedef struct _GstNvAV1Dec +{ + GstAV1Decoder parent; + + GstVideoCodecState *output_state; + + GstCudaContext *context; + GstNvDecoder *decoder; + + GstAV1SequenceHeaderOBU seq_hdr; + CUVIDPICPARAMS params; + + /* slice buffer which will be passed to CUVIDPICPARAMS::pBitstreamData */ + guint8 *bitstream_buffer; + /* allocated memory size of bitstream_buffer */ + gsize bitstream_buffer_alloc_size; + /* current offset of bitstream_buffer (per frame) */ + gsize bitstream_buffer_offset; + + guint *tile_offsets; + guint tile_offsets_alloc_len; + guint num_tiles; + + guint max_width; + guint max_height; + guint bitdepth; + guint8 film_grain_params_present; +} GstNvAV1Dec; + +typedef struct _GstNvAV1DecClass +{ + GstAV1DecoderClass parent_class; + guint cuda_device_id; +} GstNvAV1DecClass; + +typedef struct +{ + GstCaps *sink_caps; + GstCaps *src_caps; + guint cuda_device_id; +} GstNvAV1DecClassData; + +static GTypeClass *parent_class = NULL; + +#define GST_NV_AV1_DEC(object) ((GstNvAV1Dec *) (object)) +#define GST_NV_AV1_DEC_GET_CLASS(object) \ + (G_TYPE_INSTANCE_GET_CLASS ((object),G_TYPE_FROM_INSTANCE (object),GstNvAV1DecClass)) + +static void gst_nv_av1_dec_set_context (GstElement * element, + GstContext * context); +static gboolean gst_nv_av1_dec_open (GstVideoDecoder * decoder); +static gboolean gst_nv_av1_dec_close (GstVideoDecoder * decoder); +static gboolean gst_nv_av1_dec_negotiate (GstVideoDecoder * decoder); +static gboolean gst_nv_av1_dec_decide_allocation (GstVideoDecoder * + decoder, GstQuery * query); +static gboolean gst_nv_av1_dec_src_query (GstVideoDecoder * decoder, + GstQuery * query); + +static GstFlowReturn gst_nv_av1_dec_new_sequence (GstAV1Decoder * decoder, + const GstAV1SequenceHeaderOBU * seq_hdr, gint max_dpb_size); +static GstFlowReturn gst_nv_av1_dec_new_picture (GstAV1Decoder * decoder, + GstVideoCodecFrame * frame, GstAV1Picture * picture); +static GstAV1Picture *gst_nv_av1_dec_duplicate_picture (GstAV1Decoder * + decoder, GstVideoCodecFrame * frame, GstAV1Picture * picture); +static GstFlowReturn gst_nv_av1_dec_start_picture (GstAV1Decoder * decoder, + GstAV1Picture * picture, GstAV1Dpb * dpb); +static GstFlowReturn gst_nv_av1_dec_decode_tile (GstAV1Decoder * decoder, + GstAV1Picture * picture, GstAV1Tile * tile); +static GstFlowReturn gst_nv_av1_dec_end_picture (GstAV1Decoder * decoder, + GstAV1Picture * picture); +static GstFlowReturn gst_nv_av1_dec_output_picture (GstAV1Decoder * + decoder, GstVideoCodecFrame * frame, GstAV1Picture * picture); +static guint gst_nv_av1_dec_get_preferred_output_delay (GstAV1Decoder * decoder, + gboolean is_live); + +static void +gst_nv_av1_dec_class_init (GstNvAV1DecClass * klass, + GstNvAV1DecClassData * cdata) +{ + GstElementClass *element_class = GST_ELEMENT_CLASS (klass); + GstVideoDecoderClass *decoder_class = GST_VIDEO_DECODER_CLASS (klass); + GstAV1DecoderClass *av1decoder_class = GST_AV1_DECODER_CLASS (klass); + + element_class->set_context = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_set_context); + + parent_class = (GTypeClass *) g_type_class_peek_parent (klass); + gst_element_class_set_static_metadata (element_class, "NVDEC AV1 Decoder", + "Codec/Decoder/Video/Hardware", + "NVIDIA AV1 video decoder", "Seungha Yang "); + + gst_element_class_add_pad_template (element_class, + gst_pad_template_new ("sink", GST_PAD_SINK, GST_PAD_ALWAYS, + cdata->sink_caps)); + gst_element_class_add_pad_template (element_class, + gst_pad_template_new ("src", GST_PAD_SRC, GST_PAD_ALWAYS, + cdata->src_caps)); + + decoder_class->open = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_open); + decoder_class->close = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_close); + decoder_class->negotiate = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_negotiate); + decoder_class->decide_allocation = + GST_DEBUG_FUNCPTR (gst_nv_av1_dec_decide_allocation); + decoder_class->src_query = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_src_query); + + av1decoder_class->new_sequence = + GST_DEBUG_FUNCPTR (gst_nv_av1_dec_new_sequence); + av1decoder_class->new_picture = + GST_DEBUG_FUNCPTR (gst_nv_av1_dec_new_picture); + av1decoder_class->duplicate_picture = + GST_DEBUG_FUNCPTR (gst_nv_av1_dec_duplicate_picture); + av1decoder_class->start_picture = + GST_DEBUG_FUNCPTR (gst_nv_av1_dec_start_picture); + av1decoder_class->decode_tile = + GST_DEBUG_FUNCPTR (gst_nv_av1_dec_decode_tile); + av1decoder_class->end_picture = + GST_DEBUG_FUNCPTR (gst_nv_av1_dec_end_picture); + av1decoder_class->output_picture = + GST_DEBUG_FUNCPTR (gst_nv_av1_dec_output_picture); + av1decoder_class->get_preferred_output_delay = + GST_DEBUG_FUNCPTR (gst_nv_av1_dec_get_preferred_output_delay); + + klass->cuda_device_id = cdata->cuda_device_id; + + gst_caps_unref (cdata->sink_caps); + gst_caps_unref (cdata->src_caps); + g_free (cdata); +} + +static void +gst_nv_av1_dec_init (GstNvAV1Dec * self) +{ +} + +static void +gst_nv_av1_dec_set_context (GstElement * element, GstContext * context) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (element); + GstNvAV1DecClass *klass = GST_NV_AV1_DEC_GET_CLASS (self); + + GST_DEBUG_OBJECT (self, "set context %s", + gst_context_get_context_type (context)); + + if (gst_cuda_handle_set_context (element, context, klass->cuda_device_id, + &self->context)) { + goto done; + } + + if (self->decoder) + gst_nv_decoder_handle_set_context (self->decoder, element, context); + +done: + GST_ELEMENT_CLASS (parent_class)->set_context (element, context); +} + +static gboolean +gst_nv_av1_dec_open (GstVideoDecoder * decoder) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); + GstNvAV1DecClass *klass = GST_NV_AV1_DEC_GET_CLASS (self); + + if (!gst_cuda_ensure_element_context (GST_ELEMENT (self), + klass->cuda_device_id, &self->context)) { + GST_ERROR_OBJECT (self, "Required element data is unavailable"); + return FALSE; + } + + self->decoder = gst_nv_decoder_new (self->context); + if (!self->decoder) { + GST_ERROR_OBJECT (self, "Failed to create decoder object"); + gst_clear_object (&self->context); + + return FALSE; + } + + return TRUE; +} + +static void +gst_nv_av1_dec_reset_bitstream_params (GstNvAV1Dec * self) +{ + self->bitstream_buffer_offset = 0; + self->num_tiles = 0; + + self->params.nBitstreamDataLen = 0; + self->params.pBitstreamData = NULL; + self->params.nNumSlices = 0; + self->params.pSliceDataOffsets = NULL; +} + +static gboolean +gst_nv_av1_dec_close (GstVideoDecoder * decoder) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); + + g_clear_pointer (&self->output_state, gst_video_codec_state_unref); + gst_clear_object (&self->decoder); + gst_clear_object (&self->context); + + gst_nv_av1_dec_reset_bitstream_params (self); + + g_free (self->bitstream_buffer); + self->bitstream_buffer = NULL; + + g_free (self->tile_offsets); + self->tile_offsets = NULL; + + self->bitstream_buffer_alloc_size = 0; + self->tile_offsets_alloc_len = 0; + + return TRUE; +} + +static gboolean +gst_nv_av1_dec_negotiate (GstVideoDecoder * decoder) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); + GstAV1Decoder *av1dec = GST_AV1_DECODER (decoder); + + GST_DEBUG_OBJECT (self, "negotiate"); + + gst_nv_decoder_negotiate (self->decoder, decoder, av1dec->input_state, + &self->output_state); + + return GST_VIDEO_DECODER_CLASS (parent_class)->negotiate (decoder); +} + +static gboolean +gst_nv_av1_dec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); + + if (!gst_nv_decoder_decide_allocation (self->decoder, decoder, query)) { + GST_WARNING_OBJECT (self, "Failed to handle decide allocation"); + return FALSE; + } + + return GST_VIDEO_DECODER_CLASS (parent_class)->decide_allocation + (decoder, query); +} + +static gboolean +gst_nv_av1_dec_src_query (GstVideoDecoder * decoder, GstQuery * query) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); + + switch (GST_QUERY_TYPE (query)) { + case GST_QUERY_CONTEXT: + if (gst_cuda_handle_context_query (GST_ELEMENT (decoder), query, + self->context)) { + return TRUE; + } else if (self->decoder && + gst_nv_decoder_handle_context_query (self->decoder, decoder, query)) { + return TRUE; + } + break; + default: + break; + } + + return GST_VIDEO_DECODER_CLASS (parent_class)->src_query (decoder, query); +} + +static GstFlowReturn +gst_nv_av1_dec_new_sequence (GstAV1Decoder * decoder, + const GstAV1SequenceHeaderOBU * seq_hdr, gint max_dpb_size) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); + gboolean modified = FALSE; + guint max_width, max_height; + + GST_LOG_OBJECT (self, "new sequence"); + + if (seq_hdr->seq_profile != GST_AV1_PROFILE_0) { + GST_WARNING_OBJECT (self, "Unsupported profile %d", seq_hdr->seq_profile); + return GST_FLOW_NOT_NEGOTIATED; + } + + if (seq_hdr->num_planes != 3) { + GST_WARNING_OBJECT (self, "Monochrome is not supported"); + return GST_FLOW_NOT_NEGOTIATED; + } + + self->seq_hdr = *seq_hdr; + + if (self->bitdepth != seq_hdr->bit_depth) { + GST_INFO_OBJECT (self, "Bitdepth changed %d -> %d", self->bitdepth, + seq_hdr->bit_depth); + self->bitdepth = seq_hdr->bit_depth; + modified = TRUE; + } + + max_width = seq_hdr->max_frame_width_minus_1 + 1; + max_height = seq_hdr->max_frame_height_minus_1 + 1; + + if (self->max_width != max_width || self->max_height != max_height) { + GST_INFO_OBJECT (self, "Resolution changed %dx%d -> %dx%d", + self->max_width, self->max_height, max_width, max_height); + self->max_width = max_width; + self->max_height = max_height; + modified = TRUE; + } + + if (self->film_grain_params_present != seq_hdr->film_grain_params_present) { + GST_INFO_OBJECT (self, "Film grain present changed %d -> %d", + self->film_grain_params_present, seq_hdr->film_grain_params_present); + self->film_grain_params_present = seq_hdr->film_grain_params_present; + modified = TRUE; + } + + if (modified || !gst_nv_decoder_is_configured (self->decoder)) { + GstVideoInfo info; + GstVideoFormat out_format = GST_VIDEO_FORMAT_UNKNOWN; + + if (self->bitdepth == 8) { + out_format = GST_VIDEO_FORMAT_NV12; + } else if (self->bitdepth == 10) { + out_format = GST_VIDEO_FORMAT_P010_10LE; + } else { + GST_WARNING_OBJECT (self, "Invalid bit-depth %d", seq_hdr->bit_depth); + return GST_FLOW_NOT_NEGOTIATED; + } + + gst_video_info_set_format (&info, + out_format, self->max_width, self->max_height); + + if (!gst_nv_decoder_configure (self->decoder, cudaVideoCodec_AV1, + &info, self->max_width, self->max_height, self->bitdepth, + max_dpb_size, self->film_grain_params_present ? TRUE : FALSE)) { + GST_ERROR_OBJECT (self, "Failed to create decoder"); + return GST_FLOW_NOT_NEGOTIATED; + } + + if (!gst_video_decoder_negotiate (GST_VIDEO_DECODER (self))) { + GST_ERROR_OBJECT (self, "Failed to negotiate with downstream"); + return GST_FLOW_NOT_NEGOTIATED; + } + } + + return GST_FLOW_OK; +} + +static GstFlowReturn +gst_nv_av1_dec_new_picture (GstAV1Decoder * decoder, + GstVideoCodecFrame * frame, GstAV1Picture * picture) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); + GstNvDecoderFrame *nv_frame; + + nv_frame = gst_nv_decoder_new_frame (self->decoder); + if (!nv_frame) { + GST_ERROR_OBJECT (self, "No available decoder frame"); + return GST_FLOW_ERROR; + } + + GST_LOG_OBJECT (self, + "New decoder frame %p (index %d)", nv_frame, nv_frame->index); + + gst_av1_picture_set_user_data (picture, + nv_frame, (GDestroyNotify) gst_nv_decoder_frame_unref); + + return GST_FLOW_OK; +} + +static GstNvDecoderFrame * +gst_nv_av1_dec_get_decoder_frame_from_picture (GstNvAV1Dec * self, + GstAV1Picture * picture) +{ + GstNvDecoderFrame *frame; + + frame = (GstNvDecoderFrame *) gst_av1_picture_get_user_data (picture); + + if (!frame) + GST_DEBUG_OBJECT (self, "current picture does not have decoder frame"); + + return frame; +} + +static GstAV1Picture * +gst_nv_av1_dec_duplicate_picture (GstAV1Decoder * decoder, + GstVideoCodecFrame * frame, GstAV1Picture * picture) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); + GstNvDecoderFrame *nv_frame; + GstAV1Picture *new_picture; + + nv_frame = gst_nv_av1_dec_get_decoder_frame_from_picture (self, picture); + + if (!nv_frame) { + GST_ERROR_OBJECT (self, "Parent picture does not have decoder frame"); + return NULL; + } + + new_picture = gst_av1_picture_new (); + new_picture->frame_hdr = picture->frame_hdr; + + gst_av1_picture_set_user_data (new_picture, + gst_nv_decoder_frame_ref (nv_frame), + (GDestroyNotify) gst_nv_decoder_frame_unref); + + return new_picture; +} + +static inline guint8 +gst_nv_av1_dec_get_lr_unit_size (guint size) +{ + switch (size) { + case 32: + return 0; + case 64: + return 1; + case 128: + return 2; + case 256: + return 3; + default: + break; + } + + return 3; +} + +static GstFlowReturn +gst_nv_av1_dec_start_picture (GstAV1Decoder * decoder, GstAV1Picture * picture, + GstAV1Dpb * dpb) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); + CUVIDPICPARAMS *params = &self->params; + CUVIDAV1PICPARAMS *av1_params = ¶ms->CodecSpecific.av1; + const GstAV1SequenceHeaderOBU *seq_hdr = &self->seq_hdr; + const GstAV1FrameHeaderOBU *frame_hdr = &picture->frame_hdr; + const GstAV1GlobalMotionParams *gmp = &frame_hdr->global_motion_params; + const GstAV1QuantizationParams *qp = &frame_hdr->quantization_params; + const GstAV1TileInfo *ti = &frame_hdr->tile_info; + const GstAV1CDEFParams *cp = &frame_hdr->cdef_params; + const GstAV1SegmenationParams *sp = &frame_hdr->segmentation_params; + const GstAV1LoopFilterParams *lp = &frame_hdr->loop_filter_params; + const GstAV1LoopRestorationParams *lrp = &frame_hdr->loop_restoration_params; + const GstAV1FilmGrainParams *fgp = &frame_hdr->film_grain_params; + GstNvDecoderFrame *frame; + GstNvDecoderFrame *other_frame; + GstAV1Picture *other_pic; + guint i, j; + + frame = gst_nv_av1_dec_get_decoder_frame_from_picture (self, picture); + if (!frame) { + GST_ERROR_OBJECT (self, "Decoder frame is unavailable"); + return GST_FLOW_ERROR; + } + + memset (params, 0, sizeof (CUVIDPICPARAMS)); + + params->PicWidthInMbs = GST_ROUND_UP_16 (frame_hdr->frame_width) >> 4; + params->FrameHeightInMbs = GST_ROUND_UP_16 (frame_hdr->frame_height) >> 4; + params->CurrPicIdx = frame->index; + params->intra_pic_flag = frame_hdr->frame_is_intra; + + av1_params->width = frame_hdr->frame_width; + av1_params->height = frame_hdr->frame_height; + av1_params->frame_offset = frame_hdr->order_hint; + av1_params->decodePicIdx = frame->decode_frame_index; + + /* sequence header */ + av1_params->profile = seq_hdr->seq_profile; + av1_params->use_128x128_superblock = seq_hdr->use_128x128_superblock; + av1_params->subsampling_x = seq_hdr->color_config.subsampling_x; + av1_params->subsampling_y = seq_hdr->color_config.subsampling_y; + av1_params->mono_chrome = seq_hdr->color_config.mono_chrome; + av1_params->bit_depth_minus8 = seq_hdr->bit_depth - 8; + av1_params->enable_filter_intra = seq_hdr->enable_filter_intra; + av1_params->enable_intra_edge_filter = seq_hdr->enable_intra_edge_filter; + av1_params->enable_interintra_compound = seq_hdr->enable_interintra_compound; + av1_params->enable_masked_compound = seq_hdr->enable_masked_compound; + av1_params->enable_dual_filter = seq_hdr->enable_dual_filter; + av1_params->enable_order_hint = seq_hdr->enable_order_hint; + av1_params->order_hint_bits_minus1 = seq_hdr->order_hint_bits_minus_1; + av1_params->enable_jnt_comp = seq_hdr->enable_jnt_comp; + av1_params->enable_superres = seq_hdr->enable_superres; + av1_params->enable_cdef = seq_hdr->enable_cdef; + av1_params->enable_restoration = seq_hdr->enable_restoration; + av1_params->enable_fgs = seq_hdr->film_grain_params_present; + + /* frame header */ + av1_params->frame_type = frame_hdr->frame_type; + av1_params->show_frame = frame_hdr->show_frame; + av1_params->disable_cdf_update = frame_hdr->disable_cdf_update; + av1_params->allow_screen_content_tools = + frame_hdr->allow_screen_content_tools; + if (frame_hdr->force_integer_mv || frame_hdr->frame_is_intra) + av1_params->force_integer_mv = 1; + else + av1_params->force_integer_mv = 0; + if (frame_hdr->use_superres) { + av1_params->coded_denom = + frame_hdr->superres_denom - GST_AV1_SUPERRES_DENOM_MIN; + } else { + av1_params->coded_denom = 0; + } + av1_params->allow_intrabc = frame_hdr->allow_intrabc; + av1_params->allow_high_precision_mv = frame_hdr->allow_high_precision_mv; + av1_params->interp_filter = frame_hdr->interpolation_filter; + av1_params->switchable_motion_mode = frame_hdr->is_motion_mode_switchable; + av1_params->use_ref_frame_mvs = frame_hdr->use_ref_frame_mvs; + av1_params->disable_frame_end_update_cdf = + frame_hdr->disable_frame_end_update_cdf; + av1_params->delta_q_present = qp->delta_q_present; + av1_params->delta_q_res = qp->delta_q_res; + av1_params->using_qmatrix = qp->using_qmatrix; + av1_params->coded_lossless = frame_hdr->coded_lossless; + av1_params->use_superres = frame_hdr->use_superres; + av1_params->tx_mode = frame_hdr->tx_mode; + av1_params->reference_mode = frame_hdr->reference_select; + av1_params->allow_warped_motion = frame_hdr->allow_warped_motion; + av1_params->reduced_tx_set = frame_hdr->reduced_tx_set; + av1_params->skip_mode = frame_hdr->skip_mode_present; + + /* tiling info */ + av1_params->num_tile_cols = ti->tile_cols; + av1_params->num_tile_rows = ti->tile_rows; + av1_params->context_update_tile_id = ti->context_update_tile_id; + for (i = 0; i < ti->tile_cols; i++) + av1_params->tile_widths[i] = ti->width_in_sbs_minus_1[i] + 1; + + for (i = 0; i < ti->tile_rows; i++) + av1_params->tile_heights[i] = ti->height_in_sbs_minus_1[i] + 1; + + /* CDEF */ + av1_params->cdef_damping_minus_3 = cp->cdef_damping - 3; + av1_params->cdef_bits = cp->cdef_bits; + for (i = 0; i < GST_AV1_CDEF_MAX; i++) { + guint8 primary; + guint8 secondary; + + primary = cp->cdef_y_pri_strength[i]; + secondary = cp->cdef_y_sec_strength[i]; + if (secondary == 4) + secondary--; + + av1_params->cdef_y_strength[i] = (primary & 0x0f) | (secondary << 4); + + primary = cp->cdef_uv_pri_strength[i]; + secondary = cp->cdef_uv_sec_strength[i]; + if (secondary == 4) + secondary--; + + av1_params->cdef_uv_strength[i] = (primary & 0x0f) | (secondary << 4); + } + + /* SkipModeFrames */ + if (frame_hdr->skip_mode_present) { + av1_params->SkipModeFrame0 = frame_hdr->skip_mode_frame[0]; + av1_params->SkipModeFrame1 = frame_hdr->skip_mode_frame[1]; + } + + /* qp information */ + av1_params->base_qindex = qp->base_q_idx; + av1_params->qp_y_dc_delta_q = qp->delta_q_y_dc; + av1_params->qp_u_dc_delta_q = qp->delta_q_u_dc; + av1_params->qp_u_ac_delta_q = qp->delta_q_u_ac; + av1_params->qp_v_dc_delta_q = qp->delta_q_v_dc; + av1_params->qp_v_ac_delta_q = qp->delta_q_v_ac; + av1_params->qm_y = qp->qm_y; + av1_params->qm_u = qp->qm_u; + av1_params->qm_v = qp->qm_v; + + /* segmentation */ + av1_params->segmentation_enabled = sp->segmentation_enabled; + av1_params->segmentation_update_map = sp->segmentation_update_map; + av1_params->segmentation_update_data = sp->segmentation_update_data; + av1_params->segmentation_temporal_update = sp->segmentation_temporal_update; + for (i = 0; i < GST_AV1_MAX_SEGMENTS; i++) { + for (j = 0; j < GST_AV1_SEG_LVL_MAX; j++) { + av1_params->segmentation_feature_data[i][j] = sp->feature_data[i][j]; + av1_params->segmentation_feature_mask[i] |= + sp->feature_enabled[i][j] << j; + } + } + + /* loopfilter */ + av1_params->loop_filter_level[0] = lp->loop_filter_level[0]; + av1_params->loop_filter_level[1] = lp->loop_filter_level[1]; + av1_params->loop_filter_level_u = lp->loop_filter_level[2]; + av1_params->loop_filter_level_v = lp->loop_filter_level[3]; + av1_params->loop_filter_sharpness = lp->loop_filter_sharpness; + for (i = 0; i < GST_AV1_TOTAL_REFS_PER_FRAME; i++) { + av1_params->loop_filter_ref_deltas[i] = lp->loop_filter_ref_deltas[i]; + } + av1_params->loop_filter_mode_deltas[0] = lp->loop_filter_mode_deltas[0]; + av1_params->loop_filter_mode_deltas[1] = lp->loop_filter_mode_deltas[1]; + av1_params->loop_filter_delta_enabled = lp->loop_filter_delta_enabled; + av1_params->loop_filter_delta_update = lp->loop_filter_delta_update; + av1_params->delta_lf_present = lp->delta_lf_present; + av1_params->delta_lf_res = lp->delta_lf_res; + av1_params->delta_lf_multi = lp->delta_lf_multi; + + /* restoration */ + for (i = 0; i < 3; i++) { + av1_params->lr_unit_size[i] = + gst_nv_av1_dec_get_lr_unit_size (lrp->loop_restoration_size[i]); + } + av1_params->lr_type[0] = lrp->frame_restoration_type[0]; + av1_params->lr_type[1] = lrp->frame_restoration_type[1]; + av1_params->lr_type[2] = lrp->frame_restoration_type[2]; + + /* reference frames */ + for (i = 0; i < GST_AV1_TOTAL_REFS_PER_FRAME; i++) { + guint8 ref_idx = 0xff; + + other_pic = dpb->pic_list[i]; + if (other_pic) { + other_frame = + gst_nv_av1_dec_get_decoder_frame_from_picture (self, other_pic); + if (!other_frame) { + GST_ERROR_OBJECT (self, "reference frame is unavailable"); + return GST_FLOW_ERROR; + } + + ref_idx = other_frame->decode_frame_index; + } + + av1_params->ref_frame_map[i] = ref_idx; + } + + if (frame_hdr->primary_ref_frame == GST_AV1_PRIMARY_REF_NONE) { + av1_params->primary_ref_frame = 0xff; + } else { + guint8 primary_ref_idx; + + g_assert (frame_hdr->primary_ref_frame < 8); + + primary_ref_idx = frame_hdr->ref_frame_idx[frame_hdr->primary_ref_frame]; + av1_params->primary_ref_frame = av1_params->ref_frame_map[primary_ref_idx]; + } + av1_params->temporal_layer_id = picture->temporal_id; + av1_params->spatial_layer_id = picture->spatial_id; + + /* ref frame list and global motion */ + for (i = 0; i < GST_AV1_REFS_PER_FRAME; i++) { + gint8 ref_idx = frame_hdr->ref_frame_idx[i]; + + other_pic = NULL; + + if (ref_idx >= 0) + other_pic = dpb->pic_list[ref_idx]; + + if (other_pic) { + other_frame = + gst_nv_av1_dec_get_decoder_frame_from_picture (self, other_pic); + + av1_params->ref_frame[i].index = other_frame->decode_frame_index; + av1_params->ref_frame[i].width = other_pic->frame_hdr.frame_width; + av1_params->ref_frame[i].height = other_pic->frame_hdr.frame_height; + } else { + av1_params->ref_frame[i].index = 0xff; + } + + av1_params->global_motion[i].invalid = gmp->invalid[i]; + av1_params->global_motion[i].wmtype = + gmp->gm_type[GST_AV1_REF_LAST_FRAME + i]; + for (j = 0; j < 6; j++) { + av1_params->global_motion[i].wmmat[j] = + gmp->gm_params[GST_AV1_REF_LAST_FRAME + i][j]; + } + } + + /* film grain params */ + if (seq_hdr->film_grain_params_present) { + av1_params->apply_grain = fgp->apply_grain; + av1_params->overlap_flag = fgp->overlap_flag; + av1_params->scaling_shift_minus8 = fgp->grain_scaling_minus_8; + av1_params->chroma_scaling_from_luma = fgp->chroma_scaling_from_luma; + av1_params->ar_coeff_lag = fgp->ar_coeff_lag; + av1_params->ar_coeff_shift_minus6 = fgp->ar_coeff_shift_minus_6; + av1_params->grain_scale_shift = fgp->grain_scale_shift; + av1_params->clip_to_restricted_range = fgp->clip_to_restricted_range; + av1_params->num_y_points = fgp->num_y_points; + for (i = 0; i < fgp->num_y_points && i < 14; i++) { + av1_params->scaling_points_y[i][0] = fgp->point_y_value[i]; + av1_params->scaling_points_y[i][1] = fgp->point_y_scaling[i]; + } + + av1_params->num_cb_points = fgp->num_cb_points; + for (i = 0; i < fgp->num_cb_points && i < 10; i++) { + av1_params->scaling_points_cb[i][0] = fgp->point_cb_value[i]; + av1_params->scaling_points_cb[i][1] = fgp->point_cb_scaling[i]; + } + + av1_params->num_cr_points = fgp->num_cr_points; + for (i = 0; i < fgp->num_cr_points && i < 10; i++) { + av1_params->scaling_points_cr[i][0] = fgp->point_cr_value[i]; + av1_params->scaling_points_cr[i][1] = fgp->point_cr_scaling[i]; + } + + av1_params->random_seed = fgp->grain_seed; + for (i = 0; i < 24; i++) { + av1_params->ar_coeffs_y[i] = (short) fgp->ar_coeffs_y_plus_128[i] - 128; + } + + for (i = 0; i < 25; i++) { + av1_params->ar_coeffs_cb[i] = (short) fgp->ar_coeffs_cb_plus_128[i] - 128; + av1_params->ar_coeffs_cr[i] = (short) fgp->ar_coeffs_cr_plus_128[i] - 128; + } + av1_params->cb_mult = fgp->cb_mult; + av1_params->cb_luma_mult = fgp->cb_luma_mult; + av1_params->cb_offset = fgp->cb_offset; + av1_params->cr_mult = fgp->cr_mult; + av1_params->cr_luma_mult = fgp->cr_luma_mult; + av1_params->cr_offset = fgp->cr_offset; + } + + gst_nv_av1_dec_reset_bitstream_params (self); + + return GST_FLOW_OK; +} + +static GstFlowReturn +gst_nv_av1_dec_decode_tile (GstAV1Decoder * decoder, + GstAV1Picture * picture, GstAV1Tile * tile) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); + GstAV1TileGroupOBU *tile_group = &tile->tile_group; + guint i; + guint buffer_size; + + if (tile_group->num_tiles * 2 > self->tile_offsets_alloc_len) { + self->tile_offsets_alloc_len = tile_group->num_tiles * 2; + + self->tile_offsets = (guint *) g_realloc_n (self->tile_offsets, + self->tile_offsets_alloc_len, sizeof (guint)); + } + + self->num_tiles = tile_group->num_tiles; + + for (i = tile_group->tg_start; i <= tile_group->tg_end; i++) { + guint offset = self->bitstream_buffer_offset + + tile_group->entry[i].tile_offset; + + self->tile_offsets[i * 2] = offset; + self->tile_offsets[i * 2 + 1] = offset + tile_group->entry[i].tile_size; + } + + buffer_size = self->bitstream_buffer_offset + tile->obu.obu_size; + if (buffer_size > self->bitstream_buffer_alloc_size) { + guint alloc_size = buffer_size * 2; + + self->bitstream_buffer = (guint8 *) g_realloc (self->bitstream_buffer, + alloc_size); + self->bitstream_buffer_alloc_size = alloc_size; + } + + memcpy (self->bitstream_buffer + self->bitstream_buffer_offset, + tile->obu.data, tile->obu.obu_size); + + self->bitstream_buffer_offset += tile->obu.obu_size; + + return GST_FLOW_OK; +} + +static GstFlowReturn +gst_nv_av1_dec_end_picture (GstAV1Decoder * decoder, GstAV1Picture * picture) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); + gboolean ret; + CUVIDPICPARAMS *params = &self->params; + + params->nBitstreamDataLen = self->bitstream_buffer_offset; + params->pBitstreamData = self->bitstream_buffer; + params->nNumSlices = self->num_tiles; + params->pSliceDataOffsets = self->tile_offsets; + + ret = gst_nv_decoder_decode_picture (self->decoder, params); + + if (!ret) { + GST_ERROR_OBJECT (self, "Failed to decode picture"); + return GST_FLOW_ERROR; + } + + return GST_FLOW_OK; +} + +static GstFlowReturn +gst_nv_av1_dec_output_picture (GstAV1Decoder * decoder, + GstVideoCodecFrame * frame, GstAV1Picture * picture) +{ + GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); + GstVideoDecoder *vdec = GST_VIDEO_DECODER (decoder); + GstNvDecoderFrame *decoder_frame; + + GST_LOG_OBJECT (self, "Outputting picture %p", picture); + + decoder_frame = (GstNvDecoderFrame *) gst_av1_picture_get_user_data (picture); + if (!decoder_frame) { + GST_ERROR_OBJECT (self, "No decoder frame in picture %p", picture); + goto error; + } + + if (!gst_nv_decoder_finish_frame (self->decoder, vdec, decoder_frame, + &frame->output_buffer)) { + GST_ERROR_OBJECT (self, "Failed to handle output picture"); + goto error; + } + + gst_av1_picture_unref (picture); + + return gst_video_decoder_finish_frame (vdec, frame); + +error: + gst_video_decoder_drop_frame (vdec, frame); + gst_av1_picture_unref (picture); + + return GST_FLOW_ERROR; +} + +static guint +gst_nv_av1_dec_get_preferred_output_delay (GstAV1Decoder * decoder, + gboolean is_live) +{ + /* Prefer to zero latency for live pipeline */ + if (is_live) + return 0; + + /* NVCODEC SDK uses 4 frame delay for better throughput performance */ + return 4; +} + +void +gst_nv_av1_dec_register (GstPlugin * plugin, guint device_id, guint rank, + GstCaps * sink_caps, GstCaps * src_caps) +{ + GType type; + gchar *type_name; + gchar *feature_name; + guint index = 0; + GTypeInfo type_info = { + sizeof (GstNvAV1DecClass), + NULL, + NULL, + (GClassInitFunc) gst_nv_av1_dec_class_init, + NULL, + NULL, + sizeof (GstNvAV1Dec), + 0, + (GInstanceInitFunc) gst_nv_av1_dec_init, + }; + GstNvAV1DecClassData *cdata; + + GST_DEBUG_CATEGORY_INIT (gst_nv_av1_dec_debug, "nvav1dec", 0, "nvav1dec"); + + cdata = g_new0 (GstNvAV1DecClassData, 1); + cdata->sink_caps = gst_caps_ref (sink_caps); + cdata->src_caps = gst_caps_ref (src_caps); + cdata->cuda_device_id = device_id; + + type_info.class_data = cdata; + + type_name = g_strdup ("GstNvAV1Dec"); + feature_name = g_strdup ("nvav1dec"); + + while (g_type_from_name (type_name)) { + index++; + g_free (type_name); + g_free (feature_name); + type_name = g_strdup_printf ("GstNvAV1Device%dDec", index); + feature_name = g_strdup_printf ("nvav1device%ddec", index); + } + + type = g_type_register_static (GST_TYPE_AV1_DECODER, + type_name, &type_info, 0); + + /* make lower rank than default device */ + if (rank > 0 && index != 0) + rank--; + + if (index != 0) + gst_element_type_set_skip_documentation (type); + + if (!gst_element_register (plugin, feature_name, rank, type)) + GST_WARNING ("Failed to register plugin '%s'", type_name); + + g_free (type_name); + g_free (feature_name); +} diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvav1dec.h b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvav1dec.h new file mode 100644 index 0000000..0924dd3 --- /dev/null +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvav1dec.h @@ -0,0 +1,34 @@ +/* GStreamer + * Copyright (C) 2022 Seungha Yang + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#pragma once + +#include +#include + +G_BEGIN_DECLS + +void gst_nv_av1_dec_register (GstPlugin * plugin, + guint device_id, + guint rank, + GstCaps * sink_caps, + GstCaps * src_caps); + +G_END_DECLS + diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.c index cf1436d..bbffc7b 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.c @@ -84,6 +84,7 @@ struct _GstNvDecoder GstNvDecoderFrameInfo *frame_pool; guint pool_size; + gboolean alloc_aux_frame; GstVideoInfo info; GstVideoInfo coded_info; @@ -254,11 +255,12 @@ gst_nv_decoder_reset (GstNvDecoder * self) gboolean gst_nv_decoder_configure (GstNvDecoder * decoder, cudaVideoCodec codec, GstVideoInfo * info, gint coded_width, gint coded_height, - guint coded_bitdepth, guint pool_size) + guint coded_bitdepth, guint pool_size, gboolean alloc_aux_frame) { CUVIDDECODECREATEINFO create_info = { 0, }; GstVideoFormat format; gboolean ret; + guint alloc_size; g_return_val_if_fail (GST_IS_NV_DECODER (decoder), FALSE); g_return_val_if_fail (codec < cudaVideoCodec_NumCodecs, FALSE); @@ -279,10 +281,20 @@ gst_nv_decoder_configure (GstNvDecoder * decoder, cudaVideoCodec codec, /* Additional 2 frame margin */ pool_size += 2; + /* Need pool size * 2 for decode-only (used for reference) frame + * and output frame, AV1 film grain case for example */ + if (alloc_aux_frame) { + alloc_size = pool_size * 2; + } else { + alloc_size = pool_size; + } + + decoder->alloc_aux_frame = alloc_aux_frame; + /* FIXME: check aligned resolution or actual coded resolution */ create_info.ulWidth = GST_VIDEO_INFO_WIDTH (&decoder->coded_info); create_info.ulHeight = GST_VIDEO_INFO_HEIGHT (&decoder->coded_info); - create_info.ulNumDecodeSurfaces = pool_size; + create_info.ulNumDecodeSurfaces = alloc_size; create_info.CodecType = codec; create_info.ChromaFormat = chroma_format_from_video_format (format); create_info.ulCreationFlags = cudaVideoCreate_Default; @@ -355,8 +367,15 @@ gst_nv_decoder_new_frame (GstNvDecoder * decoder) frame = g_new0 (GstNvDecoderFrame, 1); frame->index = index_to_use; + frame->decode_frame_index = index_to_use; frame->decoder = gst_object_ref (decoder); frame->ref_count = 1; + if (decoder->alloc_aux_frame) { + /* [0, pool_size - 1]: output picture + * [pool_size, pool_size * 2 - 1]: decoder output without film-grain, + * used for reference picture */ + frame->decode_frame_index = index_to_use + decoder->pool_size; + } GST_LOG_OBJECT (decoder, "New frame %p (index %d)", frame, frame->index); @@ -1001,6 +1020,10 @@ gst_nv_decoder_get_supported_codec_profiles (GValue * profiles, ret = TRUE; break; + case cudaVideoCodec_AV1: + g_value_set_static_string (&val, "main"); + gst_value_list_append_value (profiles, &val); + ret = TRUE; default: break; } @@ -1045,7 +1068,8 @@ const GstNvdecoderCodecMap codec_map_list[] = { "video/x-h265, stream-format = (string) byte-stream" ", alignment = (string) au, profile = (string) { main }"}, {cudaVideoCodec_VP8, "vp8", "video/x-vp8"}, - {cudaVideoCodec_VP9, "vp9", "video/x-vp9"} + {cudaVideoCodec_VP9, "vp9", "video/x-vp9"}, + {cudaVideoCodec_AV1, "av1", "video/x-av1, alignment = (string) frame"} }; gboolean diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.h b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.h index 6fcba2d..635d63d 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.h +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.h @@ -40,6 +40,9 @@ typedef struct _GstNvDecoderFrame gboolean mapped; + /* Extra frame allocated for AV1 film grain */ + gint decode_frame_index; + /*< private >*/ GstNvDecoder *decoder; @@ -56,7 +59,8 @@ gboolean gst_nv_decoder_configure (GstNvDecoder * decoder, gint coded_width, gint coded_height, guint coded_bitdepth, - guint pool_size); + guint pool_size, + gboolean alloc_aux_frame); GstNvDecoderFrame * gst_nv_decoder_new_frame (GstNvDecoder * decoder); diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh264dec.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh264dec.c index 1d3edc1..bcb933f 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh264dec.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh264dec.c @@ -447,7 +447,7 @@ gst_nv_h264_dec_new_sequence (GstH264Decoder * decoder, const GstH264SPS * sps, /* FIXME: add support cudaVideoCodec_H264_SVC and cudaVideoCodec_H264_MVC */ if (!gst_nv_decoder_configure (self->decoder, cudaVideoCodec_H264, &info, self->coded_width, self->coded_height, - self->bitdepth, max_dpb_size)) { + self->bitdepth, max_dpb_size, FALSE)) { GST_ERROR_OBJECT (self, "Failed to configure decoder"); return GST_FLOW_NOT_NEGOTIATED; } diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh265dec.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh265dec.c index 4fce77b..ea0f91f 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh265dec.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh265dec.c @@ -399,7 +399,7 @@ gst_nv_h265_dec_new_sequence (GstH265Decoder * decoder, const GstH265SPS * sps, if (!gst_nv_decoder_configure (self->decoder, cudaVideoCodec_HEVC, &info, self->coded_width, self->coded_height, - self->bitdepth, max_dpb_size)) { + self->bitdepth, max_dpb_size, FALSE)) { GST_ERROR_OBJECT (self, "Failed to configure decoder"); return GST_FLOW_NOT_NEGOTIATED; } diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp8dec.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp8dec.c index 0f05b08..3cbfe51 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp8dec.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp8dec.c @@ -256,7 +256,7 @@ gst_nv_vp8_dec_new_sequence (GstVp8Decoder * decoder, if (!gst_nv_decoder_configure (self->decoder, cudaVideoCodec_VP8, &info, self->width, self->height, 8, - max_dpb_size)) { + max_dpb_size, FALSE)) { GST_ERROR_OBJECT (self, "Failed to configure decoder"); return GST_FLOW_NOT_NEGOTIATED; } diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp9dec.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp9dec.c index d172457..fe2a9fa 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp9dec.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp9dec.c @@ -269,7 +269,7 @@ gst_nv_vp9_dec_new_sequence (GstVp9Decoder * decoder, gst_video_info_set_format (&info, out_format, self->width, self->height); if (!gst_nv_decoder_configure (self->decoder, cudaVideoCodec_VP9, &info, self->width, self->height, - frame_hdr->bit_depth, max_dpb_size)) { + frame_hdr->bit_depth, max_dpb_size, FALSE)) { GST_ERROR_OBJECT (self, "Failed to configure decoder"); return GST_FLOW_NOT_NEGOTIATED; } diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/meson.build b/subprojects/gst-plugins-bad/sys/nvcodec/meson.build index 939c376..1794d3f 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/meson.build +++ b/subprojects/gst-plugins-bad/sys/nvcodec/meson.build @@ -6,6 +6,7 @@ nvcodec_sources = [ 'gstnvh265enc.c', 'gstnvdec.c', 'gstcuvidloader.c', + 'gstnvav1dec.c', 'gstnvdecoder.c', 'gstnvh264dec.c', 'gstnvh265dec.c', diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/plugin.c b/subprojects/gst-plugins-bad/sys/nvcodec/plugin.c index 3d74c3f..87afd31 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/plugin.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/plugin.c @@ -31,6 +31,7 @@ #include "gstnvdec.h" #include "gstnvenc.h" +#include "gstnvav1dec.h" #include "gstnvh264dec.h" #include "gstnvh265dec.h" #include "gstnvvp8dec.h" @@ -219,6 +220,12 @@ plugin_init (GstPlugin * plugin) i, GST_RANK_PRIMARY, sink_template, src_template, TRUE); } break; + case cudaVideoCodec_AV1: + gst_nv_av1_dec_register (plugin, i, GST_RANK_PRIMARY, + sink_template, src_template); + /* Stateless decoder only in case of AV1 */ + register_cuviddec = FALSE; + break; default: break; }