subprojects/gst-plugins-bad/sys/nvcodec/gstnvav1dec.c

   1 /* GStreamer
   2  * Copyright (C) 2022 Seungha Yang <seungha@centricular.com>
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Library General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Library General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Library General Public
  15  * License along with this library; if not, write to the
  16  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
  17  * Boston, MA 02110-1301, USA.
  18  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 #include "config.h"
  22 #endif
  23
  24 #include <gst/cuda/gstcudautils.h>
  25
  26 #include "gstnvav1dec.h"
  27 #include "gstnvdecoder.h"
  28
  29 #include <string.h>
  30
  31 GST_DEBUG_CATEGORY_STATIC (gst_nv_av1_dec_debug);
  32 #define GST_CAT_DEFAULT gst_nv_av1_dec_debug
  33
  34 typedef struct _GstNvAV1Dec
  35 {
  36   GstAV1Decoder parent;
  37
  38   GstVideoCodecState *output_state;
  39
  40   GstCudaContext *context;
  41   GstNvDecoder *decoder;
  42
  43   GstAV1SequenceHeaderOBU seq_hdr;
  44   CUVIDPICPARAMS params;
  45
  46   /* slice buffer which will be passed to CUVIDPICPARAMS::pBitstreamData */
  47   guint8 *bitstream_buffer;
  48   /* allocated memory size of bitstream_buffer */
  49   gsize bitstream_buffer_alloc_size;
  50   /* current offset of bitstream_buffer (per frame) */
  51   gsize bitstream_buffer_offset;
  52
  53   guint *tile_offsets;
  54   guint tile_offsets_alloc_len;
  55   guint num_tiles;
  56
  57   guint max_width;
  58   guint max_height;
  59   guint bitdepth;
  60   guint8 film_grain_params_present;
  61 } GstNvAV1Dec;
  62
  63 typedef struct _GstNvAV1DecClass
  64 {
  65   GstAV1DecoderClass parent_class;
  66   guint cuda_device_id;
  67 } GstNvAV1DecClass;
  68
  69 typedef struct
  70 {
  71   GstCaps *sink_caps;
  72   GstCaps *src_caps;
  73   guint cuda_device_id;
  74 } GstNvAV1DecClassData;
  75
  76 static GTypeClass *parent_class = NULL;
  77
  78 #define GST_NV_AV1_DEC(object) ((GstNvAV1Dec *) (object))
  79 #define GST_NV_AV1_DEC_GET_CLASS(object) \
  80     (G_TYPE_INSTANCE_GET_CLASS ((object),G_TYPE_FROM_INSTANCE (object),GstNvAV1DecClass))
  81
  82 static void gst_nv_av1_dec_set_context (GstElement * element,
  83     GstContext * context);
  84 static gboolean gst_nv_av1_dec_open (GstVideoDecoder * decoder);
  85 static gboolean gst_nv_av1_dec_close (GstVideoDecoder * decoder);
  86 static gboolean gst_nv_av1_dec_negotiate (GstVideoDecoder * decoder);
  87 static gboolean gst_nv_av1_dec_decide_allocation (GstVideoDecoder *
  88     decoder, GstQuery * query);
  89 static gboolean gst_nv_av1_dec_src_query (GstVideoDecoder * decoder,
  90     GstQuery * query);
  91
  92 static GstFlowReturn gst_nv_av1_dec_new_sequence (GstAV1Decoder * decoder,
  93     const GstAV1SequenceHeaderOBU * seq_hdr, gint max_dpb_size);
  94 static GstFlowReturn gst_nv_av1_dec_new_picture (GstAV1Decoder * decoder,
  95     GstVideoCodecFrame * frame, GstAV1Picture * picture);
  96 static GstAV1Picture *gst_nv_av1_dec_duplicate_picture (GstAV1Decoder *
  97     decoder, GstVideoCodecFrame * frame, GstAV1Picture * picture);
  98 static GstFlowReturn gst_nv_av1_dec_start_picture (GstAV1Decoder * decoder,
  99     GstAV1Picture * picture, GstAV1Dpb * dpb);
 100 static GstFlowReturn gst_nv_av1_dec_decode_tile (GstAV1Decoder * decoder,
 101     GstAV1Picture * picture, GstAV1Tile * tile);
 102 static GstFlowReturn gst_nv_av1_dec_end_picture (GstAV1Decoder * decoder,
 103     GstAV1Picture * picture);
 104 static GstFlowReturn gst_nv_av1_dec_output_picture (GstAV1Decoder *
 105     decoder, GstVideoCodecFrame * frame, GstAV1Picture * picture);
 106 static guint gst_nv_av1_dec_get_preferred_output_delay (GstAV1Decoder * decoder,
 107     gboolean is_live);
 108
 109 static void
 110 gst_nv_av1_dec_class_init (GstNvAV1DecClass * klass,
 111     GstNvAV1DecClassData * cdata)
 112 {
 113   GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
 114   GstVideoDecoderClass *decoder_class = GST_VIDEO_DECODER_CLASS (klass);
 115   GstAV1DecoderClass *av1decoder_class = GST_AV1_DECODER_CLASS (klass);
 116
 117   element_class->set_context = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_set_context);
 118
 119   parent_class = (GTypeClass *) g_type_class_peek_parent (klass);
 120   gst_element_class_set_static_metadata (element_class, "NVDEC AV1 Decoder",
 121       "Codec/Decoder/Video/Hardware",
 122       "NVIDIA AV1 video decoder", "Seungha Yang <seungha@centricular.com>");
 123
 124   gst_element_class_add_pad_template (element_class,
 125       gst_pad_template_new ("sink", GST_PAD_SINK, GST_PAD_ALWAYS,
 126           cdata->sink_caps));
 127   gst_element_class_add_pad_template (element_class,
 128       gst_pad_template_new ("src", GST_PAD_SRC, GST_PAD_ALWAYS,
 129           cdata->src_caps));
 130
 131   decoder_class->open = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_open);
 132   decoder_class->close = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_close);
 133   decoder_class->negotiate = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_negotiate);
 134   decoder_class->decide_allocation =
 135       GST_DEBUG_FUNCPTR (gst_nv_av1_dec_decide_allocation);
 136   decoder_class->src_query = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_src_query);
 137
 138   av1decoder_class->new_sequence =
 139       GST_DEBUG_FUNCPTR (gst_nv_av1_dec_new_sequence);
 140   av1decoder_class->new_picture =
 141       GST_DEBUG_FUNCPTR (gst_nv_av1_dec_new_picture);
 142   av1decoder_class->duplicate_picture =
 143       GST_DEBUG_FUNCPTR (gst_nv_av1_dec_duplicate_picture);
 144   av1decoder_class->start_picture =
 145       GST_DEBUG_FUNCPTR (gst_nv_av1_dec_start_picture);
 146   av1decoder_class->decode_tile =
 147       GST_DEBUG_FUNCPTR (gst_nv_av1_dec_decode_tile);
 148   av1decoder_class->end_picture =
 149       GST_DEBUG_FUNCPTR (gst_nv_av1_dec_end_picture);
 150   av1decoder_class->output_picture =
 151       GST_DEBUG_FUNCPTR (gst_nv_av1_dec_output_picture);
 152   av1decoder_class->get_preferred_output_delay =
 153       GST_DEBUG_FUNCPTR (gst_nv_av1_dec_get_preferred_output_delay);
 154
 155   klass->cuda_device_id = cdata->cuda_device_id;
 156
 157   gst_caps_unref (cdata->sink_caps);
 158   gst_caps_unref (cdata->src_caps);
 159   g_free (cdata);
 160 }
 161
 162 static void
 163 gst_nv_av1_dec_init (GstNvAV1Dec * self)
 164 {
 165 }
 166
 167 static void
 168 gst_nv_av1_dec_set_context (GstElement * element, GstContext * context)
 169 {
 170   GstNvAV1Dec *self = GST_NV_AV1_DEC (element);
 171   GstNvAV1DecClass *klass = GST_NV_AV1_DEC_GET_CLASS (self);
 172
 173   GST_DEBUG_OBJECT (self, "set context %s",
 174       gst_context_get_context_type (context));
 175
 176   if (gst_cuda_handle_set_context (element, context, klass->cuda_device_id,
 177           &self->context)) {
 178     goto done;
 179   }
 180
 181   if (self->decoder)
 182     gst_nv_decoder_handle_set_context (self->decoder, element, context);
 183
 184 done:
 185   GST_ELEMENT_CLASS (parent_class)->set_context (element, context);
 186 }
 187
 188 static gboolean
 189 gst_nv_av1_dec_open (GstVideoDecoder * decoder)
 190 {
 191   GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder);
 192   GstNvAV1DecClass *klass = GST_NV_AV1_DEC_GET_CLASS (self);
 193
 194   if (!gst_cuda_ensure_element_context (GST_ELEMENT (self),
 195           klass->cuda_device_id, &self->context)) {
 196     GST_ERROR_OBJECT (self, "Required element data is unavailable");
 197     return FALSE;
 198   }
 199
 200   self->decoder = gst_nv_decoder_new (self->context);
 201   if (!self->decoder) {
 202     GST_ERROR_OBJECT (self, "Failed to create decoder object");
 203     gst_clear_object (&self->context);
 204
 205     return FALSE;
 206   }
 207
 208   return TRUE;
 209 }
 210
 211 static void
 212 gst_nv_av1_dec_reset_bitstream_params (GstNvAV1Dec * self)
 213 {
 214   self->bitstream_buffer_offset = 0;
 215   self->num_tiles = 0;
 216
 217   self->params.nBitstreamDataLen = 0;
 218   self->params.pBitstreamData = NULL;
 219   self->params.nNumSlices = 0;
 220   self->params.pSliceDataOffsets = NULL;
 221 }
 222
 223 static gboolean
 224 gst_nv_av1_dec_close (GstVideoDecoder * decoder)
 225 {
 226   GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder);
 227
 228   g_clear_pointer (&self->output_state, gst_video_codec_state_unref);
 229   gst_clear_object (&self->decoder);
 230   gst_clear_object (&self->context);
 231
 232   gst_nv_av1_dec_reset_bitstream_params (self);
 233
 234   g_free (self->bitstream_buffer);
 235   self->bitstream_buffer = NULL;
 236
 237   g_free (self->tile_offsets);
 238   self->tile_offsets = NULL;
 239
 240   self->bitstream_buffer_alloc_size = 0;
 241   self->tile_offsets_alloc_len = 0;
 242
 243   return TRUE;
 244 }
 245
 246 static gboolean
 247 gst_nv_av1_dec_negotiate (GstVideoDecoder * decoder)
 248 {
 249   GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder);
 250   GstAV1Decoder *av1dec = GST_AV1_DECODER (decoder);
 251
 252   GST_DEBUG_OBJECT (self, "negotiate");
 253
 254   gst_nv_decoder_negotiate (self->decoder, decoder, av1dec->input_state,
 255       &self->output_state);
 256
 257   return GST_VIDEO_DECODER_CLASS (parent_class)->negotiate (decoder);
 258 }
 259
 260 static gboolean
 261 gst_nv_av1_dec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query)
 262 {
 263   GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder);
 264
 265   if (!gst_nv_decoder_decide_allocation (self->decoder, decoder, query)) {
 266     GST_WARNING_OBJECT (self, "Failed to handle decide allocation");
 267     return FALSE;
 268   }
 269
 270   return GST_VIDEO_DECODER_CLASS (parent_class)->decide_allocation
 271       (decoder, query);
 272 }
 273
 274 static gboolean
 275 gst_nv_av1_dec_src_query (GstVideoDecoder * decoder, GstQuery * query)
 276 {
 277   GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder);
 278
 279   switch (GST_QUERY_TYPE (query)) {
 280     case GST_QUERY_CONTEXT:
 281       if (gst_cuda_handle_context_query (GST_ELEMENT (decoder), query,
 282               self->context)) {
 283         return TRUE;
 284       } else if (self->decoder &&
 285           gst_nv_decoder_handle_context_query (self->decoder, decoder, query)) {
 286         return TRUE;
 287       }
 288       break;
 289     default:
 290       break;
 291   }
 292
 293   return GST_VIDEO_DECODER_CLASS (parent_class)->src_query (decoder, query);
 294 }
 295
 296 static GstFlowReturn
 297 gst_nv_av1_dec_new_sequence (GstAV1Decoder * decoder,
 298     const GstAV1SequenceHeaderOBU * seq_hdr, gint max_dpb_size)
 299 {
 300   GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder);
 301   gboolean modified = FALSE;
 302   guint max_width, max_height;
 303
 304   GST_LOG_OBJECT (self, "new sequence");
 305
 306   if (seq_hdr->seq_profile != GST_AV1_PROFILE_0) {
 307     GST_WARNING_OBJECT (self, "Unsupported profile %d", seq_hdr->seq_profile);
 308     return GST_FLOW_NOT_NEGOTIATED;
 309   }
 310
 311   if (seq_hdr->num_planes != 3) {
 312     GST_WARNING_OBJECT (self, "Monochrome is not supported");
 313     return GST_FLOW_NOT_NEGOTIATED;
 314   }
 315
 316   self->seq_hdr = *seq_hdr;
 317
 318   if (self->bitdepth != seq_hdr->bit_depth) {
 319     GST_INFO_OBJECT (self, "Bitdepth changed %d -> %d", self->bitdepth,
 320         seq_hdr->bit_depth);
 321     self->bitdepth = seq_hdr->bit_depth;
 322     modified = TRUE;
 323   }
 324
 325   max_width = seq_hdr->max_frame_width_minus_1 + 1;
 326   max_height = seq_hdr->max_frame_height_minus_1 + 1;
 327
 328   if (self->max_width != max_width || self->max_height != max_height) {
 329     GST_INFO_OBJECT (self, "Resolution changed %dx%d -> %dx%d",
 330         self->max_width, self->max_height, max_width, max_height);
 331     self->max_width = max_width;
 332     self->max_height = max_height;
 333     modified = TRUE;
 334   }
 335
 336   if (self->film_grain_params_present != seq_hdr->film_grain_params_present) {
 337     GST_INFO_OBJECT (self, "Film grain present changed %d -> %d",
 338         self->film_grain_params_present, seq_hdr->film_grain_params_present);
 339     self->film_grain_params_present = seq_hdr->film_grain_params_present;
 340     modified = TRUE;
 341   }
 342
 343   if (modified || !gst_nv_decoder_is_configured (self->decoder)) {
 344     GstVideoInfo info;
 345     GstVideoFormat out_format = GST_VIDEO_FORMAT_UNKNOWN;
 346
 347     if (self->bitdepth == 8) {
 348       out_format = GST_VIDEO_FORMAT_NV12;
 349     } else if (self->bitdepth == 10) {
 350       out_format = GST_VIDEO_FORMAT_P010_10LE;
 351     } else {
 352       GST_WARNING_OBJECT (self, "Invalid bit-depth %d", seq_hdr->bit_depth);
 353       return GST_FLOW_NOT_NEGOTIATED;
 354     }
 355
 356     gst_video_info_set_format (&info,
 357         out_format, self->max_width, self->max_height);
 358
 359     if (!gst_nv_decoder_configure (self->decoder, cudaVideoCodec_AV1,
 360             &info, self->max_width, self->max_height, self->bitdepth,
 361             max_dpb_size, self->film_grain_params_present ? TRUE : FALSE)) {
 362       GST_ERROR_OBJECT (self, "Failed to create decoder");
 363       return GST_FLOW_NOT_NEGOTIATED;
 364     }
 365
 366     if (!gst_video_decoder_negotiate (GST_VIDEO_DECODER (self))) {
 367       GST_ERROR_OBJECT (self, "Failed to negotiate with downstream");
 368       return GST_FLOW_NOT_NEGOTIATED;
 369     }
 370   }
 371
 372   return GST_FLOW_OK;
 373 }
 374
 375 static GstFlowReturn
 376 gst_nv_av1_dec_new_picture (GstAV1Decoder * decoder,
 377     GstVideoCodecFrame * frame, GstAV1Picture * picture)
 378 {
 379   GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder);
 380   GstNvDecoderFrame *nv_frame;
 381
 382   nv_frame = gst_nv_decoder_new_frame (self->decoder);
 383   if (!nv_frame) {
 384     GST_ERROR_OBJECT (self, "No available decoder frame");
 385     return GST_FLOW_ERROR;
 386   }
 387
 388   GST_LOG_OBJECT (self,
 389       "New decoder frame %p (index %d)", nv_frame, nv_frame->index);
 390
 391   gst_av1_picture_set_user_data (picture,
 392       nv_frame, (GDestroyNotify) gst_nv_decoder_frame_unref);
 393
 394   return GST_FLOW_OK;
 395 }
 396
 397 static GstNvDecoderFrame *
 398 gst_nv_av1_dec_get_decoder_frame_from_picture (GstNvAV1Dec * self,
 399     GstAV1Picture * picture)
 400 {
 401   GstNvDecoderFrame *frame;
 402
 403   frame = (GstNvDecoderFrame *) gst_av1_picture_get_user_data (picture);
 404
 405   if (!frame)
 406     GST_DEBUG_OBJECT (self, "current picture does not have decoder frame");
 407
 408   return frame;
 409 }
 410
 411 static GstAV1Picture *
 412 gst_nv_av1_dec_duplicate_picture (GstAV1Decoder * decoder,
 413     GstVideoCodecFrame * frame, GstAV1Picture * picture)
 414 {
 415   GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder);
 416   GstNvDecoderFrame *nv_frame;
 417   GstAV1Picture *new_picture;
 418
 419   nv_frame = gst_nv_av1_dec_get_decoder_frame_from_picture (self, picture);
 420
 421   if (!nv_frame) {
 422     GST_ERROR_OBJECT (self, "Parent picture does not have decoder frame");
 423     return NULL;
 424   }
 425
 426   new_picture = gst_av1_picture_new ();
 427   new_picture->frame_hdr = picture->frame_hdr;
 428
 429   gst_av1_picture_set_user_data (new_picture,
 430       gst_nv_decoder_frame_ref (nv_frame),
 431       (GDestroyNotify) gst_nv_decoder_frame_unref);
 432
 433   return new_picture;
 434 }
 435
 436 static inline guint8
 437 gst_nv_av1_dec_get_lr_unit_size (guint size)
 438 {
 439   switch (size) {
 440     case 32:
 441       return 0;
 442     case 64:
 443       return 1;
 444     case 128:
 445       return 2;
 446     case 256:
 447       return 3;
 448     default:
 449       break;
 450   }
 451
 452   return 3;
 453 }
 454
 455 static GstFlowReturn
 456 gst_nv_av1_dec_start_picture (GstAV1Decoder * decoder, GstAV1Picture * picture,
 457     GstAV1Dpb * dpb)
 458 {
 459   GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder);
 460   CUVIDPICPARAMS *params = &self->params;
 461   CUVIDAV1PICPARAMS *av1_params = &params->CodecSpecific.av1;
 462   const GstAV1SequenceHeaderOBU *seq_hdr = &self->seq_hdr;
 463   const GstAV1FrameHeaderOBU *frame_hdr = &picture->frame_hdr;
 464   const GstAV1GlobalMotionParams *gmp = &frame_hdr->global_motion_params;
 465   const GstAV1QuantizationParams *qp = &frame_hdr->quantization_params;
 466   const GstAV1TileInfo *ti = &frame_hdr->tile_info;
 467   const GstAV1CDEFParams *cp = &frame_hdr->cdef_params;
 468   const GstAV1SegmenationParams *sp = &frame_hdr->segmentation_params;
 469   const GstAV1LoopFilterParams *lp = &frame_hdr->loop_filter_params;
 470   const GstAV1LoopRestorationParams *lrp = &frame_hdr->loop_restoration_params;
 471   const GstAV1FilmGrainParams *fgp = &frame_hdr->film_grain_params;
 472   GstNvDecoderFrame *frame;
 473   GstNvDecoderFrame *other_frame;
 474   GstAV1Picture *other_pic;
 475   guint i, j;
 476
 477   frame = gst_nv_av1_dec_get_decoder_frame_from_picture (self, picture);
 478   if (!frame) {
 479     GST_ERROR_OBJECT (self, "Decoder frame is unavailable");
 480     return GST_FLOW_ERROR;
 481   }
 482
 483   memset (params, 0, sizeof (CUVIDPICPARAMS));
 484
 485   params->PicWidthInMbs = GST_ROUND_UP_16 (frame_hdr->frame_width) >> 4;
 486   params->FrameHeightInMbs = GST_ROUND_UP_16 (frame_hdr->frame_height) >> 4;
 487   params->CurrPicIdx = frame->index;
 488   params->intra_pic_flag = frame_hdr->frame_is_intra;
 489
 490   av1_params->width = frame_hdr->frame_width;
 491   av1_params->height = frame_hdr->frame_height;
 492   av1_params->frame_offset = frame_hdr->order_hint;
 493   av1_params->decodePicIdx = frame->decode_frame_index;
 494
 495   /* sequence header */
 496   av1_params->profile = seq_hdr->seq_profile;
 497   av1_params->use_128x128_superblock = seq_hdr->use_128x128_superblock;
 498   av1_params->subsampling_x = seq_hdr->color_config.subsampling_x;
 499   av1_params->subsampling_y = seq_hdr->color_config.subsampling_y;
 500   av1_params->mono_chrome = seq_hdr->color_config.mono_chrome;
 501   av1_params->bit_depth_minus8 = seq_hdr->bit_depth - 8;
 502   av1_params->enable_filter_intra = seq_hdr->enable_filter_intra;
 503   av1_params->enable_intra_edge_filter = seq_hdr->enable_intra_edge_filter;
 504   av1_params->enable_interintra_compound = seq_hdr->enable_interintra_compound;
 505   av1_params->enable_masked_compound = seq_hdr->enable_masked_compound;
 506   av1_params->enable_dual_filter = seq_hdr->enable_dual_filter;
 507   av1_params->enable_order_hint = seq_hdr->enable_order_hint;
 508   av1_params->order_hint_bits_minus1 = seq_hdr->order_hint_bits_minus_1;
 509   av1_params->enable_jnt_comp = seq_hdr->enable_jnt_comp;
 510   av1_params->enable_superres = seq_hdr->enable_superres;
 511   av1_params->enable_cdef = seq_hdr->enable_cdef;
 512   av1_params->enable_restoration = seq_hdr->enable_restoration;
 513   av1_params->enable_fgs = seq_hdr->film_grain_params_present;
 514
 515   /* frame header */
 516   av1_params->frame_type = frame_hdr->frame_type;
 517   av1_params->show_frame = frame_hdr->show_frame;
 518   av1_params->disable_cdf_update = frame_hdr->disable_cdf_update;
 519   av1_params->allow_screen_content_tools =
 520       frame_hdr->allow_screen_content_tools;
 521   if (frame_hdr->force_integer_mv || frame_hdr->frame_is_intra)
 522     av1_params->force_integer_mv = 1;
 523   else
 524     av1_params->force_integer_mv = 0;
 525   if (frame_hdr->use_superres) {
 526     av1_params->coded_denom =
 527         frame_hdr->superres_denom - GST_AV1_SUPERRES_DENOM_MIN;
 528   } else {
 529     av1_params->coded_denom = 0;
 530   }
 531   av1_params->allow_intrabc = frame_hdr->allow_intrabc;
 532   av1_params->allow_high_precision_mv = frame_hdr->allow_high_precision_mv;
 533   av1_params->interp_filter = frame_hdr->interpolation_filter;
 534   av1_params->switchable_motion_mode = frame_hdr->is_motion_mode_switchable;
 535   av1_params->use_ref_frame_mvs = frame_hdr->use_ref_frame_mvs;
 536   av1_params->disable_frame_end_update_cdf =
 537       frame_hdr->disable_frame_end_update_cdf;
 538   av1_params->delta_q_present = qp->delta_q_present;
 539   av1_params->delta_q_res = qp->delta_q_res;
 540   av1_params->using_qmatrix = qp->using_qmatrix;
 541   av1_params->coded_lossless = frame_hdr->coded_lossless;
 542   av1_params->use_superres = frame_hdr->use_superres;
 543   av1_params->tx_mode = frame_hdr->tx_mode;
 544   av1_params->reference_mode = frame_hdr->reference_select;
 545   av1_params->allow_warped_motion = frame_hdr->allow_warped_motion;
 546   av1_params->reduced_tx_set = frame_hdr->reduced_tx_set;
 547   av1_params->skip_mode = frame_hdr->skip_mode_present;
 548
 549   /* tiling info */
 550   av1_params->num_tile_cols = ti->tile_cols;
 551   av1_params->num_tile_rows = ti->tile_rows;
 552   av1_params->context_update_tile_id = ti->context_update_tile_id;
 553   for (i = 0; i < ti->tile_cols; i++)
 554     av1_params->tile_widths[i] = ti->width_in_sbs_minus_1[i] + 1;
 555
 556   for (i = 0; i < ti->tile_rows; i++)
 557     av1_params->tile_heights[i] = ti->height_in_sbs_minus_1[i] + 1;
 558
 559   /* CDEF */
 560   av1_params->cdef_damping_minus_3 = cp->cdef_damping - 3;
 561   av1_params->cdef_bits = cp->cdef_bits;
 562   for (i = 0; i < GST_AV1_CDEF_MAX; i++) {
 563     guint8 primary;
 564     guint8 secondary;
 565
 566     primary = cp->cdef_y_pri_strength[i];
 567     secondary = cp->cdef_y_sec_strength[i];
 568     if (secondary == 4)
 569       secondary--;
 570
 571     av1_params->cdef_y_strength[i] = (primary & 0x0f) | (secondary << 4);
 572
 573     primary = cp->cdef_uv_pri_strength[i];
 574     secondary = cp->cdef_uv_sec_strength[i];
 575     if (secondary == 4)
 576       secondary--;
 577
 578     av1_params->cdef_uv_strength[i] = (primary & 0x0f) | (secondary << 4);
 579   }
 580
 581   /* SkipModeFrames */
 582   if (frame_hdr->skip_mode_present) {
 583     av1_params->SkipModeFrame0 = frame_hdr->skip_mode_frame[0];
 584     av1_params->SkipModeFrame1 = frame_hdr->skip_mode_frame[1];
 585   }
 586
 587   /* qp information */
 588   av1_params->base_qindex = qp->base_q_idx;
 589   av1_params->qp_y_dc_delta_q = qp->delta_q_y_dc;
 590   av1_params->qp_u_dc_delta_q = qp->delta_q_u_dc;
 591   av1_params->qp_u_ac_delta_q = qp->delta_q_u_ac;
 592   av1_params->qp_v_dc_delta_q = qp->delta_q_v_dc;
 593   av1_params->qp_v_ac_delta_q = qp->delta_q_v_ac;
 594   av1_params->qm_y = qp->qm_y;
 595   av1_params->qm_u = qp->qm_u;
 596   av1_params->qm_v = qp->qm_v;
 597
 598   /* segmentation */
 599   av1_params->segmentation_enabled = sp->segmentation_enabled;
 600   av1_params->segmentation_update_map = sp->segmentation_update_map;
 601   av1_params->segmentation_update_data = sp->segmentation_update_data;
 602   av1_params->segmentation_temporal_update = sp->segmentation_temporal_update;
 603   for (i = 0; i < GST_AV1_MAX_SEGMENTS; i++) {
 604     for (j = 0; j < GST_AV1_SEG_LVL_MAX; j++) {
 605       av1_params->segmentation_feature_data[i][j] = sp->feature_data[i][j];
 606       av1_params->segmentation_feature_mask[i] |=
 607           sp->feature_enabled[i][j] << j;
 608     }
 609   }
 610
 611   /* loopfilter */
 612   av1_params->loop_filter_level[0] = lp->loop_filter_level[0];
 613   av1_params->loop_filter_level[1] = lp->loop_filter_level[1];
 614   av1_params->loop_filter_level_u = lp->loop_filter_level[2];
 615   av1_params->loop_filter_level_v = lp->loop_filter_level[3];
 616   av1_params->loop_filter_sharpness = lp->loop_filter_sharpness;
 617   for (i = 0; i < GST_AV1_TOTAL_REFS_PER_FRAME; i++) {
 618     av1_params->loop_filter_ref_deltas[i] = lp->loop_filter_ref_deltas[i];
 619   }
 620   av1_params->loop_filter_mode_deltas[0] = lp->loop_filter_mode_deltas[0];
 621   av1_params->loop_filter_mode_deltas[1] = lp->loop_filter_mode_deltas[1];
 622   av1_params->loop_filter_delta_enabled = lp->loop_filter_delta_enabled;
 623   av1_params->loop_filter_delta_update = lp->loop_filter_delta_update;
 624   av1_params->delta_lf_present = lp->delta_lf_present;
 625   av1_params->delta_lf_res = lp->delta_lf_res;
 626   av1_params->delta_lf_multi = lp->delta_lf_multi;
 627
 628   /* restoration */
 629   for (i = 0; i < 3; i++) {
 630     av1_params->lr_unit_size[i] =
 631         gst_nv_av1_dec_get_lr_unit_size (lrp->loop_restoration_size[i]);
 632   }
 633   av1_params->lr_type[0] = lrp->frame_restoration_type[0];
 634   av1_params->lr_type[1] = lrp->frame_restoration_type[1];
 635   av1_params->lr_type[2] = lrp->frame_restoration_type[2];
 636
 637   /* reference frames */
 638   for (i = 0; i < GST_AV1_TOTAL_REFS_PER_FRAME; i++) {
 639     guint8 ref_idx = 0xff;
 640
 641     other_pic = dpb->pic_list[i];
 642     if (other_pic) {
 643       other_frame =
 644           gst_nv_av1_dec_get_decoder_frame_from_picture (self, other_pic);
 645       if (!other_frame) {
 646         GST_ERROR_OBJECT (self, "reference frame is unavailable");
 647         return GST_FLOW_ERROR;
 648       }
 649
 650       ref_idx = other_frame->decode_frame_index;
 651     }
 652
 653     av1_params->ref_frame_map[i] = ref_idx;
 654   }
 655
 656   if (frame_hdr->primary_ref_frame == GST_AV1_PRIMARY_REF_NONE) {
 657     av1_params->primary_ref_frame = 0xff;
 658   } else {
 659     guint8 primary_ref_idx;
 660
 661     g_assert (frame_hdr->primary_ref_frame < 8);
 662
 663     primary_ref_idx = frame_hdr->ref_frame_idx[frame_hdr->primary_ref_frame];
 664     av1_params->primary_ref_frame = av1_params->ref_frame_map[primary_ref_idx];
 665   }
 666   av1_params->temporal_layer_id = picture->temporal_id;
 667   av1_params->spatial_layer_id = picture->spatial_id;
 668
 669   /* ref frame list and global motion */
 670   for (i = 0; i < GST_AV1_REFS_PER_FRAME; i++) {
 671     gint8 ref_idx = frame_hdr->ref_frame_idx[i];
 672
 673     other_pic = NULL;
 674
 675     if (ref_idx >= 0)
 676       other_pic = dpb->pic_list[ref_idx];
 677
 678     if (other_pic) {
 679       other_frame =
 680           gst_nv_av1_dec_get_decoder_frame_from_picture (self, other_pic);
 681
 682       av1_params->ref_frame[i].index = other_frame->decode_frame_index;
 683       av1_params->ref_frame[i].width = other_pic->frame_hdr.frame_width;
 684       av1_params->ref_frame[i].height = other_pic->frame_hdr.frame_height;
 685     } else {
 686       av1_params->ref_frame[i].index = 0xff;
 687     }
 688
 689     av1_params->global_motion[i].invalid = gmp->invalid[i];
 690     av1_params->global_motion[i].wmtype =
 691         gmp->gm_type[GST_AV1_REF_LAST_FRAME + i];
 692     for (j = 0; j < 6; j++) {
 693       av1_params->global_motion[i].wmmat[j] =
 694           gmp->gm_params[GST_AV1_REF_LAST_FRAME + i][j];
 695     }
 696   }
 697
 698   /* film grain params */
 699   if (seq_hdr->film_grain_params_present) {
 700     av1_params->apply_grain = fgp->apply_grain;
 701     av1_params->overlap_flag = fgp->overlap_flag;
 702     av1_params->scaling_shift_minus8 = fgp->grain_scaling_minus_8;
 703     av1_params->chroma_scaling_from_luma = fgp->chroma_scaling_from_luma;
 704     av1_params->ar_coeff_lag = fgp->ar_coeff_lag;
 705     av1_params->ar_coeff_shift_minus6 = fgp->ar_coeff_shift_minus_6;
 706     av1_params->grain_scale_shift = fgp->grain_scale_shift;
 707     av1_params->clip_to_restricted_range = fgp->clip_to_restricted_range;
 708     av1_params->num_y_points = fgp->num_y_points;
 709     for (i = 0; i < fgp->num_y_points && i < 14; i++) {
 710       av1_params->scaling_points_y[i][0] = fgp->point_y_value[i];
 711       av1_params->scaling_points_y[i][1] = fgp->point_y_scaling[i];
 712     }
 713
 714     av1_params->num_cb_points = fgp->num_cb_points;
 715     for (i = 0; i < fgp->num_cb_points && i < 10; i++) {
 716       av1_params->scaling_points_cb[i][0] = fgp->point_cb_value[i];
 717       av1_params->scaling_points_cb[i][1] = fgp->point_cb_scaling[i];
 718     }
 719
 720     av1_params->num_cr_points = fgp->num_cr_points;
 721     for (i = 0; i < fgp->num_cr_points && i < 10; i++) {
 722       av1_params->scaling_points_cr[i][0] = fgp->point_cr_value[i];
 723       av1_params->scaling_points_cr[i][1] = fgp->point_cr_scaling[i];
 724     }
 725
 726     av1_params->random_seed = fgp->grain_seed;
 727     for (i = 0; i < 24; i++) {
 728       av1_params->ar_coeffs_y[i] = (short) fgp->ar_coeffs_y_plus_128[i] - 128;
 729     }
 730
 731     for (i = 0; i < 25; i++) {
 732       av1_params->ar_coeffs_cb[i] = (short) fgp->ar_coeffs_cb_plus_128[i] - 128;
 733       av1_params->ar_coeffs_cr[i] = (short) fgp->ar_coeffs_cr_plus_128[i] - 128;
 734     }
 735     av1_params->cb_mult = fgp->cb_mult;
 736     av1_params->cb_luma_mult = fgp->cb_luma_mult;
 737     av1_params->cb_offset = fgp->cb_offset;
 738     av1_params->cr_mult = fgp->cr_mult;
 739     av1_params->cr_luma_mult = fgp->cr_luma_mult;
 740     av1_params->cr_offset = fgp->cr_offset;
 741   }
 742
 743   gst_nv_av1_dec_reset_bitstream_params (self);
 744
 745   return GST_FLOW_OK;
 746 }
 747
 748 static GstFlowReturn
 749 gst_nv_av1_dec_decode_tile (GstAV1Decoder * decoder,
 750     GstAV1Picture * picture, GstAV1Tile * tile)
 751 {
 752   GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder);
 753   GstAV1TileGroupOBU *tile_group = &tile->tile_group;
 754   guint i;
 755   guint buffer_size;
 756
 757   if (tile_group->num_tiles * 2 > self->tile_offsets_alloc_len) {
 758     self->tile_offsets_alloc_len = tile_group->num_tiles * 2;
 759
 760     self->tile_offsets = (guint *) g_realloc_n (self->tile_offsets,
 761         self->tile_offsets_alloc_len, sizeof (guint));
 762   }
 763
 764   self->num_tiles = tile_group->num_tiles;
 765
 766   for (i = tile_group->tg_start; i <= tile_group->tg_end; i++) {
 767     guint offset = self->bitstream_buffer_offset +
 768         tile_group->entry[i].tile_offset;
 769
 770     self->tile_offsets[i * 2] = offset;
 771     self->tile_offsets[i * 2 + 1] = offset + tile_group->entry[i].tile_size;
 772   }
 773
 774   buffer_size = self->bitstream_buffer_offset + tile->obu.obu_size;
 775   if (buffer_size > self->bitstream_buffer_alloc_size) {
 776     guint alloc_size = buffer_size * 2;
 777
 778     self->bitstream_buffer = (guint8 *) g_realloc (self->bitstream_buffer,
 779         alloc_size);
 780     self->bitstream_buffer_alloc_size = alloc_size;
 781   }
 782
 783   memcpy (self->bitstream_buffer + self->bitstream_buffer_offset,
 784       tile->obu.data, tile->obu.obu_size);
 785
 786   self->bitstream_buffer_offset += tile->obu.obu_size;
 787
 788   return GST_FLOW_OK;
 789 }
 790
 791 static GstFlowReturn
 792 gst_nv_av1_dec_end_picture (GstAV1Decoder * decoder, GstAV1Picture * picture)
 793 {
 794   GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder);
 795   gboolean ret;
 796   CUVIDPICPARAMS *params = &self->params;
 797
 798   params->nBitstreamDataLen = self->bitstream_buffer_offset;
 799   params->pBitstreamData = self->bitstream_buffer;
 800   params->nNumSlices = self->num_tiles;
 801   params->pSliceDataOffsets = self->tile_offsets;
 802
 803   ret = gst_nv_decoder_decode_picture (self->decoder, params);
 804
 805   if (!ret) {
 806     GST_ERROR_OBJECT (self, "Failed to decode picture");
 807     return GST_FLOW_ERROR;
 808   }
 809
 810   return GST_FLOW_OK;
 811 }
 812
 813 static GstFlowReturn
 814 gst_nv_av1_dec_output_picture (GstAV1Decoder * decoder,
 815     GstVideoCodecFrame * frame, GstAV1Picture * picture)
 816 {
 817   GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder);
 818   GstVideoDecoder *vdec = GST_VIDEO_DECODER (decoder);
 819   GstNvDecoderFrame *decoder_frame;
 820
 821   GST_LOG_OBJECT (self, "Outputting picture %p", picture);
 822
 823   decoder_frame = (GstNvDecoderFrame *) gst_av1_picture_get_user_data (picture);
 824   if (!decoder_frame) {
 825     GST_ERROR_OBJECT (self, "No decoder frame in picture %p", picture);
 826     goto error;
 827   }
 828
 829   if (!gst_nv_decoder_finish_frame (self->decoder, vdec, decoder_frame,
 830           &frame->output_buffer)) {
 831     GST_ERROR_OBJECT (self, "Failed to handle output picture");
 832     goto error;
 833   }
 834
 835   gst_av1_picture_unref (picture);
 836
 837   return gst_video_decoder_finish_frame (vdec, frame);
 838
 839 error:
 840   gst_video_decoder_drop_frame (vdec, frame);
 841   gst_av1_picture_unref (picture);
 842
 843   return GST_FLOW_ERROR;
 844 }
 845
 846 static guint
 847 gst_nv_av1_dec_get_preferred_output_delay (GstAV1Decoder * decoder,
 848     gboolean is_live)
 849 {
 850   /* Prefer to zero latency for live pipeline */
 851   if (is_live)
 852     return 0;
 853
 854   /* NVCODEC SDK uses 4 frame delay for better throughput performance */
 855   return 4;
 856 }
 857
 858 void
 859 gst_nv_av1_dec_register (GstPlugin * plugin, guint device_id, guint rank,
 860     GstCaps * sink_caps, GstCaps * src_caps)
 861 {
 862   GType type;
 863   gchar *type_name;
 864   gchar *feature_name;
 865   guint index = 0;
 866   GTypeInfo type_info = {
 867     sizeof (GstNvAV1DecClass),
 868     NULL,
 869     NULL,
 870     (GClassInitFunc) gst_nv_av1_dec_class_init,
 871     NULL,
 872     NULL,
 873     sizeof (GstNvAV1Dec),
 874     0,
 875     (GInstanceInitFunc) gst_nv_av1_dec_init,
 876   };
 877   GstNvAV1DecClassData *cdata;
 878
 879   GST_DEBUG_CATEGORY_INIT (gst_nv_av1_dec_debug, "nvav1dec", 0, "nvav1dec");
 880
 881   cdata = g_new0 (GstNvAV1DecClassData, 1);
 882   cdata->sink_caps = gst_caps_ref (sink_caps);
 883   cdata->src_caps = gst_caps_ref (src_caps);
 884   cdata->cuda_device_id = device_id;
 885
 886   type_info.class_data = cdata;
 887
 888   type_name = g_strdup ("GstNvAV1Dec");
 889   feature_name = g_strdup ("nvav1dec");
 890
 891   while (g_type_from_name (type_name)) {
 892     index++;
 893     g_free (type_name);
 894     g_free (feature_name);
 895     type_name = g_strdup_printf ("GstNvAV1Device%dDec", index);
 896     feature_name = g_strdup_printf ("nvav1device%ddec", index);
 897   }
 898
 899   type = g_type_register_static (GST_TYPE_AV1_DECODER,
 900       type_name, &type_info, 0);
 901
 902   /* make lower rank than default device */
 903   if (rank > 0 && index != 0)
 904     rank--;
 905
 906   if (index != 0)
 907     gst_element_type_set_skip_documentation (type);
 908
 909   if (!gst_element_register (plugin, feature_name, rank, type))
 910     GST_WARNING ("Failed to register plugin '%s'", type_name);
 911
 912   g_free (type_name);
 913   g_free (feature_name);
 914 }