media/filters/vp9_parser.h

   1 // Copyright 2015 The Chromium Authors
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4 //
// This file contains an implementation of a VP9 bitstream parser. The main
// purpose of this parser is to support hardware decode acceleration. Some
// accelerators, e.g. libva which implements VA-API, require the caller
// (chrome) to feed them a parsed VP9 frame header.
   9 //
  10 // See media::VP9Decoder for example usage.
  11 //
  12 #ifndef MEDIA_FILTERS_VP9_PARSER_H_
  13 #define MEDIA_FILTERS_VP9_PARSER_H_
  14
  15 #include <stddef.h>
  16 #include <stdint.h>
  17 #include <sys/types.h>
  18
  19 #include <memory>
  20
  21 #include "base/containers/circular_deque.h"
  22 #include "base/functional/callback.h"
  23 #include "base/memory/weak_ptr.h"
  24 #include "media/base/decrypt_config.h"
  25 #include "media/base/media_export.h"
  26 #include "media/base/video_color_space.h"
  27 #include "ui/gfx/geometry/size.h"
  28
  29 namespace media {
  30
  31 const int kVp9MaxProfile = 4;
  32 const int kVp9NumRefFramesLog2 = 3;
  33 const size_t kVp9NumRefFrames = 1 << kVp9NumRefFramesLog2;
  34 const uint8_t kVp9MaxProb = 255;
  35 const size_t kVp9NumRefsPerFrame = 3;
  36 const size_t kVp9NumFrameContextsLog2 = 2;
  37 const size_t kVp9NumFrameContexts = 1 << kVp9NumFrameContextsLog2;
  38
  39 using Vp9Prob = uint8_t;
  40
  41 enum class Vp9ColorSpace {
  42   UNKNOWN = 0,
  43   BT_601 = 1,
  44   BT_709 = 2,
  45   SMPTE_170 = 3,
  46   SMPTE_240 = 4,
  47   BT_2020 = 5,
  48   RESERVED = 6,
  49   SRGB = 7,
  50 };
  51
  52 enum Vp9InterpolationFilter {
  53   EIGHTTAP = 0,
  54   EIGHTTAP_SMOOTH = 1,
  55   EIGHTTAP_SHARP = 2,
  56   BILINEAR = 3,
  57   SWITCHABLE = 4,
  58 };
  59
  60 enum Vp9RefType {
  61   VP9_FRAME_INTRA = 0,
  62   VP9_FRAME_LAST = 1,
  63   VP9_FRAME_GOLDEN = 2,
  64   VP9_FRAME_ALTREF = 3,
  65   VP9_FRAME_MAX = 4,
  66 };
  67
  68 enum Vp9ReferenceMode {
  69   SINGLE_REFERENCE = 0,
  70   COMPOUND_REFERENCE = 1,
  71   REFERENCE_MODE_SELECT = 2,
  72 };
  73
  74 struct MEDIA_EXPORT Vp9SegmentationParams {
  75   static const size_t kNumSegments = 8;
  76   static const size_t kNumTreeProbs = kNumSegments - 1;
  77   static const size_t kNumPredictionProbs = 3;
  78   enum SegmentLevelFeature {
  79     SEG_LVL_ALT_Q = 0,
  80     SEG_LVL_ALT_LF = 1,
  81     SEG_LVL_REF_FRAME = 2,
  82     SEG_LVL_SKIP = 3,
  83     SEG_LVL_MAX
  84   };
  85
  86   bool enabled;
  87
  88   bool update_map;
  89   uint8_t tree_probs[kNumTreeProbs];
  90   bool temporal_update;
  91   uint8_t pred_probs[kNumPredictionProbs];
  92
  93   bool update_data;
  94   bool abs_or_delta_update;
  95   bool feature_enabled[kNumSegments][SEG_LVL_MAX];
  96   int16_t feature_data[kNumSegments][SEG_LVL_MAX];
  97
  98   int16_t y_dequant[kNumSegments][2];
  99   int16_t uv_dequant[kNumSegments][2];
 100
 101   bool FeatureEnabled(size_t seg_id, SegmentLevelFeature feature) const {
 102     return feature_enabled[seg_id][feature];
 103   }
 104
 105   int16_t FeatureData(size_t seg_id, SegmentLevelFeature feature) const {
 106     return feature_data[seg_id][feature];
 107   }
 108 };
 109
 110 struct MEDIA_EXPORT Vp9LoopFilterParams {
 111   static const size_t kNumModeDeltas = 2;
 112
 113   uint8_t level;
 114   uint8_t sharpness;
 115
 116   bool delta_enabled;
 117   bool delta_update;
 118   bool update_ref_deltas[VP9_FRAME_MAX];
 119   int8_t ref_deltas[VP9_FRAME_MAX];
 120   bool update_mode_deltas[kNumModeDeltas];
 121   int8_t mode_deltas[kNumModeDeltas];
 122
 123   // Calculated from above fields.
 124   uint8_t lvl[Vp9SegmentationParams::kNumSegments][VP9_FRAME_MAX]
 125              [kNumModeDeltas];
 126 };
 127
 128 // Members of Vp9FrameHeader will be 0-initialized by Vp9Parser::ParseNextFrame.
 129 struct MEDIA_EXPORT Vp9QuantizationParams {
 130   bool IsLossless() const {
 131     return base_q_idx == 0 && delta_q_y_dc == 0 && delta_q_uv_dc == 0 &&
 132            delta_q_uv_ac == 0;
 133   }
 134
 135   uint8_t base_q_idx;
 136   int8_t delta_q_y_dc;
 137   int8_t delta_q_uv_dc;
 138   int8_t delta_q_uv_ac;
 139 };
 140
 141 // Entropy context for frame parsing
 142 struct MEDIA_EXPORT Vp9FrameContext {
 143   bool IsValid() const;
 144
 145   Vp9Prob tx_probs_8x8[2][1];
 146   Vp9Prob tx_probs_16x16[2][2];
 147   Vp9Prob tx_probs_32x32[2][3];
 148
 149   Vp9Prob coef_probs[4][2][2][6][6][3];
 150   Vp9Prob skip_prob[3];
 151   Vp9Prob inter_mode_probs[7][3];
 152   Vp9Prob interp_filter_probs[4][2];
 153   Vp9Prob is_inter_prob[4];
 154
 155   Vp9Prob comp_mode_prob[5];
 156   Vp9Prob single_ref_prob[5][2];
 157   Vp9Prob comp_ref_prob[5];
 158
 159   Vp9Prob y_mode_probs[4][9];
 160   Vp9Prob uv_mode_probs[10][9];
 161   Vp9Prob partition_probs[16][3];
 162
 163   Vp9Prob mv_joint_probs[3];
 164   Vp9Prob mv_sign_prob[2];
 165   Vp9Prob mv_class_probs[2][10];
 166   Vp9Prob mv_class0_bit_prob[2];
 167   Vp9Prob mv_bits_prob[2][10];
 168   Vp9Prob mv_class0_fr_probs[2][2][3];
 169   Vp9Prob mv_fr_probs[2][3];
 170   Vp9Prob mv_class0_hp_prob[2];
 171   Vp9Prob mv_hp_prob[2];
 172 };
 173
 174 struct MEDIA_EXPORT Vp9CompressedHeader {
 175   enum Vp9TxMode {
 176     ONLY_4X4 = 0,
 177     ALLOW_8X8 = 1,
 178     ALLOW_16X16 = 2,
 179     ALLOW_32X32 = 3,
 180     TX_MODE_SELECT = 4,
 181     TX_MODES = 5,
 182   };
 183
 184   Vp9TxMode tx_mode;
 185   Vp9ReferenceMode reference_mode;
 186 };
 187
 188 // VP9 frame header.
 189 struct MEDIA_EXPORT Vp9FrameHeader {
 190   enum FrameType {
 191     KEYFRAME = 0,
 192     INTERFRAME = 1,
 193   };
 194
 195   bool IsKeyframe() const;
 196   bool IsIntra() const;
 197   bool RefreshFlag(size_t i) const {
 198     return !!(refresh_frame_flags & (1u << i));
 199   }
 200   VideoColorSpace GetColorSpace() const;
 201
 202   uint8_t profile;
 203
 204   bool show_existing_frame;
 205   uint8_t frame_to_show_map_idx;
 206
 207   FrameType frame_type;
 208
 209   bool show_frame;
 210   bool error_resilient_mode;
 211
 212   uint8_t bit_depth;
 213   Vp9ColorSpace color_space;
 214   bool color_range;
 215   uint8_t subsampling_x;
 216   uint8_t subsampling_y;
 217
 218   // The range of frame_width and frame_height is 1..2^16.
 219   uint32_t frame_width;
 220   uint32_t frame_height;
 221   uint32_t render_width;
 222   uint32_t render_height;
 223
 224   bool intra_only;
 225   uint8_t reset_frame_context;
 226   uint8_t refresh_frame_flags;
 227   uint8_t ref_frame_idx[kVp9NumRefsPerFrame];
 228   bool ref_frame_sign_bias[Vp9RefType::VP9_FRAME_MAX];
 229   bool allow_high_precision_mv;
 230   Vp9InterpolationFilter interpolation_filter;
 231
 232   bool refresh_frame_context;
 233   bool frame_parallel_decoding_mode;
 234   uint8_t frame_context_idx;
 235   // |frame_context_idx_to_save_probs| is to be used by save_probs() only, and
 236   // |frame_context_idx| otherwise.
 237   uint8_t frame_context_idx_to_save_probs;
 238
 239   Vp9QuantizationParams quant_params;
 240
 241   uint8_t tile_cols_log2;
 242   uint8_t tile_rows_log2;
 243
 244   // Pointer to the beginning of frame data. It is a responsibility of the
 245   // client of the Vp9Parser to maintain validity of this data while it is
 246   // being used outside of that class.
 247   const uint8_t* data;
 248
 249   // Size of |data| in bytes.
 250   size_t frame_size;
 251
 252   // Size of compressed header in bytes.
 253   size_t header_size_in_bytes;
 254
 255   // Size of uncompressed header in bytes.
 256   size_t uncompressed_header_size;
 257
 258   Vp9CompressedHeader compressed_header;
 259   // Initial frame entropy context after load_probs2(frame_context_idx).
 260   Vp9FrameContext initial_frame_context;
 261   // Current frame entropy context after header parsing.
 262   Vp9FrameContext frame_context;
 263
 264   // Segmentation and loop filter params from uncompressed header
 265   Vp9SegmentationParams segmentation;
 266   Vp9LoopFilterParams loop_filter;
 267 };
 268
 269 // A parser for VP9 bitstream.
 270 class MEDIA_EXPORT Vp9Parser {
 271  public:
 272   // If context update is needed after decoding a frame, the client must
 273   // execute this callback, passing the updated context state.
 274   using ContextRefreshCallback =
 275       base::OnceCallback<void(const Vp9FrameContext&)>;
 276
 277   // ParseNextFrame() return values. See documentation for ParseNextFrame().
 278   enum Result {
 279     kOk,
 280     kInvalidStream,
 281     kEOStream,
 282     kAwaitingRefresh,
 283   };
 284
 285   // The parsing context to keep track of references.
 286   struct ReferenceSlot {
 287     bool initialized;
 288     uint32_t frame_width;
 289     uint32_t frame_height;
 290     uint8_t subsampling_x;
 291     uint8_t subsampling_y;
 292     uint8_t bit_depth;
 293
 294     // More fields for consistency checking.
 295     uint8_t profile;
 296     Vp9ColorSpace color_space;
 297   };
 298
 299   // The parsing context that persists across frames.
 300   class Context {
 301    public:
 302     class MEDIA_EXPORT Vp9FrameContextManager {
 303      public:
 304       Vp9FrameContextManager();
 305       ~Vp9FrameContextManager();
 306       bool initialized() const { return initialized_; }
 307       bool needs_client_update() const { return needs_client_update_; }
 308       const Vp9FrameContext& frame_context() const;
 309
 310       // Resets to uninitialized state.
 311       void Reset();
 312
 313       // Marks this context as requiring an update from parser's client.
 314       void SetNeedsClientUpdate();
 315
 316       // Updates frame context. Returns false if |frame_content| is not valid,
 317       // true otherwise.
 318       bool Update(const Vp9FrameContext& frame_context);
 319
 320       // Returns a callback to update frame context at a later time with.
 321       ContextRefreshCallback GetUpdateCb();
 322
 323      private:
 324       // Updates frame context from parser's client.
 325       void UpdateFromClient(const Vp9FrameContext& frame_context);
 326
 327       bool initialized_ = false;
 328       bool needs_client_update_ = false;
 329       Vp9FrameContext frame_context_;
 330
 331       base::WeakPtrFactory<Vp9FrameContextManager> weak_ptr_factory_{this};
 332     };
 333
 334     void Reset();
 335
 336     // Mark |frame_context_idx| as requiring update from the client.
 337     void MarkFrameContextForUpdate(size_t frame_context_idx);
 338
 339     // Update frame context at |frame_context_idx| with the contents of
 340     // |frame_context|. Returns false if |frame_content| is not valid,
 341     // true otherwise.
 342     bool UpdateFrameContext(size_t frame_context_idx,
 343                             const Vp9FrameContext& frame_context);
 344
 345     // Return ReferenceSlot for frame at |ref_idx|.
 346     const ReferenceSlot& GetRefSlot(size_t ref_idx) const;
 347
 348     // Update contents of ReferenceSlot at |ref_idx| with the contents of
 349     // |ref_slot|.
 350     void UpdateRefSlot(size_t ref_idx, const ReferenceSlot& ref_slot);
 351
 352     const Vp9SegmentationParams& segmentation() const { return segmentation_; }
 353
 354     const Vp9LoopFilterParams& loop_filter() const { return loop_filter_; }
 355
 356    private:
 357     friend class Vp9UncompressedHeaderParser;
 358     friend class Vp9Parser;
 359     friend class Vp9ParserTest;
 360
 361     // Segmentation and loop filter state.
 362     Vp9SegmentationParams segmentation_;
 363     Vp9LoopFilterParams loop_filter_;
 364
 365     // Frame references.
 366     ReferenceSlot ref_slots_[kVp9NumRefFrames];
 367
 368     Vp9FrameContextManager frame_context_managers_[kVp9NumFrameContexts];
 369   };
 370
 371   // See homonymous member variables for information on the parameters.
 372   explicit Vp9Parser(bool parsing_compressed_header);
 373   Vp9Parser(bool parsing_compressed_header, bool needs_external_context_update);
 374
 375   Vp9Parser(const Vp9Parser&) = delete;
 376   Vp9Parser& operator=(const Vp9Parser&) = delete;
 377
 378   ~Vp9Parser();
 379
 380   // Set a new stream buffer to read from, starting at |stream| and of size
 381   // |stream_size| in bytes. |stream| must point to the beginning of a single
 382   // frame or a single superframe, is owned by caller and must remain valid
 383   // until the next call to SetStream(). |spatial_layer_frame_size| may be
 384   // filled if the parsed stream is VP9 SVC. It stands for frame sizes of
 385   // spatial layers. SVC frame might have multiple frames without superframe
 386   // index. The info helps Vp9Parser detecting the beginning of each frame.
 387   void SetStream(const uint8_t* stream,
 388                  off_t stream_size,
 389                  const std::vector<uint32_t>& spatial_layer_frame_size,
 390                  std::unique_ptr<DecryptConfig> stream_config);
 391
 392   void SetStream(const uint8_t* stream,
 393                  off_t stream_size,
 394                  std::unique_ptr<DecryptConfig> stream_config);
 395
 396   // Parse the next frame in the current stream buffer, filling |fhdr| with
 397   // the parsed frame header and updating current segmentation and loop filter
 398   // state. The necessary frame size to decode |fhdr| fills in |allocate_size|.
 399   // The size can be larger than frame size of |fhdr| in the case of SVC stream.
 400   // Also fills |frame_decrypt_config| _if_ the parser was set to use a super
 401   // frame decrypt config.
 402   // Return kOk if a frame has successfully been parsed,
 403   //        kEOStream if there is no more data in the current stream buffer,
 404   //        kAwaitingRefresh if this frame awaiting frame context update, or
 405   //        kInvalidStream on error.
 406   Result ParseNextFrame(Vp9FrameHeader* fhdr,
 407                         gfx::Size* allocate_size,
 408                         std::unique_ptr<DecryptConfig>* frame_decrypt_config);
 409
 410   // Perform the same superframe parsing logic, but don't attempt to parse
 411   // the normal frame headers afterwards, and then only return the decrypt
 412   // config, since the frame itself isn't useful for the testing.
 413   // Returns |true| if a frame would have been sent to |ParseUncompressedHeader|
 414   //         |false| if there was an error parsing the superframe.
 415   std::unique_ptr<DecryptConfig> NextFrameDecryptContextForTesting();
 416   std::string IncrementIVForTesting(const std::string& iv, uint32_t by);
 417
 418   // Return current parsing context.
 419   const Context& context() const { return context_; }
 420
 421   // Return a ContextRefreshCallback, which, if not null, has to be called with
 422   // the new context state after the frame associated with |frame_context_idx|
 423   // is decoded.
 424   ContextRefreshCallback GetContextRefreshCb(size_t frame_context_idx);
 425
 426   // Clear parser state and return to an initialized state.
 427   void Reset();
 428
 429  private:
 430   // Stores start pointer and size of each frame within the current superframe.
 431   struct FrameInfo {
 432     FrameInfo();
 433     FrameInfo(const FrameInfo& copy_from);
 434     FrameInfo(const uint8_t* ptr, off_t size);
 435     ~FrameInfo();
 436
 437     FrameInfo& operator=(const FrameInfo& copy_from);
 438     bool IsValid() const { return ptr != nullptr; }
 439     void Reset() { ptr = nullptr; }
 440
 441     // Starting address of the frame.
 442     const uint8_t* ptr = nullptr;
 443
 444     // Size of the frame in bytes.
 445     off_t size = 0;
 446
 447     // Necessary height and width to decode the frame.
 448     // This is filled only if the stream is SVC.
 449     gfx::Size allocate_size;
 450
 451     std::unique_ptr<DecryptConfig> decrypt_config;
 452   };
 453
 454   base::circular_deque<FrameInfo> ParseSuperframe();
 455   // Parses a frame in SVC stream with |spatial_layer_frame_size_|.
 456   base::circular_deque<FrameInfo> ParseSVCFrame();
 457
 458   // Returns true and populates |result| with the parsing result if parsing of
 459   // current frame is finished (possibly unsuccessfully). |fhdr| will only be
 460   // populated and valid if |result| is kOk. Otherwise return false, indicating
 461   // that the compressed header must be parsed next.
 462   bool ParseUncompressedHeader(const FrameInfo& frame_info,
 463                                Vp9FrameHeader* fhdr,
 464                                Result* result,
 465                                Vp9Parser::Context* context);
 466
 467   // Returns true if parsing of current frame is finished and |result| will be
 468   // populated with value of parsing result. Otherwise, needs to continue setup
 469   // current frame.
 470   bool ParseCompressedHeader(const FrameInfo& frame_info, Result* result);
 471
 472   int64_t GetQIndex(const Vp9QuantizationParams& quant, size_t segid) const;
 473   // Returns true if the setup to |context_| succeeded.
 474   bool SetupSegmentationDequant();
 475   void SetupLoopFilter();
 476   // Returns true if the setup to |context| succeeded.
 477   void UpdateSlots(Vp9Parser::Context* context);
 478
 479   // Current address in the bitstream buffer.
 480   const uint8_t* stream_;
 481
 482   // Remaining bytes in stream_.
 483   off_t bytes_left_;
 484
 485   // Set on ctor if the client needs VP9Parser to also parse compressed headers,
 486   // otherwise they'll be skipped.
 487   const bool parsing_compressed_header_;
 488
 489   // Set on ctor if the client needs to call the ContextRefreshCallback obtained
 490   // via GetContextRefreshCb() with the updated Vp9FrameContext; otherwise
 491   // VP9Parser will update it internally.
 492   const bool needs_external_context_update_;
 493
 494   // FrameInfo for the remaining frames in the current superframe to be parsed.
 495   base::circular_deque<FrameInfo> frames_;
 496
 497   Context context_;
 498
 499   // Encrypted stream info.
 500   std::unique_ptr<DecryptConfig> stream_decrypt_config_;
 501
 502   // The frame size of each spatial layer.
 503   std::vector<uint32_t> spatial_layer_frame_size_;
 504
 505   FrameInfo curr_frame_info_;
 506   Vp9FrameHeader curr_frame_header_;
 507 };
 508
 509 }  // namespace media
 510
 511 #endif  // MEDIA_FILTERS_VP9_PARSER_H_