src/content/common/gpu/media/vt_video_decode_accelerator.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <CoreVideo/CoreVideo.h>
   6 #include <OpenGL/CGLIOSurface.h>
   7
   8 #include "base/bind.h"
   9 #include "base/command_line.h"
  10 #include "base/sys_byteorder.h"
  11 #include "base/thread_task_runner_handle.h"
  12 #include "content/common/gpu/media/vt_video_decode_accelerator.h"
  13 #include "content/public/common/content_switches.h"
  14 #include "media/filters/h264_parser.h"
  15 #include "ui/gl/scoped_binders.h"
  16 #include "ui/gl/scoped_cgl.h"
  17
  18 using content_common_gpu_media::kModuleVt;
  19 using content_common_gpu_media::InitializeStubs;
  20 using content_common_gpu_media::IsVtInitialized;
  21 using content_common_gpu_media::StubPathMap;
  22
  23 namespace content {
  24
  25 // Size of NALU length headers in AVCC/MPEG-4 format (can be 1, 2, or 4).
  26 static const int kNALUHeaderLength = 4;
  27
  28 // We only request 5 picture buffers from the client which are used to hold the
  29 // decoded samples. These buffers are then reused when the client tells us that
  30 // it is done with the buffer.
  31 static const int kNumPictureBuffers = 5;
  32
  33 // Route decoded frame callbacks back into the VTVideoDecodeAccelerator.
  34 static void OutputThunk(
  35     void* decompression_output_refcon,
  36     void* source_frame_refcon,
  37     OSStatus status,
  38     VTDecodeInfoFlags info_flags,
  39     CVImageBufferRef image_buffer,
  40     CMTime presentation_time_stamp,
  41     CMTime presentation_duration) {
  42   // TODO(sandersd): Implement flush-before-delete to guarantee validity.
  43   VTVideoDecodeAccelerator* vda =
  44       reinterpret_cast<VTVideoDecodeAccelerator*>(decompression_output_refcon);
  45   int32_t bitstream_id = reinterpret_cast<intptr_t>(source_frame_refcon);
  46   vda->Output(bitstream_id, status, image_buffer);
  47 }
  48
  49 VTVideoDecodeAccelerator::DecodedFrame::DecodedFrame(
  50     int32_t bitstream_id,
  51     CVImageBufferRef image_buffer)
  52     : bitstream_id(bitstream_id),
  53       image_buffer(image_buffer) {
  54 }
  55
  56 VTVideoDecodeAccelerator::DecodedFrame::~DecodedFrame() {
  57 }
  58
  59 VTVideoDecodeAccelerator::VTVideoDecodeAccelerator(CGLContextObj cgl_context)
  60     : cgl_context_(cgl_context),
  61       client_(NULL),
  62       format_(NULL),
  63       session_(NULL),
  64       gpu_task_runner_(base::ThreadTaskRunnerHandle::Get()),
  65       weak_this_factory_(this),
  66       decoder_thread_("VTDecoderThread") {
  67   callback_.decompressionOutputCallback = OutputThunk;
  68   callback_.decompressionOutputRefCon = this;
  69 }
  70
  71 VTVideoDecodeAccelerator::~VTVideoDecodeAccelerator() {
  72 }
  73
  74 bool VTVideoDecodeAccelerator::Initialize(
  75     media::VideoCodecProfile profile,
  76     Client* client) {
  77   DCHECK(CalledOnValidThread());
  78   client_ = client;
  79
  80   // Only H.264 is supported.
  81   if (profile < media::H264PROFILE_MIN || profile > media::H264PROFILE_MAX)
  82     return false;
  83
  84   // Require --no-sandbox until VideoToolbox library loading is part of sandbox
  85   // startup (and this VDA is ready for regular users).
  86   if (!base::CommandLine::ForCurrentProcess()->HasSwitch(switches::kNoSandbox))
  87     return false;
  88
  89   if (!IsVtInitialized()) {
  90     // CoreVideo is also required, but the loader stops after the first
  91     // path is loaded. Instead we rely on the transitive dependency from
  92     // VideoToolbox to CoreVideo.
  93     // TODO(sandersd): Fallback to PrivateFrameworks for VideoToolbox.
  94     StubPathMap paths;
  95     paths[kModuleVt].push_back(FILE_PATH_LITERAL(
  96         "/System/Library/Frameworks/VideoToolbox.framework/VideoToolbox"));
  97     if (!InitializeStubs(paths))
  98       return false;
  99   }
 100
 101   // Spawn a thread to handle parsing and calling VideoToolbox.
 102   if (!decoder_thread_.Start())
 103     return false;
 104
 105   return true;
 106 }
 107
 108 // TODO(sandersd): Proper error reporting instead of CHECKs.
 109 void VTVideoDecodeAccelerator::ConfigureDecoder(
 110     const std::vector<const uint8_t*>& nalu_data_ptrs,
 111     const std::vector<size_t>& nalu_data_sizes) {
 112   DCHECK(decoder_thread_.message_loop_proxy()->BelongsToCurrentThread());
 113   // Construct a new format description from the parameter sets.
 114   // TODO(sandersd): Replace this with custom code to support OS X < 10.9.
 115   format_.reset();
 116   CHECK(!CMVideoFormatDescriptionCreateFromH264ParameterSets(
 117       kCFAllocatorDefault,
 118       nalu_data_ptrs.size(),      // parameter_set_count
 119       &nalu_data_ptrs.front(),    // &parameter_set_pointers
 120       &nalu_data_sizes.front(),   // &parameter_set_sizes
 121       kNALUHeaderLength,          // nal_unit_header_length
 122       format_.InitializeInto()));
 123   CMVideoDimensions coded_dimensions =
 124       CMVideoFormatDescriptionGetDimensions(format_);
 125
 126   // Prepare VideoToolbox configuration dictionaries.
 127   base::ScopedCFTypeRef<CFMutableDictionaryRef> decoder_config(
 128       CFDictionaryCreateMutable(
 129           kCFAllocatorDefault,
 130           1,  // capacity
 131           &kCFTypeDictionaryKeyCallBacks,
 132           &kCFTypeDictionaryValueCallBacks));
 133
 134   CFDictionarySetValue(
 135       decoder_config,
 136       // kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder
 137       CFSTR("EnableHardwareAcceleratedVideoDecoder"),
 138       kCFBooleanTrue);
 139
 140   base::ScopedCFTypeRef<CFMutableDictionaryRef> image_config(
 141       CFDictionaryCreateMutable(
 142           kCFAllocatorDefault,
 143           4,  // capacity
 144           &kCFTypeDictionaryKeyCallBacks,
 145           &kCFTypeDictionaryValueCallBacks));
 146
 147 #define CFINT(i) CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &i)
 148   // TODO(sandersd): RGBA option for 4:4:4 video.
 149   int32_t pixel_format = kCVPixelFormatType_422YpCbCr8;
 150   base::ScopedCFTypeRef<CFNumberRef> cf_pixel_format(CFINT(pixel_format));
 151   base::ScopedCFTypeRef<CFNumberRef> cf_width(CFINT(coded_dimensions.width));
 152   base::ScopedCFTypeRef<CFNumberRef> cf_height(CFINT(coded_dimensions.height));
 153 #undef CFINT
 154   CFDictionarySetValue(
 155       image_config, kCVPixelBufferPixelFormatTypeKey, cf_pixel_format);
 156   CFDictionarySetValue(image_config, kCVPixelBufferWidthKey, cf_width);
 157   CFDictionarySetValue(image_config, kCVPixelBufferHeightKey, cf_height);
 158   CFDictionarySetValue(
 159       image_config, kCVPixelBufferOpenGLCompatibilityKey, kCFBooleanTrue);
 160
 161   // TODO(sandersd): Check if the session is already compatible.
 162   // TODO(sandersd): Flush.
 163   session_.reset();
 164   CHECK(!VTDecompressionSessionCreate(
 165       kCFAllocatorDefault,
 166       format_,              // video_format_description
 167       decoder_config,       // video_decoder_specification
 168       image_config,         // destination_image_buffer_attributes
 169       &callback_,           // output_callback
 170       session_.InitializeInto()));
 171
 172   // If the size has changed, trigger a request for new picture buffers.
 173   gfx::Size new_coded_size(coded_dimensions.width, coded_dimensions.height);
 174   if (coded_size_ != new_coded_size) {
 175     coded_size_ = new_coded_size;
 176     gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
 177         &VTVideoDecodeAccelerator::SizeChangedTask,
 178         weak_this_factory_.GetWeakPtr(),
 179         coded_size_));;
 180   }
 181 }
 182
 183 void VTVideoDecodeAccelerator::Decode(const media::BitstreamBuffer& bitstream) {
 184   DCHECK(CalledOnValidThread());
 185   // TODO(sandersd): Test what happens if bitstream buffers are passed to VT out
 186   // of order.
 187   decoder_thread_.message_loop_proxy()->PostTask(FROM_HERE, base::Bind(
 188       &VTVideoDecodeAccelerator::DecodeTask, base::Unretained(this),
 189       bitstream));
 190 }
 191
 192 // TODO(sandersd): Proper error reporting instead of CHECKs.
 193 void VTVideoDecodeAccelerator::DecodeTask(
 194     const media::BitstreamBuffer bitstream) {
 195   DCHECK(decoder_thread_.message_loop_proxy()->BelongsToCurrentThread());
 196
 197   // Map the bitstream buffer.
 198   base::SharedMemory memory(bitstream.handle(), true);
 199   size_t size = bitstream.size();
 200   CHECK(memory.Map(size));
 201   const uint8_t* buf = static_cast<uint8_t*>(memory.memory());
 202
 203   // NALUs are stored with Annex B format in the bitstream buffer (start codes),
 204   // but VideoToolbox expects AVCC/MPEG-4 format (length headers), so we must
 205   // rewrite the data.
 206   //
 207   // 1. Locate relevant NALUs and compute the size of the translated data.
 208   //    Also record any parameter sets for VideoToolbox initialization.
 209   size_t data_size = 0;
 210   std::vector<media::H264NALU> nalus;
 211   std::vector<const uint8_t*> config_nalu_data_ptrs;
 212   std::vector<size_t> config_nalu_data_sizes;
 213   parser_.SetStream(buf, size);
 214   media::H264NALU nalu;
 215   while (true) {
 216     media::H264Parser::Result result = parser_.AdvanceToNextNALU(&nalu);
 217     if (result == media::H264Parser::kEOStream)
 218       break;
 219     CHECK_EQ(result, media::H264Parser::kOk);
 220     // TODO(sandersd): Check that these are only at the start.
 221     if (nalu.nal_unit_type == media::H264NALU::kSPS ||
 222         nalu.nal_unit_type == media::H264NALU::kPPS ||
 223         nalu.nal_unit_type == media::H264NALU::kSPSExt) {
 224       DVLOG(2) << "Parameter set " << nalu.nal_unit_type;
 225       config_nalu_data_ptrs.push_back(nalu.data);
 226       config_nalu_data_sizes.push_back(nalu.size);
 227     } else {
 228       nalus.push_back(nalu);
 229       data_size += kNALUHeaderLength + nalu.size;
 230     }
 231   }
 232
 233   // 2. Initialize VideoToolbox.
 234   // TODO(sandersd): Reinitialize when there are new parameter sets.
 235   if (!session_)
 236     ConfigureDecoder(config_nalu_data_ptrs, config_nalu_data_sizes);
 237
 238   // 3. Allocate a memory-backed CMBlockBuffer for the translated data.
 239   base::ScopedCFTypeRef<CMBlockBufferRef> data;
 240   CHECK(!CMBlockBufferCreateWithMemoryBlock(
 241       kCFAllocatorDefault,
 242       NULL,                 // &memory_block
 243       data_size,            // block_length
 244       kCFAllocatorDefault,  // block_allocator
 245       NULL,                 // &custom_block_source
 246       0,                    // offset_to_data
 247       data_size,            // data_length
 248       0,                    // flags
 249       data.InitializeInto()));
 250
 251   // 4. Copy NALU data, inserting length headers.
 252   size_t offset = 0;
 253   for (size_t i = 0; i < nalus.size(); i++) {
 254     media::H264NALU& nalu = nalus[i];
 255     uint32_t header = base::HostToNet32(static_cast<uint32_t>(nalu.size));
 256     CHECK(!CMBlockBufferReplaceDataBytes(
 257         &header, data, offset, kNALUHeaderLength));
 258     offset += kNALUHeaderLength;
 259     CHECK(!CMBlockBufferReplaceDataBytes(nalu.data, data, offset, nalu.size));
 260     offset += nalu.size;
 261   }
 262
 263   // 5. Package the data for VideoToolbox and request decoding.
 264   base::ScopedCFTypeRef<CMSampleBufferRef> frame;
 265   CHECK(!CMSampleBufferCreate(
 266       kCFAllocatorDefault,
 267       data,                 // data_buffer
 268       true,                 // data_ready
 269       NULL,                 // make_data_ready_callback
 270       NULL,                 // make_data_ready_refcon
 271       format_,              // format_description
 272       1,                    // num_samples
 273       0,                    // num_sample_timing_entries
 274       NULL,                 // &sample_timing_array
 275       0,                    // num_sample_size_entries
 276       NULL,                 // &sample_size_array
 277       frame.InitializeInto()));
 278
 279   // Asynchronous Decompression allows for parallel submission of frames
 280   // (without it, DecodeFrame() does not return until the frame has been
 281   // decoded). We don't enable Temporal Processing so that frames are always
 282   // returned in decode order; this makes it easier to avoid deadlock.
 283   VTDecodeFrameFlags decode_flags =
 284       kVTDecodeFrame_EnableAsynchronousDecompression;
 285
 286   intptr_t bitstream_id = bitstream.id();
 287   CHECK(!VTDecompressionSessionDecodeFrame(
 288       session_,
 289       frame,                                  // sample_buffer
 290       decode_flags,                           // decode_flags
 291       reinterpret_cast<void*>(bitstream_id),  // source_frame_refcon
 292       NULL));                                 // &info_flags_out
 293 }
 294
 295 // This method may be called on any VideoToolbox thread.
 296 // TODO(sandersd): Proper error reporting instead of CHECKs.
 297 void VTVideoDecodeAccelerator::Output(
 298     int32_t bitstream_id,
 299     OSStatus status,
 300     CVImageBufferRef image_buffer) {
 301   CHECK(!status);
 302   CHECK_EQ(CFGetTypeID(image_buffer), CVPixelBufferGetTypeID());
 303   CFRetain(image_buffer);
 304   gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
 305       &VTVideoDecodeAccelerator::OutputTask,
 306       weak_this_factory_.GetWeakPtr(),
 307       DecodedFrame(bitstream_id, image_buffer)));
 308 }
 309
 310 void VTVideoDecodeAccelerator::OutputTask(DecodedFrame frame) {
 311   DCHECK(CalledOnValidThread());
 312   decoded_frames_.push(frame);
 313   SendPictures();
 314 }
 315
 316 void VTVideoDecodeAccelerator::SizeChangedTask(gfx::Size coded_size) {
 317   DCHECK(CalledOnValidThread());
 318   texture_size_ = coded_size;
 319   // TODO(sandersd): Dismiss existing picture buffers.
 320   client_->ProvidePictureBuffers(
 321       kNumPictureBuffers, texture_size_, GL_TEXTURE_RECTANGLE_ARB);
 322 }
 323
 324 void VTVideoDecodeAccelerator::AssignPictureBuffers(
 325     const std::vector<media::PictureBuffer>& pictures) {
 326   DCHECK(CalledOnValidThread());
 327
 328   for (size_t i = 0; i < pictures.size(); i++) {
 329     CHECK(!texture_ids_.count(pictures[i].id()));
 330     available_picture_ids_.push(pictures[i].id());
 331     texture_ids_[pictures[i].id()] = pictures[i].texture_id();
 332   }
 333
 334   // Pictures are not marked as uncleared until this method returns. They will
 335   // become broken if they are used before that happens.
 336   gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
 337       &VTVideoDecodeAccelerator::SendPictures,
 338       weak_this_factory_.GetWeakPtr()));
 339 }
 340
 341 void VTVideoDecodeAccelerator::ReusePictureBuffer(int32_t picture_id) {
 342   DCHECK(CalledOnValidThread());
 343   DCHECK_EQ(CFGetRetainCount(picture_bindings_[picture_id]), 1);
 344   picture_bindings_.erase(picture_id);
 345   available_picture_ids_.push(picture_id);
 346   SendPictures();
 347 }
 348
 349 // TODO(sandersd): Proper error reporting instead of CHECKs.
 350 void VTVideoDecodeAccelerator::SendPictures() {
 351   DCHECK(CalledOnValidThread());
 352   if (available_picture_ids_.empty() || decoded_frames_.empty())
 353     return;
 354
 355   gfx::ScopedCGLSetCurrentContext scoped_set_current_context(cgl_context_);
 356   glEnable(GL_TEXTURE_RECTANGLE_ARB);
 357
 358   while (!available_picture_ids_.empty() && !decoded_frames_.empty()) {
 359     int32_t picture_id = available_picture_ids_.front();
 360     available_picture_ids_.pop();
 361     DecodedFrame frame = decoded_frames_.front();
 362     decoded_frames_.pop();
 363     IOSurfaceRef surface = CVPixelBufferGetIOSurface(frame.image_buffer);
 364
 365     gfx::ScopedTextureBinder
 366         texture_binder(GL_TEXTURE_RECTANGLE_ARB, texture_ids_[picture_id]);
 367     CHECK(!CGLTexImageIOSurface2D(
 368         cgl_context_,                 // ctx
 369         GL_TEXTURE_RECTANGLE_ARB,     // target
 370         GL_RGB,                       // internal_format
 371         texture_size_.width(),        // width
 372         texture_size_.height(),       // height
 373         GL_YCBCR_422_APPLE,           // format
 374         GL_UNSIGNED_SHORT_8_8_APPLE,  // type
 375         surface,                      // io_surface
 376         0));                          // plane
 377
 378     picture_bindings_[picture_id] = frame.image_buffer;
 379     client_->PictureReady(media::Picture(picture_id, frame.bitstream_id));
 380     client_->NotifyEndOfBitstreamBuffer(frame.bitstream_id);
 381   }
 382
 383   glDisable(GL_TEXTURE_RECTANGLE_ARB);
 384 }
 385
 386 void VTVideoDecodeAccelerator::Flush() {
 387   DCHECK(CalledOnValidThread());
 388   // TODO(sandersd): Trigger flush, sending frames.
 389 }
 390
 391 void VTVideoDecodeAccelerator::Reset() {
 392   DCHECK(CalledOnValidThread());
 393   // TODO(sandersd): Trigger flush, discarding frames.
 394 }
 395
 396 void VTVideoDecodeAccelerator::Destroy() {
 397   DCHECK(CalledOnValidThread());
 398   // TODO(sandersd): Trigger flush, discarding frames, and wait for them.
 399   delete this;
 400 }
 401
 402 bool VTVideoDecodeAccelerator::CanDecodeOnIOThread() {
 403   return false;
 404 }
 405
 406 }  // namespace content