src/third_party/libjingle/source/talk/media/base/videocapturer.h

   1 // libjingle
   2 // Copyright 2010 Google Inc.
   3 //
   4 // Redistribution and use in source and binary forms, with or without
   5 // modification, are permitted provided that the following conditions are met:
   6 //
   7 //  1. Redistributions of source code must retain the above copyright notice,
   8 //     this list of conditions and the following disclaimer.
   9 //  2. Redistributions in binary form must reproduce the above copyright notice,
  10 //     this list of conditions and the following disclaimer in the documentation
  11 //     and/or other materials provided with the distribution.
  12 //  3. The name of the author may not be used to endorse or promote products
  13 //     derived from this software without specific prior written permission.
  14 //
  15 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
  16 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  17 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
  18 // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  19 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  20 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  21 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  22 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  23 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  24 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25 //
  26 // Declaration of abstract class VideoCapturer
  27
  28 #ifndef TALK_MEDIA_BASE_VIDEOCAPTURER_H_
  29 #define TALK_MEDIA_BASE_VIDEOCAPTURER_H_
  30
  31 #include <string>
  32 #include <vector>
  33
  34 #include "talk/base/basictypes.h"
  35 #include "talk/base/criticalsection.h"
  36 #include "talk/base/messagehandler.h"
  37 #include "talk/base/scoped_ptr.h"
  38 #include "talk/base/sigslot.h"
  39 #include "talk/base/thread.h"
  40 #include "talk/media/base/videocommon.h"
  41 #include "talk/media/devices/devicemanager.h"
  42
  43
  44 namespace cricket {
  45
  46 class VideoProcessor;
  47
  48 // Current state of the capturer.
  49 // TODO(hellner): CS_NO_DEVICE is an error code not a capture state. Separate
  50 //                error codes and states.
  51 enum CaptureState {
  52   CS_STOPPED,    // The capturer has been stopped or hasn't started yet.
  53   CS_STARTING,   // The capturer is in the process of starting. Note, it may
  54                  // still fail to start (see CS_FAILED and CS_NO_DEVICE).
  55   CS_RUNNING,    // The capturer has been started successfully and is now
  56                  // capturing.
  57   CS_PAUSED,     // The capturer has been paused.
  58   CS_FAILED,     // The capturer failed to start.
  59   CS_NO_DEVICE,  // The capturer has no device and consequently failed to start.
  60 };
  61
  62 class VideoFrame;
  63
  64 struct CapturedFrame {
  65   static const uint32 kFrameHeaderSize = 40;  // Size from width to data_size, in bytes.
  66   static const uint32 kUnknownDataSize = 0xFFFFFFFF;  // NOTE(review): presumably the data_size value meaning "unknown" - confirm.
  67
  68   CapturedFrame();
  69
  70   // Get the number of bytes of the frame data. If data_size is known, return
  71   // it directly. Otherwise, calculate the size based on width, height, and
  72   // fourcc. Return true on success; the result is written to |size|.
  73   bool GetDataSize(uint32* size) const;
  74
  75   // The width and height of the captured frame could be different from those
  76   // of VideoFormat. Once the first frame is captured, the width, height,
  77   // fourcc, pixel_width, and pixel_height should stay the same across frames.
  78   int    width;         // in number of pixels
  79   int    height;        // in number of pixels
  80   uint32 fourcc;        // fourcc code identifying the compression
  81   uint32 pixel_width;   // width of a pixel, default is 1
  82   uint32 pixel_height;  // height of a pixel, default is 1
  83   int64  elapsed_time;  // elapsed time since the creation of the frame
  84                         // source (that is, the camera), in nanoseconds.
  85   int64  time_stamp;    // timestamp of when the frame was captured, in unix
  86                         // time with nanosecond units.
  87   uint32 data_size;     // number of bytes of the frame data
  88   int    rotation;      // rotation in degrees of the frame (0, 90, 180, 270)
  89   void*  data;          // pointer to the frame data. This object allocates the
  90                         // memory or points to an existing memory.
  91
  92  private:
  93   DISALLOW_COPY_AND_ASSIGN(CapturedFrame);  // Frames are not copyable.
  94 };
  95
  96 // VideoCapturer is an abstract class that defines the interfaces for video
  97 // capturing. The subclasses implement the video capturer for various types of
  98 // capturers and various platforms.
  99 //
 100 // The captured frames may need to be adapted (for example, cropping). Adaptors
 101 // can be registered to the capturer or applied externally to the capturer.
 102 // If the adaptor is needed, it acts as the downstream of VideoCapturer, adapts
 103 // the captured frames, and delivers the adapted frames to other components
 104 // such as the encoder. Effects can also be registered to the capturer or
 105 // applied externally.
 106 //
 107 // Programming model:
 108 //   Create an object of a subclass of VideoCapturer
 109 //   Initialize
 110 //   SignalStateChange.connect()
 111 //   SignalFrameCaptured.connect()
 112 //   Find the capture format for Start() by either calling GetSupportedFormats()
 113 //   and selecting one of the supported or calling GetBestCaptureFormat().
 114 //   Start()
 115 //   GetCaptureFormat() optionally
 116 //   Stop()
 117 //
 118 // Assumption:
 119 //   The Start() and Stop() methods are called by a single thread (e.g., the
 120 //   media engine thread). Hence, the VideoCapturer subclasses don't need to be
 121 //   thread safe.
 122 //
 123 class VideoCapturer
 124     : public sigslot::has_slots<>,
 125       public talk_base::MessageHandler {
 126  public:
 127   typedef std::vector<VideoProcessor*> VideoProcessors;
 128
 129   // All signals are marshalled to |thread| or the creating thread if
 130   // none is provided.
 131   VideoCapturer();
 132   explicit VideoCapturer(talk_base::Thread* thread);
 133   virtual ~VideoCapturer() {}
 134
 135   // Gets the id of the underlying device, which is available after the capturer
 136   // is initialized. Can be used to determine if two capturers reference the
 137   // same device.
 138   const std::string& GetId() const { return id_; }
 139
 140   // Get the capture formats supported by the video capturer. The supported
 141   // formats are non empty after the device has been opened successfully.
 142   const std::vector<VideoFormat>* GetSupportedFormats() const;
 143
 144   // Get the best capture format for the desired format. The best format is the
 145   // same as one of the supported formats except that the frame interval may be
 146   // different. If the application asks for 16x9 and the camera does not support
 147   // 16x9 HD or the application asks for 16x10, we find the closest 4x3 and then
 148   // crop; Otherwise, we find what the application asks for. Note that we assume
 149   // that for HD, the desired format is always 16x9. The subclasses can override
 150   // the default implementation.
 151   // Parameters
 152   //   desired: the input desired format. If desired.fourcc is not kAnyFourcc,
 153   //            the best capture format has the exactly same fourcc. Otherwise,
 154   //            the best capture format uses a fourcc in GetPreferredFourccs().
 155   //   best_format: the output of the best capture format.
 156   // Return false if there is no such a best format, that is, the desired format
 157   // is not supported.
 158   virtual bool GetBestCaptureFormat(const VideoFormat& desired,
 159                                     VideoFormat* best_format);
 160
 161   // TODO(hellner): deprecate (make private) the Start API in favor of this one.
 162   //                Also remove CS_STARTING as it is implied by the return
 163   //                value of StartCapturing().
 164   bool StartCapturing(const VideoFormat& capture_format);
 165   // Start the video capturer with the specified capture format.
 166   // Parameter
 167   //   capture_format: The caller got this parameter by either calling
 168   //                   GetSupportedFormats() and selecting one of the supported
 169   //                   or calling GetBestCaptureFormat().
 170   // Return
 171   //   CS_STARTING:  The capturer is trying to start. Success or failure will
 172   //                 be notified via the |SignalStateChange| callback.
 173   //   CS_RUNNING:   if the capturer is started and capturing.
 174   //   CS_PAUSED:    Will never be returned.
 175   //   CS_FAILED:    if the capturer failes to start..
 176   //   CS_NO_DEVICE: if the capturer has no device and fails to start.
 177   virtual CaptureState Start(const VideoFormat& capture_format) = 0;
 178   // Sets the desired aspect ratio. If the capturer is capturing at another
 179   // aspect ratio it will crop the width or the height so that asked for
 180   // aspect ratio is acheived. Note that ratio_w and ratio_h do not need to be
 181   // relatively prime.
 182   void UpdateAspectRatio(int ratio_w, int ratio_h);
 183   void ClearAspectRatio();
 184
 185   // Get the current capture format, which is set by the Start() call.
 186   // Note that the width and height of the captured frames may differ from the
 187   // capture format. For example, the capture format is HD but the captured
 188   // frames may be smaller than HD.
 189   const VideoFormat* GetCaptureFormat() const {
 190     return capture_format_.get();
 191   }
 192
 193   // Pause the video capturer.
 194   virtual bool Pause(bool paused);
 195   // Stop the video capturer.
 196   virtual void Stop() = 0;
 197   // Check if the video capturer is running.
 198   virtual bool IsRunning() = 0;
 199   // Restart the video capturer with the new |capture_format|.
 200   // Default implementation stops and starts the capturer.
 201   virtual bool Restart(const VideoFormat& capture_format);
 202   // TODO(thorcarpenter): This behavior of keeping the camera open just to emit
 203   // black frames is a total hack and should be fixed.
 204   // When muting, produce black frames then pause the camera.
 205   // When unmuting, start the camera. Camera starts unmuted.
 206   virtual bool MuteToBlackThenPause(bool muted);
 207   virtual bool IsMuted() const {
 208     return muted_;
 209   }
 210   CaptureState capture_state() const {
 211     return capture_state_;
 212   }
 213
 214   // Adds a video processor that will be applied on VideoFrames returned by
 215   // |SignalVideoFrame|. Multiple video processors can be added. The video
 216   // processors will be applied in the order they were added.
 217   void AddVideoProcessor(VideoProcessor* video_processor);
 218   // Removes the |video_processor| from the list of video processors or
 219   // returns false.
 220   bool RemoveVideoProcessor(VideoProcessor* video_processor);
 221
 222   // Returns true if the capturer is screencasting. This can be used to
 223   // implement screencast specific behavior.
 224   virtual bool IsScreencast() const = 0;
 225
 226   // Caps the VideoCapturer's format according to max_format. It can e.g. be
 227   // used to prevent cameras from capturing at a resolution or framerate that
 228   // the capturer is capable of but not performing satisfactorily at.
 229   // The capping is an upper bound for each component of the capturing format.
 230   // The fourcc component is ignored.
 231   void ConstrainSupportedFormats(const VideoFormat& max_format);
 232
 233   void set_enable_camera_list(bool enable_camera_list) {
 234     enable_camera_list_ = enable_camera_list;
 235   }
 236   bool enable_camera_list() {
 237     return enable_camera_list_;
 238   }
 239
 240   // Enable scaling to ensure square pixels.
 241   void set_square_pixel_aspect_ratio(bool square_pixel_aspect_ratio) {
 242     square_pixel_aspect_ratio_ = square_pixel_aspect_ratio;
 243   }
 244   bool square_pixel_aspect_ratio() {
 245     return square_pixel_aspect_ratio_;
 246   }
 247
 248   // Signal all capture state changes that are not a direct result of calling
 249   // Start().
 250   sigslot::signal2<VideoCapturer*, CaptureState> SignalStateChange;
 251   // Frame callbacks are multithreaded to allow disconnect and connect to be
 252   // called concurrently. It also ensures that it is safe to call disconnect
 253   // at any time which is needed since the signal may be called from an
 254   // unmarshalled thread owned by the VideoCapturer.
 255   // Signal the captured frame to downstream.
 256   sigslot::signal2<VideoCapturer*, const CapturedFrame*,
 257                    sigslot::multi_threaded_local> SignalFrameCaptured;
 258   // A VideoAdapter should be hooked up to SignalAdaptFrame which will be
 259   // called before forwarding the frame to SignalVideoFrame. The parameters
 260   // are this capturer instance, the input video frame and output frame
 261   // pointer, respectively.
 262   sigslot::signal3<VideoCapturer*, const VideoFrame*, VideoFrame**,
 263                    sigslot::multi_threaded_local> SignalAdaptFrame;
 264   // Signal the captured and possibly adapted frame to downstream consumers
 265   // such as the encoder.
 266   sigslot::signal2<VideoCapturer*, const VideoFrame*,
 267                    sigslot::multi_threaded_local> SignalVideoFrame;
 268
 269   const VideoProcessors& video_processors() const { return video_processors_; }
 270
 271   // If 'screencast_max_pixels' is set greater than zero, screencasts will be
 272   // scaled to be no larger than this value.
 273   // If set to zero, the max pixels will be limited to
 274   // Retina MacBookPro 15" resolution of 2880 x 1800.
 275   // For high fps, maximum pixels limit is set based on common 24" monitor
 276   // resolution of 2048 x 1280.
 277   int screencast_max_pixels() const { return screencast_max_pixels_; }
 278   void set_screencast_max_pixels(int p) {
 279     screencast_max_pixels_ = talk_base::_max(0, p);
 280   }
 281
 282  protected:
 283   // Callback attached to SignalFrameCaptured where SignalVideoFrames is called.
 284   void OnFrameCaptured(VideoCapturer* video_capturer,
 285                        const CapturedFrame* captured_frame);
 286   void SetCaptureState(CaptureState state);
 287
 288   // Marshals SignalStateChange onto thread_.
 289   void OnMessage(talk_base::Message* message);
 290
 291   // subclasses override this virtual method to provide a vector of fourccs, in
 292   // order of preference, that are expected by the media engine.
 293   virtual bool GetPreferredFourccs(std::vector<uint32>* fourccs) = 0;
 294
 295   // mutators to set private attributes
 296   void SetId(const std::string& id) {
 297     id_ = id;
 298   }
 299
 300   void SetCaptureFormat(const VideoFormat* format) {
 301     capture_format_.reset(format ? new VideoFormat(*format) : NULL);
 302   }
 303
 304   void SetSupportedFormats(const std::vector<VideoFormat>& formats);
 305
 306  private:
 307   void Construct();
 308   // Get the distance between the desired format and the supported format.
 309   // Return the max distance if they mismatch. See the implementation for
 310   // details.
 311   int64 GetFormatDistance(const VideoFormat& desired,
 312                           const VideoFormat& supported);
 313
 314   // Convert captured frame to readable string for LOG messages.
 315   std::string ToString(const CapturedFrame* frame) const;
 316
 317   // Applies all registered processors. If any of the processors signal that
 318   // the frame should be dropped the return value will be false. Note that
 319   // this frame should be dropped as it has not applied all processors.
 320   bool ApplyProcessors(VideoFrame* video_frame);
 321
 322   // Updates filtered_supported_formats_ so that it contains the formats in
 323   // supported_formats_ that fulfill all applied restrictions.
 324   void UpdateFilteredSupportedFormats();
 325   // Returns true if format doesn't fulfill all applied restrictions.
 326   bool ShouldFilterFormat(const VideoFormat& format) const;
 327
 328   talk_base::Thread* thread_;
 329   std::string id_;
 330   CaptureState capture_state_;
 331   talk_base::scoped_ptr<VideoFormat> capture_format_;
 332   std::vector<VideoFormat> supported_formats_;
 333   talk_base::scoped_ptr<VideoFormat> max_format_;
 334   std::vector<VideoFormat> filtered_supported_formats_;
 335
 336   int ratio_w_;  // View resolution. e.g. 1280 x 720.
 337   int ratio_h_;
 338   bool enable_camera_list_;
 339   bool square_pixel_aspect_ratio_;  // Enable scaling to square pixels.
 340   int scaled_width_;  // Current output size from ComputeScale.
 341   int scaled_height_;
 342   int screencast_max_pixels_;  // Downscale screencasts further if requested.
 343   bool muted_;
 344   int black_frame_count_down_;
 345
 346   talk_base::CriticalSection crit_;
 347   VideoProcessors video_processors_;
 348
 349   DISALLOW_COPY_AND_ASSIGN(VideoCapturer);
 350 };
 351
 352 }  // namespace cricket
 353
 354 #endif  // TALK_MEDIA_BASE_VIDEOCAPTURER_H_