src/third_party/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.h

   1 /*
   2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_SOURCE_AUDIO_CODING_MODULE_IMPL_H_
  12 #define WEBRTC_MODULES_AUDIO_CODING_MAIN_SOURCE_AUDIO_CODING_MODULE_IMPL_H_
  13
  14 #include <vector>
  15
  16 #include "webrtc/common_types.h"
  17 #include "webrtc/engine_configurations.h"
  18 #include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
  19 #include "webrtc/modules/audio_coding/main/source/acm_codec_database.h"
  20 #include "webrtc/modules/audio_coding/main/source/acm_neteq.h"
  21 #include "webrtc/modules/audio_coding/main/source/acm_resampler.h"
  22 #include "webrtc/modules/audio_coding/main/acm2/call_statistics.h"
  23 #include "webrtc/system_wrappers/interface/scoped_ptr.h"
  24
  25 namespace webrtc {
  26
  27 struct WebRtcACMAudioBuff;
  28 struct WebRtcACMCodecParams;
  29 class CriticalSectionWrapper;
  30 class RWLockWrapper;
  31 class Clock;
  32
  33 namespace acm2 {
  34 class Nack;
  35 }
  36
  37 namespace acm1 {
  38
  39 class ACMDTMFDetection;
  40 class ACMGenericCodec;
  41
  42 class AudioCodingModuleImpl : public AudioCodingModule {
       // Implementation of the AudioCodingModule interface in the acm1 namespace.
       // The public section is grouped into sender, FEC, VAD/CNG, receiver and
       // statistics declarations.
       // NOTE(review): SetVADSafe, PushSyncPacketSafe and UpdateBufferingSafe are
       // documented below as callable only from inside the ACM critical section;
       // the other *Safe methods presumably follow the same convention - confirm
       // in the .cc file.
  43  public:
       // NOTE(review): |id| is presumably the initial unique identifier (see
       // ChangeUniqueId()) and |clock| is presumably kept in |clock_|; ownership
       // of |clock| is not visible from this header - confirm in the .cc file.
  44   AudioCodingModuleImpl(const int32_t id, Clock* clock);
  45   ~AudioCodingModuleImpl();
  46
  47   // Change the unique identifier of this object.
  48   virtual int32_t ChangeUniqueId(const int32_t id);
  49
  50   // Returns the number of milliseconds until the module wants a worker thread
  51   // to call Process.
  52   int32_t TimeUntilNextProcess();
  53
  54   // Process any pending tasks such as timeouts.
  55   int32_t Process();
  56
  57   /////////////////////////////////////////
  58   //   Sender
  59   //
  60
  61   // Initialize send codec.
  62   int32_t InitializeSender();
  63
  64   // Reset send codec.
  65   int32_t ResetEncoder();
  66
  67   // Can be called multiple times for Codec, CNG, RED.
  68   int32_t RegisterSendCodec(const CodecInst& send_codec);
  69
  70   // Register Secondary codec for dual-streaming. Dual-streaming is activated
  71   // right after the secondary codec is registered.
  72   int RegisterSecondarySendCodec(const CodecInst& send_codec);
  73
  74   // Unregister the secondary codec. Dual-streaming is deactivated right after
  75   // deregistering secondary codec.
  76   void UnregisterSecondarySendCodec();
  77
  78   // Get the secondary codec.
  79   int SecondarySendCodec(CodecInst* secondary_codec) const;
  80
  81   // Get current send codec.
  82   int32_t SendCodec(CodecInst* current_codec) const;
  83
  84   // Get current send frequency.
  85   int32_t SendFrequency() const;
  86
  87   // Get encode bit-rate.
  88   // Adaptive rate codecs return their current encode target rate, while other
  89   // codecs return their long-term average or their fixed rate.
  90   int32_t SendBitrate() const;
  91
  92   // Set available bandwidth, inform the encoder about the
  93   // estimated bandwidth received from the remote party.
  94   virtual int32_t SetReceivedEstimatedBandwidth(const int32_t bw);
  95
  96   // Register a transport callback which will be
  97   // called to deliver the encoded buffers.
  98   int32_t RegisterTransportCallback(AudioPacketizationCallback* transport);
  99
 100   // Add 10 ms of raw (PCM) audio data to the encoder.
 101   int32_t Add10MsData(const AudioFrame& audio_frame);
 102
 103   /////////////////////////////////////////
 104   // (FEC) Forward Error Correction
 105   //
 106
 107   // Configure FEC status, i.e. on/off.
 108   int32_t SetFECStatus(const bool enable_fec);
 109
 110   // Get FEC status.
 111   bool FECStatus() const;
 112
 113   /////////////////////////////////////////
 114   //   (VAD) Voice Activity Detection
 115   //   and
 116   //   (CNG) Comfort Noise Generation
 117   //
 118
       // Set DTX/VAD status and VAD mode. Note the defaults declared here: DTX
       // enabled, VAD disabled, mode VADNormal.
 119   int32_t SetVAD(bool enable_dtx = true,
 120                  bool enable_vad = false,
 121                  ACMVADMode mode = VADNormal);
 122
       // Get the DTX/VAD status and the VAD mode through the output pointers.
 123   int32_t VAD(bool* dtx_enabled, bool* vad_enabled, ACMVADMode* mode) const;
 124
       // Register a VAD callback.
       // NOTE(review): when the callback is invoked is decided in the .cc file,
       // not visible from this header.
 125   int32_t RegisterVADCallback(ACMVADCallback* vad_callback);
 126
 127   /////////////////////////////////////////
 128   //   Receiver
 129   //
 130
 131   // Initialize receiver, resets codec database etc.
 132   int32_t InitializeReceiver();
 133
 134   // Reset the decoder state.
 135   int32_t ResetDecoder();
 136
 137   // Get current receive frequency.
 138   int32_t ReceiveFrequency() const;
 139
 140   // Get current playout frequency.
 141   int32_t PlayoutFrequency() const;
 142
 143   // Register possible receive codecs, can be called multiple times,
 144   // for codecs, CNG, DTMF, RED.
 145   int32_t RegisterReceiveCodec(const CodecInst& receive_codec);
 146
 147   // Get current received codec.
 148   int32_t ReceiveCodec(CodecInst* current_codec) const;
 149
 150   // Incoming packet from network parsed and ready for decode.
 151   int32_t IncomingPacket(const uint8_t* incoming_payload,
 152                          const int32_t payload_length,
 153                          const WebRtcRTPHeader& rtp_info);
 154
 155   // Incoming payloads, without rtp-info, the rtp-info will be created in ACM.
 156   // One usage for this API is when pre-encoded files are pushed in ACM.
 157   int32_t IncomingPayload(const uint8_t* incoming_payload,
 158                           const int32_t payload_length,
 159                           const uint8_t payload_type,
 160                           const uint32_t timestamp = 0);
 161
 162   // NetEq minimum playout delay (used for lip-sync). The actual target delay
 163   // is the max of |time_ms| and the required delay dictated by the channel.
 164   int SetMinimumPlayoutDelay(int time_ms);
 165
 166   // NetEq maximum playout delay. The actual target delay is the min of
 167   // |time_ms| and the required delay dictated by the channel.
 168   int SetMaximumPlayoutDelay(int time_ms);
 169
 170   // The shortest latency, in milliseconds, required by jitter buffer. This
 171   // is computed based on inter-arrival times and playout mode of NetEq. The
 172   // actual delay is the maximum of least-required-delay and the minimum-delay
 173   // specified by SetMinimumPlayoutDelay() API.
 174   //
 175   int LeastRequiredDelayMs() const ;
 176
 177   // Configure DTMF playout status, i.e. whether the incoming out-of-band DTMF
 178   // tone is played out (on/off).
 179   int32_t SetDtmfPlayoutStatus(const bool enable);
 180
 181   // Get Dtmf playout status.
 182   bool DtmfPlayoutStatus() const;
 183
 184   // Estimate the Bandwidth based on the incoming stream, needed
 185   // for one way audio where the RTCP sends the BW estimate.
 186   // This is also done in the RTP module.
 187   int32_t DecoderEstimatedBandwidth() const;
 188
 189   // Set playout mode voice, fax.
 190   int32_t SetPlayoutMode(const AudioPlayoutMode mode);
 191
 192   // Get playout mode voice, fax.
 193   AudioPlayoutMode PlayoutMode() const;
 194
 195   // Get playout timestamp.
 196   int32_t PlayoutTimestamp(uint32_t* timestamp);
 197
 198   // Get 10 milliseconds of raw audio data to play out, and
 199   // automatic resample to the requested frequency if > 0.
 200   int32_t PlayoutData10Ms(int32_t desired_freq_hz,
 201                           AudioFrame* audio_frame);
 202
 203   /////////////////////////////////////////
 204   //   Statistics
 205   //
 206
       // NOTE(review): most declarations from here to the end of the public
       // section carry no documentation in this header; some presumably override
       // AudioCodingModule methods declared in audio_coding_module.h - check
       // there and in the .cc file for their contracts.
 207   int32_t NetworkStatistics(ACMNetworkStatistics* statistics);
 208
 209   void DestructEncoderInst(void* inst);
 210
 211   int16_t AudioBuffer(WebRtcACMAudioBuff& buffer);
 212
 213   // Get RED payload for iSAC. The method is called when 'this' ACM is
 214   // the default ACM.
 215   int32_t REDPayloadISAC(const int32_t isac_rate,
 216                          const int16_t isac_bw_estimate,
 217                          uint8_t* payload,
 218                          int16_t* length_bytes);
 219
 220   int16_t SetAudioBuffer(WebRtcACMAudioBuff& buffer);
 221
 222   uint32_t EarliestTimestamp() const;
 223
       // |timestamp| is taken by non-const reference, presumably as an output
       // parameter - confirm in the .cc file.
 224   int32_t LastEncodedTimestamp(uint32_t& timestamp) const;
 225
 226   int32_t ReplaceInternalDTXWithWebRtc(const bool use_webrtc_dtx);
 227
 228   int32_t IsInternalDTXReplacedWithWebRtc(bool* uses_webrtc_dtx);
 229
 230   int SetISACMaxRate(int max_bit_per_sec);
 231
 232   int SetISACMaxPayloadSize(int max_size_bytes);
 233
 234   int32_t ConfigISACBandwidthEstimator(
 235       int frame_size_ms,
 236       int rate_bit_per_sec,
 237       bool enforce_frame_size = false);
 238
 239   int UnregisterReceiveCodec(uint8_t payload_type);
 240
       // NOTE(review): presumably returns the sequence numbers to request
       // retransmission for, given |round_trip_time_ms| - confirm in the .cc
       // file.
 241   std::vector<uint16_t> GetNackList(int round_trip_time_ms) const;
 242
 243  protected:
 244   void UnregisterSendCodec();
 245
 246   int32_t UnregisterReceiveCodecSafe(const int16_t id);
 247
 248   ACMGenericCodec* CreateCodec(const CodecInst& codec);
 249
 250   int16_t DecoderParamByPlType(const uint8_t payload_type,
 251                                WebRtcACMCodecParams& codec_params) const;
 252
 253   int16_t DecoderListIDByPlName(
 254       const char* name, const uint16_t frequency = 0) const;
 255
 256   int32_t InitializeReceiverSafe();
 257
 258   bool HaveValidEncoder(const char* caller_name) const;
 259
 260   int32_t RegisterRecCodecMSSafe(const CodecInst& receive_codec,
 261                                  int16_t codec_id,
 262                                  int16_t mirror_id,
 263                                  ACMNetEQ::JitterBuffer jitter_buffer);
 264
 265   // Set VAD/DTX status. This function does not acquire a lock, and it is
 266   // created to be called only from inside a critical section.
 267   int SetVADSafe(bool enable_dtx, bool enable_vad, ACMVADMode mode);
 268
 269   // Process buffered audio when dual-streaming is not enabled (When RED is
 270   // enabled still this function is used.)
 271   int ProcessSingleStream();
 272
 273   // Process buffered audio when dual-streaming is enabled, i.e. secondary send
 274   // codec is registered.
 275   int ProcessDualStream();
 276
 277   // Preprocessing of input audio, including resampling and down-mixing if
 278   // required, before pushing audio into encoder's buffer.
 279   //
 280   // in_frame: input audio-frame
 281   // ptr_out: pointer to output audio_frame. If no preprocessing is required
 282   //          |ptr_out| will be pointing to |in_frame|, otherwise pointing to
 283   //          |preprocess_frame_|.
 284   //
 285   // Return value:
 286   //   -1: if encountering an error.
 287   //    0: otherwise.
 288   int PreprocessToAddData(const AudioFrame& in_frame,
 289                           const AudioFrame** ptr_out);
 290
 291   // Set initial playout delay.
 292   //  -delay_ms: delay in millisecond.
 293   //
 294   // Return value:
 295   //  -1: if cannot set the delay.
 296   //   0: if delay set successfully.
 297   int SetInitialPlayoutDelay(int delay_ms);
 298
 299   // Enable NACK and set the maximum size of the NACK list.
 300   int EnableNack(size_t max_nack_list_size);
 301
 302   // Disable NACK.
 303   void DisableNack();
 304
 305   void GetDecodingCallStatistics(AudioDecodingCallStats* call_stats) const;
 306
 307  private:
 308   // Change required states after starting to receive the codec corresponding
 309   // to |index|.
 310   int UpdateUponReceivingCodec(int index);
 311
 312   // Remove all slaves and initialize a stereo slave with required codecs
 313   // from the master.
 314   int InitStereoSlave();
 315
 316   // Returns true if the codec's |index| is registered with the master and
 317   // is a stereo codec, RED or CN.
 318   bool IsCodecForSlave(int index) const;
 319
 320   int EncodeFragmentation(int fragmentation_index, int payload_type,
 321                           uint32_t current_timestamp,
 322                           ACMGenericCodec* encoder,
 323                           uint8_t* stream);
 324
 325   void ResetFragmentation(int vector_size);
 326
 327   bool GetSilence(int desired_sample_rate_hz, AudioFrame* frame);
 328
 329   // Push a synchronization packet into NetEq. Such packets result in a frame
 330   // of zeros (not decoded by the corresponding decoder). The size of the frame
 331   // is the same as last decoding. NetEq has a special payload for this.
 332   // Call within the scope of ACM critical section.
 333   int PushSyncPacketSafe();
 334
 335   // Update the parameters required in initial phase of buffering, when
 336   // initial playout delay is requested. Call within the scope of ACM critical
 337   // section.
 338   void UpdateBufferingSafe(const WebRtcRTPHeader& rtp_info,
 339                            int payload_len_bytes);
 340
 341   //
 342   // Return the timestamp of current time, computed according to sampling rate
 343   // of the codec identified by |codec_id|.
 344   //
 345   uint32_t NowTimestamp(int codec_id);
 346
 347   AudioPacketizationCallback* packetization_callback_;
 348   int32_t id_;
 349   uint32_t last_timestamp_;
 350   uint32_t last_in_timestamp_;
 351   CodecInst send_codec_inst_;
       // NOTE(review): presumably the payload types used for comfort noise at
       // narrowband / wideband / super-wideband / fullband and for RED - confirm
       // in the .cc file.
 352   uint8_t cng_nb_pltype_;
 353   uint8_t cng_wb_pltype_;
 354   uint8_t cng_swb_pltype_;
 355   uint8_t cng_fb_pltype_;
 356   uint8_t red_pltype_;
 357   bool vad_enabled_;
 358   bool dtx_enabled_;
 359   ACMVADMode vad_mode_;
       // Per-codec tables, each holding ACMCodecDB::kMaxNumCodecs entries.
 360   ACMGenericCodec* codecs_[ACMCodecDB::kMaxNumCodecs];
 361   ACMGenericCodec* slave_codecs_[ACMCodecDB::kMaxNumCodecs];
 362   int16_t mirror_codec_idx_[ACMCodecDB::kMaxNumCodecs];
 363   bool stereo_receive_[ACMCodecDB::kMaxNumCodecs];
 364   bool stereo_receive_registered_;
 365   bool stereo_send_;
 366   int prev_received_channel_;
 367   int expected_channels_;
 368   int32_t current_send_codec_idx_;
 369   int current_receive_codec_idx_;
 370   bool send_codec_registered_;
 371   ACMResampler input_resampler_;
 372   ACMResampler output_resampler_;
 373   ACMNetEQ neteq_;
       // NOTE(review): raw pointer; allocation and release are not visible from
       // this header - confirm the lifetime handling in the .cc file.
 374   CriticalSectionWrapper* acm_crit_sect_;
 375   ACMVADCallback* vad_callback_;
 376   uint8_t last_recv_audio_codec_pltype_;
 377
 378   // RED/FEC.
 379   bool is_first_red_;
 380   bool fec_enabled_;
 381   // TODO(turajs): |red_buffer_| is allocated in constructor, why having them
 382   // as pointers and not an array. If concerned about the memory, then make a
 383   // set-up function to allocate them only when they are going to be used, i.e.
 384   // FEC or Dual-streaming is enabled.
 385   uint8_t* red_buffer_;
 386   // TODO(turajs): we actually don't need |fragmentation_| as a member variable.
 387   // It is sufficient to keep the length & payload type of previous payload in
 388   // member variables.
 389   RTPFragmentationHeader fragmentation_;
 390   uint32_t last_fec_timestamp_;
 391   // If no RED is registered as receive codec this
 392   // will have an invalid value.
 393   uint8_t receive_red_pltype_;
 394
 395   // This is to keep track of CN instances where we can send DTMFs.
 396   uint8_t previous_pltype_;
 397
 398   // This keeps track of payload types associated with codecs_[].
 399   // We define it as signed variable and initialize with -1 to indicate
 400   // unused elements.
 401   int16_t registered_pltypes_[ACMCodecDB::kMaxNumCodecs];
 402
 403   // Used when payloads are pushed into ACM without any RTP info
 404   // One example is when pre-encoded bit-stream is pushed from
 405   // a file.
 406   WebRtcRTPHeader* dummy_rtp_header_;
 407   uint16_t recv_pl_frame_size_smpls_;
 408
 409   bool receiver_initialized_;
 410   ACMDTMFDetection* dtmf_detector_;
 411
 412   AudioCodingFeedback* dtmf_callback_;
 413   int16_t last_detected_tone_;
 414   CriticalSectionWrapper* callback_crit_sect_;
 415
 416   AudioFrame audio_frame_;
 417   AudioFrame preprocess_frame_;
 418   CodecInst secondary_send_codec_inst_;
 419   scoped_ptr<ACMGenericCodec> secondary_encoder_;
 420
 421   // Initial delay.
 422   int initial_delay_ms_;
 423   int num_packets_accumulated_;
 424   int num_bytes_accumulated_;
 425   int accumulated_audio_ms_;
       // NOTE(review): declared as int although the name reads like a flag -
       // check how the .cc file uses it before changing the type.
 426   int first_payload_received_;
 427   uint32_t last_incoming_send_timestamp_;
 428   bool track_neteq_buffer_;
 429   uint32_t playout_ts_;
 430
 431   // Whether AV-sync is enabled. In AV-sync mode, sync packets are pushed
 432   // during long packet losses.
 433   bool av_sync_;
 434
 435   // Latest send timestamp difference of two consecutive packets.
 436   uint32_t last_timestamp_diff_;
 437   uint16_t last_sequence_number_;
 438   uint32_t last_ssrc_;
 439   bool last_packet_was_sync_;
 440   int64_t last_receive_timestamp_;
 441
 442   Clock* clock_;
 443   scoped_ptr<acm2::Nack> nack_;
 444   bool nack_enabled_;
 445
 446   acm2::CallStatistics call_stats_;
 447 };
 448
 449 }  // namespace acm1
 450
 451 }  // namespace webrtc
 452
 453 #endif  // WEBRTC_MODULES_AUDIO_CODING_MAIN_SOURCE_AUDIO_CODING_MODULE_IMPL_H_