src/third_party/webrtc/modules/audio_coding/main/acm2/acm_receiver.h

   1 /*
   2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
  12 #define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
  13
  14 #include <vector>
  15
  16 #include "webrtc/common_audio/vad/include/webrtc_vad.h"
  17 #include "webrtc/engine_configurations.h"
  18 #include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
  19 #include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h"
  20 #include "webrtc/modules/audio_coding/main/acm2/acm_resampler.h"
  21 #include "webrtc/modules/audio_coding/main/acm2/call_statistics.h"
  22 #include "webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h"
  23 #include "webrtc/modules/audio_coding/neteq4/interface/neteq.h"
  24 #include "webrtc/modules/interface/module_common_types.h"
  25 #include "webrtc/system_wrappers/interface/scoped_ptr.h"
  26 #include "webrtc/typedefs.h"
  27
  28 namespace webrtc {
  29
  30 struct CodecInst;
  31 class CriticalSectionWrapper;
  32 class RWLockWrapper;
  33 class NetEq;
  34
  35 namespace acm2 {
  36
  37 class Nack;
  38 // Receive side of ACM: feeds RTP packets to NetEq and pulls decoded audio.
  39 class AcmReceiver {
  40  public:
  41   struct Decoder {
  42     bool registered;
  43     uint8_t payload_type;
  44     // This field is meaningful for codecs where both mono and
  45     // stereo versions are registered under the same ID.
  46     int channels;
  47   };
  48
  49   // Constructs a receiver from the given |config|.
  50   explicit AcmReceiver(const AudioCodingModule::Config& config);
  51
  52   // Destructor of the class.
  53   ~AcmReceiver();
  54
  55   //
  56   // Inserts a payload with its associated RTP-header into NetEq.
  57   //
  58   // Input:
  59   //   - rtp_header           : RTP header for the incoming payload containing
  60   //                            information about payload type, sequence number,
  61   //                            timestamp, SSRC and marker bit.
  62   //   - incoming_payload     : Incoming audio payload.
  63   //   - length_payload       : Length of incoming audio payload in bytes.
  64   //
  65   // Return value             : 0 if OK.
  66   //                           <0 if NetEq returned an error.
  67   //
  68   int InsertPacket(const WebRtcRTPHeader& rtp_header,
  69                    const uint8_t* incoming_payload,
  70                    int length_payload);
  71
  72   //
  73   // Asks NetEq for 10 milliseconds of decoded audio.
  74   //
  75   // Input:
  76   //   -desired_freq_hz       : specifies the sampling rate [Hz] of the output
  77   //                            audio. If set to -1, no resampling is
  78   //                            required and the audio is returned at the
  79   //                            sampling rate of the decoder.
  80   //
  81   // Output:
  82   //   -audio_frame           : an audio frame where output data and
  83   //                            associated parameters are written to.
  84   //
  85   // Return value             : 0 if OK.
  86   //                           -1 if NetEq returned an error.
  87   //
  88   int GetAudio(int desired_freq_hz, AudioFrame* audio_frame);
  89
  90   //
  91   // Adds a new codec to the NetEq codec database.
  92   //
  93   // Input:
  94   //   - acm_codec_id        : ACM codec ID.
  95   //   - payload_type        : payload type.
  96   //   - channels            : number of audio channels. NOTE(review):
  97   //                           presumably 1 = mono, 2 = stereo; confirm.
  98   //   - audio_decoder       : pointer to a decoder object. If NULL, NetEq
  99   //                           creates the decoder internally. Otherwise NetEq
 100   //                           stores this pointer as the decoder for the given
 101   //                           payload type without taking ownership; the
 102   //                           client of this class (ACM) must delete it.
 103   //                           Providing |audio_decoder| gives ACM control over
 104   //                           the decoder instance, which is essential for a
 105   //                           codec like iSAC whose encoder has to know about
 106   //                           the decoder (bandwidth estimator that is updated
 107   //                           at decoding time).
 108   //
 109   // Return value             : 0 if OK.
 110   //                           <0 if NetEq returned an error.
 111   //
 112   int AddCodec(int acm_codec_id,
 113                uint8_t payload_type,
 114                int channels,
 115                AudioDecoder* audio_decoder);
 116
 117   //
 118   // Sets a minimum delay for packet buffer. The given delay is maintained,
 119   // unless channel condition dictates a higher delay.
 120   //
 121   // Input:
 122   //   - delay_ms             : minimum delay in milliseconds.
 123   //
 124   // Return value             : 0 if OK.
 125   //                           <0 if NetEq returned an error.
 126   //
 127   int SetMinimumDelay(int delay_ms);
 128
 129   //
 130   // Sets a maximum delay [ms] for the packet buffer. The target delay does not
 131   // exceed the given value, even if channel condition requires so.
 132   //
 133   // Input:
 134   //   - delay_ms             : maximum delay in milliseconds.
 135   //
 136   // Return value             : 0 if OK.
 137   //                           <0 if NetEq returned an error.
 138   //
 139   int SetMaximumDelay(int delay_ms);
 140
 141   //
 142   // Get least required delay computed based on channel conditions. Note that
 143   // this is before applying any user-defined limits (specified by calling
 144   // SetMinimumDelay() and/or SetMaximumDelay()).
 145   //
 146   int LeastRequiredDelayMs() const;
 147
 148   //
 149   // Sets an initial delay of |delay_ms| milliseconds. This introduces a playout
 150   // delay. Silence (zero signal) is played out until equivalent of |delay_ms|
 151   // milliseconds of audio is buffered. Then, NetEq maintains the delay.
 152   //
 153   // Input:
 154   //   - delay_ms             : initial delay in milliseconds.
 155   //
 156   // Return value             : 0 if OK.
 157   //                           <0 if NetEq returned an error.
 158   //
 159   int SetInitialDelay(int delay_ms);
 160
 161   //
 162   // Resets the initial delay to zero.
 163   //
 164   void ResetInitialDelay();
 165
 166   //
 167   // Get the current sampling frequency in Hz.
 168   //
 169   // Return value             : Sampling frequency in Hz.
 170   //
 171   int current_sample_rate_hz() const;
 172
 173   //
 174   // Sets the playout mode.
 175   //
 176   // Input:
 177   //   - mode                 : an enumerator specifying the playout mode.
 178   //
 179   void SetPlayoutMode(AudioPlayoutMode mode);
 180
 181   //
 182   // Get the current playout mode.
 183   //
 184   // Return value             : The current playout mode.
 185   //
 186   AudioPlayoutMode PlayoutMode() const;
 187
 188   //
 189   // Get the current network statistics from NetEq.
 190   //
 191   // Output:
 192   //   - statistics           : The current network statistics.
 193   //
 194   void NetworkStatistics(ACMNetworkStatistics* statistics);
 195
 196   //
 197   // Enable post-decoding VAD.
 198   //
 199   void EnableVad();
 200
 201   //
 202   // Disable post-decoding VAD.
 203   //
 204   void DisableVad();
 205
 206   //
 207   // Returns whether post-decoding VAD is enabled (true) or disabled (false).
 208   //
 209   bool vad_enabled() const { return vad_enabled_; }
 210
 211   //
 212   // Get the decode lock used to protect decoder instances while decoding.
 213   //
 214   // Return value             : Pointer to the decode lock.
 215   //
 216   RWLockWrapper* DecodeLock() const { return decode_lock_; }
 217
 218   //
 219   // Flushes the NetEq packet and speech buffers.
 220   //
 221   void FlushBuffers();
 222
 223   //
 224   // Removes a payload-type from the NetEq codec database.
 225   //
 226   // Input:
 227   //   - payload_type         : the payload-type to be removed.
 228   //
 229   // Return value             : 0 if OK.
 230   //                           -1 if an error occurred.
 231   //
 232   int RemoveCodec(uint8_t payload_type);
 233
 234   //
 235   // Remove all registered codecs.
 236   // NOTE(review): return value is undocumented; presumably 0 if OK - confirm.
 237   int RemoveAllCodecs();
 238
 239   //
 240   // Set ID.
 241   //
 242   void set_id(int id);  // TODO(turajs): can be inline.
 243
 244   //
 245   // Returns the RTP timestamp of the last sample delivered by GetAudio().
 246   //
 247   uint32_t PlayoutTimestamp();
 248
 249   //
 250   // Return the index of the codec associated with the last non-CNG/non-DTMF
 251   // received payload. If no non-CNG/non-DTMF payload is received -1 is
 252   // returned.
 253   //
 254   int last_audio_codec_id() const;  // TODO(turajs): can be inline.
 255
 256   //
 257   // Return the payload-type of the last non-CNG/non-DTMF RTP packet. If no
 258   // non-CNG/non-DTMF packet is received -1 is returned.
 259   //
 260   int last_audio_payload_type() const;  // TODO(turajs): can be inline.
 261
 262   //
 263   // Get the audio codec associated with the last non-CNG/non-DTMF received
 264   // payload. If no non-CNG/non-DTMF packet is received -1 is returned,
 265   // otherwise return 0.
 266   //
 267   int LastAudioCodec(CodecInst* codec) const;
 268
 269   //
 270   // Return payload type of RED if it is registered, otherwise return -1.
 271   //
 272   int RedPayloadType() const;
 273
 274   //
 275   // Get a decoder given its registered payload-type.
 276   //
 277   // Input:
 278   //    -payload_type         : the payload-type of the codec to be retrieved.
 279   //
 280   // Output:
 281   //    -codec                : codec associated with the given payload-type.
 282   //
 283   // Return value             : 0 if succeeded.
 284   //                           -1 if failed, e.g. given payload-type is not
 285   //                              registered.
 286   //
 287   int DecoderByPayloadType(uint8_t payload_type,
 288                            CodecInst* codec) const;
 289
 290   //
 291   // Enable NACK and set the maximum size of the NACK list. If NACK is already
 292   // enabled then the maximum NACK list size is modified accordingly.
 293   //
 294   // Input:
 295   //    -max_nack_list_size  : maximum NACK list size;
 296   //                           should be positive (non-zero) and less than or
 297   //                           equal to |Nack::kNackListSizeLimit|.
 298   //
 299   // Return value            : 0 if succeeded.
 300   //                          -1 if failed.
 301   //
 302   int EnableNack(size_t max_nack_list_size);
 303
 304   // Disable NACK.
 305   void DisableNack();
 306
 307   //
 308   // Get a list of packets to be retransmitted.
 309   //
 310   // Input:
 311   //    -round_trip_time_ms : estimate of the round-trip-time (in milliseconds).
 312   // Return value           : list of packets to be retransmitted.
 313   //
 314   std::vector<uint16_t> GetNackList(int round_trip_time_ms) const;
 315
 316   //
 317   // Returns the background noise mode. This is only for testing and ACM is not
 318   // calling this function. Used in acm_receiver_unittest.cc.
 319   //
 320   NetEqBackgroundNoiseMode BackgroundNoiseModeForTest() const;
 321
 322   //
 323   // Get statistics of calls to GetAudio().
 324   void GetDecodingCallStatistics(AudioDecodingCallStats* stats) const;
 325
 326  private:
 327   int PayloadType2CodecIndex(uint8_t payload_type) const;
 328
 329   bool GetSilence(int desired_sample_rate_hz, AudioFrame* frame);
 330
 331   int GetNumSyncPacketToInsert(uint16_t received_squence_number);
 332
 333   int RtpHeaderToCodecIndex(
 334       const RTPHeader& rtp_header, const uint8_t* payload) const;
 335
 336   uint32_t NowInTimestamp(int decoder_sampling_rate) const;
 337
 338   void InsertStreamOfSyncPackets(InitialDelayManager::SyncStream* sync_stream);
 339
 340   int id_;
 341   NetEq* neteq_;
 342   Decoder decoders_[ACMCodecDB::kMaxNumCodecs];
 343   int last_audio_decoder_;
 344   RWLockWrapper* decode_lock_;
 345   CriticalSectionWrapper* neteq_crit_sect_;
 346   bool vad_enabled_;
 347   AudioFrame::VADActivity previous_audio_activity_;
 348   int current_sample_rate_hz_;
 349   ACMResampler resampler_;
 350   // Used in GetAudio, declared as member to avoid allocating every 10ms.
 351   int16_t audio_buffer_[AudioFrame::kMaxDataSizeSamples];
 352   scoped_ptr<Nack> nack_;
 353   bool nack_enabled_;
 354   Clock* clock_;
 355
 356   // Indicates if a non-zero initial delay is set, and the receiver is in
 357   // AV-sync mode.
 358   bool av_sync_;
 359   scoped_ptr<InitialDelayManager> initial_delay_manager_;
 360
 361   // The following are defined as members to avoid creating them in every
 362   // iteration. |missing_packets_sync_stream_| is *ONLY* used in InsertPacket().
 363   // |late_packets_sync_stream_| is only used in GetAudio(). Both of these
 364   // member variables are allocated only when AV-sync is enabled, i.e.
 365   // initial delay is set.
 366   scoped_ptr<InitialDelayManager::SyncStream> missing_packets_sync_stream_;
 367   scoped_ptr<InitialDelayManager::SyncStream> late_packets_sync_stream_;
 368
 369   CallStatistics call_stats_;
 370 };
 371
 372 }  // namespace acm2
 373
 374 }  // namespace webrtc
 375
 376 #endif  // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_