src/third_party/webrtc/modules/audio_coding/main/acm2/acm_receiver.h

   1 /*
   2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
  12 #define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
  13
  14 #include <vector>
  15
  16 #include "webrtc/base/thread_annotations.h"
  17 #include "webrtc/common_audio/vad/include/webrtc_vad.h"
  18 #include "webrtc/engine_configurations.h"
  19 #include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
  20 #include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h"
  21 #include "webrtc/modules/audio_coding/main/acm2/acm_resampler.h"
  22 #include "webrtc/modules/audio_coding/main/acm2/call_statistics.h"
  23 #include "webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h"
  24 #include "webrtc/modules/audio_coding/neteq/interface/neteq.h"
  25 #include "webrtc/modules/interface/module_common_types.h"
  26 #include "webrtc/system_wrappers/interface/scoped_ptr.h"
  27 #include "webrtc/typedefs.h"
  28
  29 namespace webrtc {
  30
  31 struct CodecInst;
  32 class CriticalSectionWrapper;
  33 class NetEq;
  34
  35 namespace acm2 {
  36
  37 class Nack;
  38
  39 class AcmReceiver {  // Receive side of ACM, built on NetEq (see |neteq_|).
  40  public:
  41   struct Decoder {  // Element type of the |decoders_| array.
  42     bool registered;
  43     uint8_t payload_type;
  44     // This field is meaningful for codecs where both mono and
  45     // stereo versions are registered under the same ID.
  46     int channels;
  47   };
  48
  49   // Constructs a receiver from the given ACM configuration.
  50   explicit AcmReceiver(const AudioCodingModule::Config& config);
  51
  52   // Destructor of the class.
  53   ~AcmReceiver();
  54
  55   //
  56   // Inserts a payload with its associated RTP-header into NetEq.
  57   //
  58   // Input:
  59   //   - rtp_header           : RTP header for the incoming payload containing
  60   //                            information about payload type, sequence number,
  61   //                            timestamp, SSRC and marker bit.
  62   //   - incoming_payload     : Incoming audio payload.
  63   //   - length_payload       : Length of incoming audio payload in bytes.
  64   //
  65   // Return value             : 0 if OK.
  66   //                           <0 if NetEq returned an error.
  67   //
  68   int InsertPacket(const WebRtcRTPHeader& rtp_header,
  69                    const uint8_t* incoming_payload,
  70                    int length_payload);
  71
  72   //
  73   // Asks NetEq for 10 milliseconds of decoded audio.
  74   //
  75   // Input:
  76   //   -desired_freq_hz       : specifies the sampling rate [Hz] of the output
  77   //                            audio. If set to -1, no resampling is
  78   //                            required and the audio is returned at the
  79   //                            sampling rate of the decoder.
  80   //
  81   // Output:
  82   //   -audio_frame           : an audio frame where output data and
  83   //                            associated parameters are written to.
  84   //
  85   // Return value             : 0 if OK.
  86   //                           -1 if NetEq returned an error.
  87   //
  88   int GetAudio(int desired_freq_hz, AudioFrame* audio_frame);
  89
  90   //
  91   // Adds a new codec to the NetEq codec database.
  92   //
  93   // Input:
  94   //   - acm_codec_id        : ACM codec ID.
  95   //   - payload_type        : payload type.
  96   //   - channels            : number of channels to register for the codec.
  97   //   - audio_decoder       : pointer to a decoder object. If it is NULL
  98   //                           then NetEq will internally create the decoder
  99   //                           object. Otherwise, NetEq will store this pointer
 100   //                           as the decoder corresponding with the given
 101   //                           payload type. NetEq won't acquire the ownership
 102   //                           of this pointer; the client of this class (ACM)
 103   //                           must delete it. By providing |audio_decoder|
 104   //                           ACM will have control over the decoder instance
 105   //                           of the codec. This is essential for a codec like
 106   //                           iSAC, whose encoder has to know about the decoder
 107   //                           (bandwidth estimator updated at decoding time).
 108   //
 109   // Return value             : 0 if OK.
 110   //                           <0 if NetEq returned an error.
 111   //
 112   int AddCodec(int acm_codec_id,
 113                uint8_t payload_type,
 114                int channels,
 115                AudioDecoder* audio_decoder);
 116
 117   //
 118   // Sets a minimum delay for packet buffer. The given delay is maintained,
 119   // unless channel condition dictates a higher delay.
 120   //
 121   // Input:
 122   //   - delay_ms             : minimum delay in milliseconds.
 123   //
 124   // Return value             : 0 if OK.
 125   //                           <0 if NetEq returned an error.
 126   //
 127   int SetMinimumDelay(int delay_ms);
 128
 129   //
 130   // Sets a maximum delay [ms] for the packet buffer. The target delay does not
 131   // exceed the given value, even if channel condition requires so.
 132   //
 133   // Input:
 134   //   - delay_ms             : maximum delay in milliseconds.
 135   //
 136   // Return value             : 0 if OK.
 137   //                           <0 if NetEq returned an error.
 138   //
 139   int SetMaximumDelay(int delay_ms);
 140
 141   //
 142   // Get least required delay computed based on channel conditions. Note that
 143   // this is before applying any user-defined limits (specified by calling
 144   // SetMinimumDelay() and/or SetMaximumDelay()).
 145   //
 146   int LeastRequiredDelayMs() const;
 147
 148   //
 149   // Sets an initial delay of |delay_ms| milliseconds. This introduces a playout
 150   // delay. Silence (zero signal) is played out until equivalent of |delay_ms|
 151   // milliseconds of audio is buffered. Then, NetEq maintains the delay.
 152   //
 153   // Input:
 154   //   - delay_ms             : initial delay in milliseconds.
 155   //
 156   // Return value             : 0 if OK.
 157   //                           <0 if NetEq returned an error.
 158   //
 159   int SetInitialDelay(int delay_ms);
 160
 161   //
 162   // Resets the initial delay to zero.
 163   //
 164   void ResetInitialDelay();
 165
 166   //
 167   // Get the current sampling frequency in Hz.
 168   //
 169   // Return value             : Sampling frequency in Hz.
 170   //
 171   int current_sample_rate_hz() const;
 172
 173   //
 174   // Sets the playout mode.
 175   //
 176   // Input:
 177   //   - mode                 : an enumerator specifying the playout mode.
 178   //
 179   void SetPlayoutMode(AudioPlayoutMode mode);
 180
 181   //
 182   // Get the current playout mode.
 183   //
 184   // Return value             : The current playout mode.
 185   //
 186   AudioPlayoutMode PlayoutMode() const;
 187
 188   //
 189   // Get the current network statistics from NetEq.
 190   //
 191   // Output:
 192   //   - statistics           : The current network statistics.
 193   //
 194   void NetworkStatistics(ACMNetworkStatistics* statistics);
 195
 196   //
 197   // Enable post-decoding VAD.
 198   //
 199   void EnableVad();
 200
 201   //
 202   // Disable post-decoding VAD.
 203   //
 204   void DisableVad();
 205
 206   //
 207   // Returns whether post-decoding VAD is enabled (true) or disabled (false).
 208   //
 209   bool vad_enabled() const { return vad_enabled_; }
 210
 211   //
 212   // Flushes the NetEq packet and speech buffers.
 213   //
 214   void FlushBuffers();
 215
 216   //
 217   // Removes a payload-type from the NetEq codec database.
 218   //
 219   // Input:
 220   //   - payload_type         : the payload-type to be removed.
 221   //
 222   // Return value             : 0 if OK.
 223   //                           -1 if an error occurred.
 224   //
 225   int RemoveCodec(uint8_t payload_type);
 226
 227   //
 228   // Remove all registered codecs.
 229   //
 230   int RemoveAllCodecs();
 231
 232   //
 233   // Set ID.
 234   //
 235   void set_id(int id);  // TODO(turajs): can be inline.
 236
 237   //
 238   // Gets the RTP timestamp of the last sample delivered by GetAudio().
 239   // Returns true if the RTP timestamp is valid, otherwise false.
 240   //
 241   bool GetPlayoutTimestamp(uint32_t* timestamp);
 242
 243   //
 244   // Return the index of the codec associated with the last non-CNG/non-DTMF
 245   // received payload. If no non-CNG/non-DTMF payload is received -1 is
 246   // returned.
 247   //
 248   int last_audio_codec_id() const;  // TODO(turajs): can be inline.
 249
 250   //
 251   // Return the payload-type of the last non-CNG/non-DTMF RTP packet. If no
 252   // non-CNG/non-DTMF packet is received -1 is returned.
 253   //
 254   int last_audio_payload_type() const;  // TODO(turajs): can be inline.
 255
 256   //
 257   // Get the audio codec associated with the last non-CNG/non-DTMF received
 258   // payload. If no non-CNG/non-DTMF packet is received -1 is returned,
 259   // otherwise return 0.
 260   //
 261   int LastAudioCodec(CodecInst* codec) const;
 262
 263   //
 264   // Return payload type of RED if it is registered, otherwise return -1.
 265   //
 266   int RedPayloadType() const;
 267
 268   //
 269   // Get a decoder given its registered payload-type.
 270   //
 271   // Input:
 272   //    -payload_type         : the payload-type of the codec to be retrieved.
 273   //
 274   // Output:
 275   //    -codec                : codec associated with the given payload-type.
 276   //
 277   // Return value             : 0 if succeeded.
 278   //                           -1 if failed, e.g. given payload-type is not
 279   //                              registered.
 280   //
 281   int DecoderByPayloadType(uint8_t payload_type,
 282                            CodecInst* codec) const;
 283
 284   //
 285   // Enable NACK and set the maximum size of the NACK list. If NACK is already
 286   // enabled then the maximum NACK list size is modified accordingly.
 287   //
 288   // Input:
 289   //    -max_nack_list_size  : maximum NACK list size
 290   //                           should be positive (non-zero) and less than or
 291   //                           equal to |Nack::kNackListSizeLimit|
 292   // Return value
 293   //                         : 0 if succeeded.
 294   //                          -1 if failed
 295   //
 296   int EnableNack(size_t max_nack_list_size);
 297
 298   // Disable NACK.
 299   void DisableNack();
 300
 301   //
 302   // Get a list of packets to be retransmitted.
 303   //
 304   // Input:
 305   //    -round_trip_time_ms : estimate of the round-trip-time (in milliseconds).
 306   // Return value           : list of packets to be retransmitted.
 307   //
 308   std::vector<uint16_t> GetNackList(int round_trip_time_ms) const;
 309
 310   //
 311   // Get statistics of calls to GetAudio().
 312   void GetDecodingCallStatistics(AudioDecodingCallStats* stats) const;
 313
 314  private:
 315   int PayloadType2CodecIndex(uint8_t payload_type) const;
 316
 317   bool GetSilence(int desired_sample_rate_hz, AudioFrame* frame)
 318       EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
 319
 320   int GetNumSyncPacketToInsert(uint16_t received_squence_number);  // NOTE(review): "squence" [sic].
 321
 322   int RtpHeaderToCodecIndex(
 323       const RTPHeader& rtp_header, const uint8_t* payload) const;
 324
 325   uint32_t NowInTimestamp(int decoder_sampling_rate) const;
 326
 327   void InsertStreamOfSyncPackets(InitialDelayManager::SyncStream* sync_stream);
 328
 329   scoped_ptr<CriticalSectionWrapper> crit_sect_;
 330   int id_;  // TODO(henrik.lundin) Make const.
 331   int last_audio_decoder_ GUARDED_BY(crit_sect_);
 332   AudioFrame::VADActivity previous_audio_activity_ GUARDED_BY(crit_sect_);
 333   int current_sample_rate_hz_ GUARDED_BY(crit_sect_);
 334   ACMResampler resampler_ GUARDED_BY(crit_sect_);
 335   // Used in GetAudio, declared as member to avoid allocating every 10ms.
 336   // TODO(henrik.lundin) Stack-allocate in GetAudio instead?
 337   int16_t audio_buffer_[AudioFrame::kMaxDataSizeSamples] GUARDED_BY(crit_sect_);
 338   scoped_ptr<Nack> nack_ GUARDED_BY(crit_sect_);
 339   bool nack_enabled_ GUARDED_BY(crit_sect_);
 340   CallStatistics call_stats_ GUARDED_BY(crit_sect_);
 341   NetEq* neteq_;
 342   Decoder decoders_[ACMCodecDB::kMaxNumCodecs];
 343   bool vad_enabled_;
 344   Clock* clock_;  // TODO(henrik.lundin) Make const if possible.
 345
 346   // Indicates if a non-zero initial delay is set, and the receiver is in
 347   // AV-sync mode.
 348   bool av_sync_;
 349   scoped_ptr<InitialDelayManager> initial_delay_manager_;
 350
 351   // The following are defined as members to avoid creating them in every
 352   // iteration. |missing_packets_sync_stream_| is *ONLY* used in InsertPacket().
 353   // |late_packets_sync_stream_| is only used in GetAudio(). Both of these
 354   // member variables are allocated only when AV-sync is enabled, i.e.
 355   // initial delay is set.
 356   scoped_ptr<InitialDelayManager::SyncStream> missing_packets_sync_stream_;
 357   scoped_ptr<InitialDelayManager::SyncStream> late_packets_sync_stream_;
 358 };
 359
 360 }  // namespace acm2
 361
 362 }  // namespace webrtc
 363
 364 #endif  // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_