src/third_party/webrtc/modules/audio_coding/neteq/interface/neteq.h

   1 /*
   2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INTERFACE_NETEQ_H_
  12 #define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INTERFACE_NETEQ_H_
  13
  14 #include <string.h>  // Provide access to size_t.
  15
  16 #include <vector>
  17
  18 #include "webrtc/base/constructormagic.h"
  19 #include "webrtc/common_types.h"
  20 #include "webrtc/modules/audio_coding/neteq/interface/audio_decoder.h"
  21 #include "webrtc/typedefs.h"
  22
  23 namespace webrtc {
  24
  25 // Forward declarations.
  26 struct WebRtcRTPHeader;
  27
  28 struct NetEqNetworkStatistics {
       // Statistics record that NetEq::NetworkStatistics() writes for the caller.
       // NOTE(review): "Q14" presumably means fixed point with 14 fractional bits
       // (16384 == 1.0) -- confirm against the implementation.
  29   uint16_t current_buffer_size_ms;  // Current jitter buffer size in ms.
  30   uint16_t preferred_buffer_size_ms;  // Target buffer size in ms.
  31   uint16_t jitter_peaks_found;  // 1 if adding extra delay due to peaky
  32                                 // jitter; 0 otherwise.
  33   uint16_t packet_loss_rate;  // Loss rate (network + late) in Q14.
  34   uint16_t packet_discard_rate;  // Late loss rate in Q14.
  35   uint16_t expand_rate;  // Fraction (of original stream) of synthesized
  36                          // speech inserted through expansion (in Q14).
  37   uint16_t preemptive_rate;  // Fraction of data inserted through pre-emptive
  38                              // expansion (in Q14).
  39   uint16_t accelerate_rate;  // Fraction of data removed through acceleration
  40                              // (in Q14).
  41   int32_t clockdrift_ppm;  // Average clock-drift in parts-per-million
  42                            // (positive or negative).
       // NOTE(review): "off" mode presumably refers to kPlayoutOff -- confirm.
  43   int added_zero_samples;  // Number of zero samples added in "off" mode.
  44 };
  45
  46 enum NetEqOutputType {
       // Speech type that NetEq::GetAudio() writes to its |type| argument.
       // NOTE(review): PLC / CNG presumably stand for packet-loss concealment and
       // comfort-noise generation -- confirm. Per-value semantics are documented
       // in this header only for kOutputVADPassive.
  47   kOutputNormal,
  48   kOutputPLC,
  49   kOutputCNG,
  50   kOutputPLCtoCNG,
  51   kOutputVADPassive  // Reported when post-decode VAD is enabled and the
                          // signal contains no speech (see NetEq::EnableVad()).
  52 };
  53
  54 enum NetEqPlayoutMode {
       // Playout mode of a NetEq instance. It is set through
       // NetEq::Config::playout_mode (which defaults to kPlayoutOn) or through the
       // deprecated NetEq::SetPlayoutMode().
       // NOTE(review): the behavior of each mode is not described in this header.
  55   kPlayoutOn,
  56   kPlayoutOff,
  57   kPlayoutFax,
  58   kPlayoutStreaming
  59 };
  60
  61 // This is the interface class for NetEq. Every operation is pure virtual;
     // instances are obtained through the static Create() factory.
  62 class NetEq {
  63  public:
       // Background noise mode, selected through Config::background_noise_mode.
  64   enum BackgroundNoiseMode {
  65     kBgnOn,    // Default behavior with eternal noise.
  66     kBgnFade,  // Noise fades to zero after some time.
  67     kBgnOff    // Background noise is always zero.
  68   };
  69
       // Settings handed to Create(). The constructor installs the default values
       // listed in its initializer list.
  70   struct Config {
  71     Config()
  72         : sample_rate_hz(16000),
  73           enable_audio_classifier(false),
  74           max_packets_in_buffer(50),
  75           // |max_delay_ms| has the same effect as calling SetMaximumDelay().
  76           max_delay_ms(2000),
               // NOTE(review): kBgnOn is commented as the default behavior, yet
               // kBgnOff is what gets selected here -- confirm which is intended.
  77           background_noise_mode(kBgnOff),
  78           playout_mode(kPlayoutOn) {}
  79
  80     int sample_rate_hz;  // Initial value. Will change with input data.
  81     bool enable_audio_classifier;
  82     int max_packets_in_buffer;
  83     int max_delay_ms;  // Same effect as calling SetMaximumDelay().
  84     BackgroundNoiseMode background_noise_mode;
  85     NetEqPlayoutMode playout_mode;
  86   };
  87
       // Generic status values. kOK and kFail match the 0 / -1 return values
       // documented on the methods below.
  88   enum ReturnCodes {
  89     kOK = 0,
  90     kFail = -1,
  91     kNotImplemented = -2
  92   };
  93
       // Detailed error codes, as reported by LastError().
  94   enum ErrorCodes {
  95     kNoError = 0,
  96     kOtherError,
  97     kInvalidRtpPayloadType,
  98     kUnknownRtpPayloadType,
  99     kCodecNotSupported,
 100     kDecoderExists,
 101     kDecoderNotFound,
 102     kInvalidSampleRate,
 103     kInvalidPointer,
 104     kAccelerateError,
 105     kPreemptiveExpandError,
 106     kComfortNoiseErrorCode,
 107     kDecoderErrorCode,
 108     kOtherDecoderError,
 109     kInvalidOperation,
 110     kDtmfParameterError,
 111     kDtmfParsingError,
 112     kDtmfInsertError,
 113     kStereoNotSupported,
 114     kSampleUnderrun,
 115     kDecodedTooMuch,
 116     kFrameSplitError,
 117     kRedundancySplitError,
 118     kPacketBufferCorruption,
 119     kSyncPacketNotAccepted
 120   };
 121
 122   // Creates a new NetEq object, with parameters set in |config|. The |config|
 123   // object will only have to be valid for the duration of the call to this
 124   // method.
       // NOTE(review): ownership of the returned pointer is not stated here;
       // presumably the caller is responsible for deleting it -- confirm.
 125   static NetEq* Create(const NetEq::Config& config);
 126
 127   virtual ~NetEq() {}
 128
 129   // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
 130   // of the time when the packet was received, and should be measured with
 131   // the same tick rate as the RTP timestamp of the current payload.
 132   // Returns 0 on success, -1 on failure.
 133   virtual int InsertPacket(const WebRtcRTPHeader& rtp_header,
 134                            const uint8_t* payload,
 135                            int length_bytes,
 136                            uint32_t receive_timestamp) = 0;
 137
 138   // Inserts a sync-packet into packet queue. Sync-packets are decoded to
 139   // silence and are intended to keep AV-sync intact in an event of long packet
 140   // losses when Video NACK is enabled but Audio NACK is not. Clients of NetEq
 141   // might insert sync-packet when they observe that buffer level of NetEq is
 142   // decreasing below a certain threshold, defined by the application.
 143   // Sync-packets should have the same payload type as the last audio payload
 144   // type, i.e. they cannot have DTMF or CNG payload type, nor a codec change
 145   // can be implied by inserting a sync-packet.
 146   // Returns kOK on success, kFail on failure.
 147   virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
 148                                uint32_t receive_timestamp) = 0;
 149
 150   // Instructs NetEq to deliver 10 ms of audio data. The data is written to
 151   // |output_audio|, which can hold (at least) |max_length| elements.
 152   // The number of channels that were written to the output is provided in
 153   // the output variable |num_channels|, and each channel contains
 154   // |samples_per_channel| elements. If more than one channel is written,
 155   // the samples are interleaved.
 156   // The speech type is written to |type|, if |type| is not NULL.
 157   // Returns kOK on success, or kFail in case of an error.
 158   virtual int GetAudio(size_t max_length, int16_t* output_audio,
 159                        int* samples_per_channel, int* num_channels,
 160                        NetEqOutputType* type) = 0;
 161
 162   // Associates |rtp_payload_type| with |codec| and stores the information in
 163   // the codec database. Returns 0 on success, -1 on failure.
 164   virtual int RegisterPayloadType(enum NetEqDecoder codec,
 165                                   uint8_t rtp_payload_type) = 0;
 166
 167   // Provides an externally created decoder object |decoder| to insert in the
 168   // decoder database. The decoder implements a decoder of type |codec| and
 169   // associates it with |rtp_payload_type|. Returns kOK on success,
 170   // kFail on failure.
 171   virtual int RegisterExternalDecoder(AudioDecoder* decoder,
 172                                       enum NetEqDecoder codec,
 173                                       uint8_t rtp_payload_type) = 0;
 174
 175   // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
 176   // -1 on failure.
 177   virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
 178
 179   // Sets a minimum delay in milliseconds for packet buffer. The minimum is
 180   // maintained unless a higher latency is dictated by channel condition.
 181   // Returns true if the minimum is successfully applied, otherwise false is
 182   // returned.
 183   virtual bool SetMinimumDelay(int delay_ms) = 0;
 184
 185   // Sets a maximum delay in milliseconds for packet buffer. The latency will
 186   // not exceed the given value, even if the required delay (given the channel
 187   // conditions) is higher. Calling this method has the same effect as setting
 188   // the |max_delay_ms| value in the NetEq::Config struct.
 189   virtual bool SetMaximumDelay(int delay_ms) = 0;
 190
 191   // The smallest latency required. This is computed based on inter-arrival
 192   // time and internal NetEq logic. Note that in computing this latency none of
 193   // the user defined limits (applied by calling SetMinimumDelay() and/or
 194   // SetMaximumDelay()) are applied.
 195   virtual int LeastRequiredDelayMs() const = 0;
 196
 197   // Not implemented.
 198   virtual int SetTargetDelay() = 0;
 199
 200   // Not implemented.
 201   virtual int TargetDelay() = 0;
 202
 203   // Not implemented.
 204   virtual int CurrentDelay() = 0;
 205
 206   // Sets the playout mode to |mode|.
 207   // Deprecated. Set the mode in the Config struct passed to the constructor.
 208   // TODO(henrik.lundin) Delete.
 209   virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
 210
 211   // Returns the current playout mode.
 212   // Deprecated.
 213   // TODO(henrik.lundin) Delete.
 214   virtual NetEqPlayoutMode PlayoutMode() const = 0;
 215
 216   // Writes the current network statistics to |stats|. The statistics are reset
 217   // after the call.
       // NOTE(review): the meaning of the return value is not documented here.
 218   virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
 219
 220   // Writes the last packet waiting times (in ms) to |waiting_times|. The number
 221   // of values written is no more than 100, but may be smaller if the interface
 222   // is polled again before 100 packets have arrived.
 223   virtual void WaitingTimes(std::vector<int>* waiting_times) = 0;
 224
 225   // Writes the current RTCP statistics to |stats|. The statistics are reset
 226   // and a new report period is started with the call.
 227   virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
 228
 229   // Same as GetRtcpStatistics(), but does not reset anything.
 230   virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
 231
 232   // Enables post-decode VAD. When enabled, GetAudio() will return
 233   // kOutputVADPassive when the signal contains no speech.
 234   virtual void EnableVad() = 0;
 235
 236   // Disables post-decode VAD.
 237   virtual void DisableVad() = 0;
 238
 239   // Gets the RTP timestamp for the last sample delivered by GetAudio().
 240   // Returns true if the RTP timestamp is valid, otherwise false.
 241   virtual bool GetPlayoutTimestamp(uint32_t* timestamp) = 0;
 242
 243   // Not implemented.
 244   virtual int SetTargetNumberOfChannels() = 0;
 245
 246   // Not implemented.
 247   virtual int SetTargetSampleRate() = 0;
 248
 249   // Returns the error code for the last occurred error (see ErrorCodes). If no
 250   // error has occurred, 0 (kNoError) is returned.
 251   virtual int LastError() const = 0;
 252
 253   // Returns the error code last returned by a decoder (audio or comfort noise).
 254   // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
 255   // this method to get the decoder's error code.
 256   virtual int LastDecoderError() = 0;
 257
 258   // Flushes both the packet buffer and the sync buffer.
 259   virtual void FlushBuffers() = 0;
 260
 261   // Current usage of packet-buffer and its limits.
 262   virtual void PacketBufferStatistics(int* current_num_packets,
 263                                       int* max_num_packets) const = 0;
 264
 265   // Get sequence number and timestamp of the latest RTP.
 266   // This method is to facilitate NACK.
       // NOTE(review): the meaning of the return value is not documented here.
 267   virtual int DecodedRtpInfo(int* sequence_number,
 268                              uint32_t* timestamp) const = 0;
 269
 270  protected:
       // Protected: only derived classes can construct; clients use Create().
 271   NetEq() {}
 272
 273  private:
       // NOTE(review): macro presumably comes from constructormagic.h and disables
       // copy construction and assignment -- confirm.
 274   DISALLOW_COPY_AND_ASSIGN(NetEq);
 275 };
 276
 277 }  // namespace webrtc
 278 #endif  // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INTERFACE_NETEQ_H_