src/third_party/webrtc/modules/audio_coding/neteq4/interface/neteq.h

   1 /*
   2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_INTERFACE_NETEQ_H_
  12 #define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_INTERFACE_NETEQ_H_
  13
  14 #include <string.h>  // Provide access to size_t.
  15
  16 #include <vector>
  17
  18 #include "webrtc/common_types.h"
  19 #include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h"
  20 #include "webrtc/system_wrappers/interface/constructor_magic.h"
  21 #include "webrtc/typedefs.h"
  22
  23 namespace webrtc {
  24
  25 // Forward declarations.
  26 struct WebRtcRTPHeader;
  27
  28 struct NetEqNetworkStatistics {  // Written by NetEq::NetworkStatistics().
  29   uint16_t current_buffer_size_ms;  // Current jitter buffer size in ms.
  30   uint16_t preferred_buffer_size_ms;  // Target buffer size in ms.
  31   uint16_t jitter_peaks_found;  // 1 if adding extra delay due to peaky
  32                                 // jitter; 0 otherwise.
  33   uint16_t packet_loss_rate;  // Loss rate (network + late) in Q14 (16384 == 1.0).
  34   uint16_t packet_discard_rate;  // Late loss rate in Q14.
  35   uint16_t expand_rate;  // Fraction (of original stream) of synthesized
  36                          // speech inserted through expansion (in Q14).
  37   uint16_t preemptive_rate;  // Fraction of data inserted through pre-emptive
  38                              // expansion (in Q14).
  39   uint16_t accelerate_rate;  // Fraction of data removed through acceleration
  40                              // (in Q14).
  41   int32_t clockdrift_ppm;  // Average clock-drift in parts-per-million
  42                            // (positive or negative).
  43   int added_zero_samples;  // Number of zero samples added in "off" mode (presumably kPlayoutOff; confirm).
  44 };
  45
  46 enum NetEqOutputType {  // Speech type that NetEq::GetAudio() writes to |type|.
  47   kOutputNormal,
  48   kOutputPLC,  // NOTE(review): presumably packet-loss concealment output; confirm.
  49   kOutputCNG,  // NOTE(review): presumably comfort noise output; confirm.
  50   kOutputPLCtoCNG,
  51   kOutputVADPassive  // Signal contains no speech; only with NetEq::EnableVad().
  52 };
  53
  54 enum NetEqPlayoutMode {  // Set with NetEq::SetPlayoutMode(); read with PlayoutMode().
  55   kPlayoutOn,
  56   kPlayoutOff,  // NOTE(review): likely the "off" mode counted by added_zero_samples; confirm.
  57   kPlayoutFax,
  58   kPlayoutStreaming
  59 };
  60
  61 enum NetEqBackgroundNoiseMode {  // Set with NetEq::SetBackgroundNoiseMode().
  62   kBgnOn,    // Default behavior: the background noise never fades out.
  63   kBgnFade,  // Noise fades to zero after some time.
  64   kBgnOff    // Background noise is always zero.
  65 };
  66
  67 // This is the interface class for NetEq. Instances are created with Create().
  68 class NetEq {
  69  public:
  70   struct Config {  // Parameters read by Create().
  71     Config()
  72         : sample_rate_hz(16000),
  73           enable_audio_classifier(false),
  74           max_packets_in_buffer(50),
  75           // |max_delay_ms| has the same effect as calling SetMaximumDelay().
  76           max_delay_ms(2000) {}
  77
  78     int sample_rate_hz;  // Initial value (default 16000). Will change with input data.
  79     bool enable_audio_classifier;  // Defaults to false.
  80     int max_packets_in_buffer;  // Defaults to 50.
  81     int max_delay_ms;  // Defaults to 2000; same effect as SetMaximumDelay().
  82   };
  83
  84   enum ReturnCodes {  // Status codes named in the method comments below.
  85     kOK = 0,
  86     kFail = -1,
  87     kNotImplemented = -2
  88   };
  89
  90   enum ErrorCodes {  // Error codes reported by LastError().
  91     kNoError = 0,
  92     kOtherError,
  93     kInvalidRtpPayloadType,
  94     kUnknownRtpPayloadType,
  95     kCodecNotSupported,
  96     kDecoderExists,
  97     kDecoderNotFound,
  98     kInvalidSampleRate,
  99     kInvalidPointer,
 100     kAccelerateError,
 101     kPreemptiveExpandError,
 102     kComfortNoiseErrorCode,  // Decoder's own code: see LastDecoderError().
 103     kDecoderErrorCode,  // Decoder's own code: see LastDecoderError().
 104     kOtherDecoderError,
 105     kInvalidOperation,
 106     kDtmfParameterError,
 107     kDtmfParsingError,
 108     kDtmfInsertError,
 109     kStereoNotSupported,
 110     kSampleUnderrun,
 111     kDecodedTooMuch,
 112     kFrameSplitError,
 113     kRedundancySplitError,
 114     kPacketBufferCorruption,
 115     kSyncPacketNotAccepted
 116   };
 117
 118   // Creates a new NetEq object, with parameters set in |config|. The |config|
 119   // object will only have to be valid for the duration of the call to this
 120   // method.
 121   static NetEq* Create(const NetEq::Config& config);
 122
 123   virtual ~NetEq() {}
 124
 125   // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
 126   // of the time when the packet was received, and should be measured with
 127   // the same tick rate as the RTP timestamp of the current payload.
 128   // Returns 0 on success, -1 on failure.
 129   virtual int InsertPacket(const WebRtcRTPHeader& rtp_header,
 130                            const uint8_t* payload,
 131                            int length_bytes,
 132                            uint32_t receive_timestamp) = 0;
 133
 134   // Inserts a sync-packet into packet queue. Sync-packets are decoded to
 135   // silence and are intended to keep AV-sync intact in an event of long packet
 136   // losses when Video NACK is enabled but Audio NACK is not. Clients of NetEq
 137   // might insert sync-packet when they observe that buffer level of NetEq is
 138   // decreasing below a certain threshold, defined by the application.
 139   // Sync-packets should have the same payload type as the last audio payload
 140   // type, i.e. they cannot have DTMF or CNG payload type, nor a codec change
 141   // can be implied by inserting a sync-packet.
 142   // Returns kOK on success, kFail on failure.
 143   virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
 144                                uint32_t receive_timestamp) = 0;
 145
 146   // Instructs NetEq to deliver 10 ms of audio data. The data is written to
 147   // |output_audio|, which can hold (at least) |max_length| elements.
 148   // The number of channels that were written to the output is provided in
 149   // the output variable |num_channels|, and each channel contains
 150   // |samples_per_channel| elements. If more than one channel is written,
 151   // the samples are interleaved.
 152   // The speech type is written to |type|, if |type| is not NULL.
 153   // Returns kOK on success, or kFail in case of an error.
 154   virtual int GetAudio(size_t max_length, int16_t* output_audio,
 155                        int* samples_per_channel, int* num_channels,
 156                        NetEqOutputType* type) = 0;
 157
 158   // Associates |rtp_payload_type| with |codec| and stores the information in
 159   // the codec database. Returns 0 on success, -1 on failure.
 160   virtual int RegisterPayloadType(enum NetEqDecoder codec,
 161                                   uint8_t rtp_payload_type) = 0;
 162
 163   // Provides an externally created decoder object |decoder| to insert in the
 164   // decoder database. The decoder implements a decoder of type |codec| and
 165   // associates it with |rtp_payload_type|. Returns kOK on success,
 166   // kFail on failure.
 167   virtual int RegisterExternalDecoder(AudioDecoder* decoder,
 168                                       enum NetEqDecoder codec,
 169                                       uint8_t rtp_payload_type) = 0;
 170
 171   // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
 172   // -1 on failure.
 173   virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
 174
 175   // Sets a minimum delay in milliseconds for packet buffer. The minimum is
 176   // maintained unless a higher latency is dictated by channel condition.
 177   // Returns true if the minimum is successfully applied, otherwise false is
 178   // returned.
 179   virtual bool SetMinimumDelay(int delay_ms) = 0;
 180
 181   // Sets a maximum delay in milliseconds for packet buffer. The latency will
 182   // not exceed the given value, even if the required delay (given the channel
 183   // conditions) is higher. Calling this method has the same effect as setting
 184   // the |max_delay_ms| value in the NetEq::Config struct.
 185   virtual bool SetMaximumDelay(int delay_ms) = 0;
 186
 187   // The smallest latency required. This is computed based on inter-arrival
 188   // time and internal NetEq logic. Note that in computing this latency none of
 189   // the user defined limits (applied by calling SetMinimumDelay() and/or
 190   // SetMaximumDelay()) are applied.
 191   virtual int LeastRequiredDelayMs() const = 0;
 192
 193   // Not implemented.
 194   virtual int SetTargetDelay() = 0;
 195
 196   // Not implemented.
 197   virtual int TargetDelay() = 0;
 198
 199   // Not implemented.
 200   virtual int CurrentDelay() = 0;
 201
 202   // Sets the playout mode to |mode|.
 203   virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
 204
 205   // Returns the current playout mode.
 206   virtual NetEqPlayoutMode PlayoutMode() const = 0;
 207
 208   // Writes the current network statistics to |stats|. The statistics are reset
 209   // after the call.
 210   virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
 211
 212   // Writes the last packet waiting times (in ms) to |waiting_times|. The number
 213   // of values written is no more than 100, but may be smaller if the interface
 214   // is polled again before 100 packets have arrived.
 215   virtual void WaitingTimes(std::vector<int>* waiting_times) = 0;
 216
 217   // Writes the current RTCP statistics to |stats|. The statistics are reset
 218   // and a new report period is started with the call.
 219   virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
 220
 221   // Same as GetRtcpStatistics(), but does not reset anything.
 222   virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
 223
 224   // Enables post-decode VAD. When enabled, GetAudio() will return
 225   // kOutputVADPassive when the signal contains no speech.
 226   virtual void EnableVad() = 0;
 227
 228   // Disables post-decode VAD.
 229   virtual void DisableVad() = 0;
 230
 231   // Returns the RTP timestamp for the last sample delivered by GetAudio().
 232   virtual uint32_t PlayoutTimestamp() = 0;
 233
 234   // Not implemented.
 235   virtual int SetTargetNumberOfChannels() = 0;
 236
 237   // Not implemented.
 238   virtual int SetTargetSampleRate() = 0;
 239
 240   // Returns the error code for the last occurred error. If no error has
 241   // occurred, 0 (kNoError) is returned.
 242   virtual int LastError() = 0;
 243
 244   // Returns the error code last returned by a decoder (audio or comfort noise).
 245   // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
 246   // this method to get the decoder's error code.
 247   virtual int LastDecoderError() = 0;
 248
 249   // Flushes both the packet buffer and the sync buffer.
 250   virtual void FlushBuffers() = 0;
 251
 252   // Current usage of packet-buffer and its limits.
 253   virtual void PacketBufferStatistics(int* current_num_packets,
 254                                       int* max_num_packets) const = 0;
 255
 256   // Get sequence number and timestamp of the latest RTP packet.
 257   // This method is to facilitate NACK.
 258   virtual int DecodedRtpInfo(int* sequence_number,
 259                              uint32_t* timestamp) const = 0;
 260
 261   // Sets the background noise mode.
 262   virtual void SetBackgroundNoiseMode(NetEqBackgroundNoiseMode mode) = 0;
 263
 264   // Gets the background noise mode.
 265   virtual NetEqBackgroundNoiseMode BackgroundNoiseMode() const = 0;
 266
 267  protected:
 268   NetEq() {}  // Protected: instances come from Create() or a subclass.
 269
 270  private:
 271   DISALLOW_COPY_AND_ASSIGN(NetEq);
 272 };
 273
 274 }  // namespace webrtc
 275 #endif  // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_INTERFACE_NETEQ_H_