src/third_party/webrtc/modules/audio_coding/main/interface/audio_coding_module.h

   1 /*
   2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_INTERFACE_AUDIO_CODING_MODULE_H_
  12 #define WEBRTC_MODULES_AUDIO_CODING_MAIN_INTERFACE_AUDIO_CODING_MODULE_H_
  13
  14 #include <vector>
  15
  16 #include "webrtc/common_types.h"
  17 #include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h"
  18 #include "webrtc/modules/audio_coding/main/interface/audio_coding_module_typedefs.h"
  19 #include "webrtc/modules/audio_coding/neteq/interface/neteq.h"
  20 #include "webrtc/modules/interface/module.h"
  21 #include "webrtc/system_wrappers/interface/clock.h"
  22 #include "webrtc/typedefs.h"
  23
  24 namespace webrtc {
  25
  26 // forward declarations
  27 struct CodecInst;
  28 struct WebRtcRTPHeader;
  29 class AudioFrame;
  30 class RTPFragmentationHeader;
  31
  32 #define WEBRTC_10MS_PCM_AUDIO 960  // 16 bits super wideband 48 kHz
  33
  34 // Callback interface used for sending encoded data that is ready to be packetized.
  35 class AudioPacketizationCallback {
  36  public:
  37   virtual ~AudioPacketizationCallback() {}
  38   // Called whenever Process() runs and a bit-stream is ready (see RegisterTransportCallback()).
  39   virtual int32_t SendData(
  40       FrameType frame_type,
  41       uint8_t payload_type,
  42       uint32_t timestamp,
  43       const uint8_t* payload_data,  // buffer holding the payload
  44       uint16_t payload_len_bytes,  // length of the payload in bytes
  45       const RTPFragmentationHeader* fragmentation) = 0;  // NOTE(review): presumably may be NULL -- confirm
  46 };
  47
  48 // Callback interface used for in-band DTMF detection.
  49 class AudioCodingFeedback {
  50  public:
  51   virtual ~AudioCodingFeedback() {}
  52   // Reports a DTMF digit. NOTE(review): |end| presumably marks the end of the tone -- confirm.
  53   virtual int32_t IncomingDtmf(const uint8_t digit_dtmf,
  54                                const bool end) = 0;
  55 };
  56
  57 // Callback interface used for reporting the VAD decision.
  58 class ACMVADCallback {
  59  public:
  60   virtual ~ACMVADCallback() {}
  61   // Per RegisterVADCallback(): called any time ACM encounters a frame recognized as inactive.
  62   virtual int32_t InFrameType(int16_t frameType) = 0;
  63 };
  64
  65 // Callback interface used for reporting receiver statistics.
  66 class ACMVQMonCallback {
  67  public:
  68   virtual ~ACMVQMonCallback() {}
  69   // Delivers one statistics report; units are stated per parameter below.
  70   virtual int32_t NetEqStatistics(
  71       const int32_t id,  // id of the current ACM
  72       const uint16_t MIUsValid,  // valid voice duration, in ms
  73       const uint16_t MIUsReplaced,  // concealed voice duration, in ms
  74       const uint8_t eventFlags,  // concealed voice flags
  75       const uint16_t delayMS) = 0;  // average delay, in ms
  76 };
  77
  78 class AudioCodingModule: public Module {
  79  protected:
  80   AudioCodingModule() {}
  81
  82  public:
  83   struct Config {  // Groups an id, a NetEq configuration and a clock pointer.
  84     Config()
  85         : id(0),
  86           neteq_config(),  // value-initialized NetEq::Config
  87           clock(Clock::GetRealTimeClock()) {}  // defaults to the real-time clock
  88     // NOTE(review): ownership of |clock| is not stated in this struct -- confirm.
  89     int id;
  90     NetEq::Config neteq_config;
  91     Clock* clock;
  92   };
  93
  94   ///////////////////////////////////////////////////////////////////////////
  95   // Creation and destruction of an ACM.
  96   //
  97   // The second method is used for testing where a simulated clock can be
  98   // injected into ACM. ACM will take the ownership of the object clock and
  99   // delete it when destroyed.
 100   //
 101   static AudioCodingModule* Create(int id);
 102   static AudioCodingModule* Create(int id, Clock* clock);
 103   virtual ~AudioCodingModule() {}  // Virtual so deletion through an AudioCodingModule* is well-defined.
 104
 105   ///////////////////////////////////////////////////////////////////////////
 106   //   Utility functions
 107   //
 108
 109   ///////////////////////////////////////////////////////////////////////////
 110   // int NumberOfCodecs()
 111   // Returns number of supported codecs.
 112   //
 113   // Return value:
 114   //   number of supported codecs.
 115   ///
 116   static int NumberOfCodecs();
 117
 118   ///////////////////////////////////////////////////////////////////////////
 119   // int Codec()
 120   // Get supported codec with list number.
 121   //
 122   // Input:
 123   //   -list_id             : list number.
 124   //
 125   // Output:
 126   //   -codec              : a structure where the parameters of the codec,
 127   //                         given by list number is written to.
 128   //
 129   // Return value:
 130   //   -1 if the list number (list_id) is invalid.
 131   //    0 if succeeded.
 132   //
 133   static int Codec(int list_id, CodecInst* codec);
 134
 135   ///////////////////////////////////////////////////////////////////////////
 136   // int Codec()
 137   // Get supported codec with the given codec name, sampling frequency, and
 138   // a given number of channels.
 139   //
 140   // Input:
 141   //   -payload_name       : name of the codec.
 142   //   -sampling_freq_hz   : sampling frequency of the codec. Note! for RED
 143   //                         a sampling frequency of -1 is a valid input.
 144   //   -channels           : number of channels ( 1 - mono, 2 - stereo).
 145   //
 146   // Output:
 147   //   -codec              : a structure where the function returns the
 148   //                         default parameters of the codec.
 149   //
 150   // Return value:
 151   //   -1 if no codec matches the given parameters.
 152   //    0 if succeeded.
 153   //
 154   static int Codec(const char* payload_name, CodecInst* codec,
 155                        int sampling_freq_hz, int channels);
 156
 157   ///////////////////////////////////////////////////////////////////////////
 158   // int Codec()
 159   //
 160   // Returns the list number of the given codec name, sampling frequency, and
 161   // a given number of channels.
 162   //
 163   // Input:
 164   //   -payload_name        : name of the codec.
 165   //   -sampling_freq_hz    : sampling frequency of the codec. Note! for RED
 166   //                          a sampling frequency of -1 is a valid input.
 167   //   -channels            : number of channels ( 1 - mono, 2 - stereo).
 168   //
 169   // Return value:
 170   //   if the codec is found, the index of the codec in the list,
 171   //   -1 if the codec is not found.
 172   //
 173   static int Codec(const char* payload_name, int sampling_freq_hz,
 174                              int channels);
 175
 176   ///////////////////////////////////////////////////////////////////////////
 177   // bool IsCodecValid()
 178   // Checks the validity of the parameters of the given codec.
 179   //
 180   // Input:
 181   //   -codec              : the structure which keeps the parameters of the
 182   //                         codec.
 183   //
 184   // Return value:
 185   //   true if the parameters are valid,
 186   //   false if any parameter is not valid.
 187   //
 188   static bool IsCodecValid(const CodecInst& codec);
 189
 190   ///////////////////////////////////////////////////////////////////////////
 191   //   Sender
 192   //
 193
 194   ///////////////////////////////////////////////////////////////////////////
 195   // int32_t InitializeSender()
 196   // Any encoder-related state of ACM will be initialized to the
 197   // same state as when ACM was created. This will not interrupt or
 198   // affect the decoding functionality of ACM. ACM will lose all the
 199   // encoding-related settings by calling this function.
 200   // For instance, a send codec has to be registered again.
 201   //
 202   // Return value:
 203   //   -1 if failed to initialize,
 204   //    0 if succeeded.
 205   //
 206   virtual int32_t InitializeSender() = 0;
 207
 208   ///////////////////////////////////////////////////////////////////////////
 209   // int32_t ResetEncoder()
 210   // This API resets the states of encoder. All the encoder settings, such as
 211   // send-codec or VAD/DTX, will be preserved.
 212   //
 213   // Return value:
 214   //   -1 if failed to initialize,
 215   //    0 if succeeded.
 216   //
 217   virtual int32_t ResetEncoder() = 0;
 218
 219   ///////////////////////////////////////////////////////////////////////////
 220   // int32_t RegisterSendCodec()
 221   // Registers a codec, specified by |send_codec|, as sending codec.
 222   // This API can be called multiple times to register a codec. The last codec
 223   // registered overwrites the previous ones.
 224   // The API can also be used to change payload type for CNG and RED, which are
 225   // registered by default to default payload types.
 226   // Note that registering CNG and RED won't overwrite speech codecs.
 227   // This API can be called to set/change the send payload-type, frame-size
 228   // or encoding rate (if applicable for the codec).
 229   //
 230   // Note: If a stereo codec is registered as send codec, VAD/DTX will
 231   // automatically be turned off, since it is not supported for stereo sending.
 232   //
 233   // Note: If a secondary encoder is already registered, and the new send-codec
 234   // has a sampling rate that does not match the secondary encoder, the
 235   // secondary encoder will be unregistered.
 236   //
 237   // Input:
 238   //   -send_codec         : Parameters of the codec to be registered, c.f.
 239   //                         common_types.h for the definition of
 240   //                         CodecInst.
 241   //
 242   // Return value:
 243   //   -1 if failed to initialize,
 244   //    0 if succeeded.
 245   //
 246   virtual int32_t RegisterSendCodec(const CodecInst& send_codec) = 0;
 247
 248   ///////////////////////////////////////////////////////////////////////////
 249   // int RegisterSecondarySendCodec()
 250   // Register a secondary encoder to enable dual-streaming. If a secondary
 251   // codec is already registered, it will be removed before the new one is
 252   // registered.
 253   //
 254   // Note: The secondary encoder will be unregistered if a primary codec
 255   // is set with a sampling rate which does not match that of the existing
 256   // secondary codec.
 257   //
 258   // Input:
 259   //   -send_codec         : Parameters of the codec to be registered, c.f.
 260   //                         common_types.h for the definition of
 261   //                         CodecInst.
 262   //
 263   // Return value:
 264   //   -1 if failed to register,
 265   //    0 if succeeded.
 266   //
 267   virtual int RegisterSecondarySendCodec(const CodecInst& send_codec) = 0;
 268
 269   ///////////////////////////////////////////////////////////////////////////
 270   // void UnregisterSecondarySendCodec()
 271   // Unregister the secondary encoder to disable dual-streaming.
 272   //
 273   virtual void UnregisterSecondarySendCodec() = 0;
 274
 275   ///////////////////////////////////////////////////////////////////////////
 276   // int32_t SendCodec()
 277   // Get parameters for the codec currently registered as send codec.
 278   //
 279   // Output:
 280   //   -current_send_codec          : parameters of the send codec.
 281   //
 282   // Return value:
 283   //   -1 if failed to get send codec,
 284   //    0 if succeeded.
 285   //
 286   virtual int32_t SendCodec(CodecInst* current_send_codec) const = 0;
 287
 288   ///////////////////////////////////////////////////////////////////////////
 289   // int SecondarySendCodec()
 290   // Get the codec parameters for the current secondary send codec.
 291   //
 292   // Output:
 293   //   -secondary_codec          : parameters of the secondary send codec.
 294   //
 295   // Return value:
 296   //   -1 if failed to get send codec,
 297   //    0 if succeeded.
 298   //
 299   virtual int SecondarySendCodec(CodecInst* secondary_codec) const = 0;
 300
 301   ///////////////////////////////////////////////////////////////////////////
 302   // int32_t SendFrequency()
 303   // Get the sampling frequency of the current encoder in Hertz.
 304   //
 305   // Return value:
 306   //   positive; sampling frequency [Hz] of the current encoder.
 307   //   -1 if an error has happened.
 308   //
 309   virtual int32_t SendFrequency() const = 0;
 310
 311   ///////////////////////////////////////////////////////////////////////////
 312   // int32_t SendBitrate()
 313   // Get encoding bit-rate in bits per second.
 314   //
 315   // Return value:
 316   //   positive; encoding rate in bits/sec,
 317   //   -1 if an error has happened.
 318   //
 319   virtual int32_t SendBitrate() const = 0;
 320
 321   ///////////////////////////////////////////////////////////////////////////
 322   // int32_t SetReceivedEstimatedBandwidth()
 323   // Set available bandwidth [bits/sec] of the up-link channel.
 324   // This information is used for traffic shaping, and is currently only
 325   // supported if iSAC is the send codec.
 326   //
 327   // Input:
 328   //   -bw                 : bandwidth in bits/sec estimated for
 329   //                         up-link.
 330   // Return value
 331   //   -1 if error occurred in setting the bandwidth,
 332   //    0 bandwidth is set successfully.
 333   //
 334   // TODO(henrik.lundin) Unused. Remove?
 335   virtual int32_t SetReceivedEstimatedBandwidth(
 336       const int32_t bw) = 0;
 337
 338   ///////////////////////////////////////////////////////////////////////////
 339   // int32_t RegisterTransportCallback()
 340   // Register a transport callback which will be called to deliver
 341   // the encoded buffers whenever Process() is called and a
 342   // bit-stream is ready.
 343   //
 344   // Input:
 345   //   -transport          : pointer to the callback class
 346   //                         transport->SendData() is called whenever
 347   //                         Process() is called and bit-stream is ready
 348   //                         to deliver.
 349   //
 350   // Return value:
 351   //   -1 if the transport callback could not be registered
 352   //    0 if registration is successful.
 353   //
 354   virtual int32_t RegisterTransportCallback(
 355       AudioPacketizationCallback* transport) = 0;
 356
 357   ///////////////////////////////////////////////////////////////////////////
 358   // int32_t Add10MsData()
 359   // Add 10 ms of raw (PCM) audio data to the encoder. If the sampling
 360   // frequency of the audio does not match the sampling frequency of the
 361   // current encoder, ACM will resample the audio.
 362   //
 363   // Input:
 364   //   -audio_frame        : the input audio frame, containing raw audio
 365   //                         sampling frequency etc.,
 366   //                         c.f. module_common_types.h for definition of
 367   //                         AudioFrame.
 368   //
 369   // Return value:
 370   //      0   successfully added the frame.
 371   //     -1   some error occurred and data is not added.
 372   //   < -1   to add the frame to the buffer n samples had to be
 373   //          overwritten, -n is the return value in this case.
 374   //
 375   virtual int32_t Add10MsData(const AudioFrame& audio_frame) = 0;
 376
 377   ///////////////////////////////////////////////////////////////////////////
 378   // (RED) Redundant Coding
 379   //
 380
 381   ///////////////////////////////////////////////////////////////////////////
 382   // int32_t SetREDStatus()
 383   // configure RED status i.e. on/off.
 384   //
 385   // RFC 2198 describes a solution which has a single payload type which
 386   // signifies a packet with redundancy. That packet then becomes a container,
 387   // encapsulating multiple payloads into a single RTP packet.
 388   // Such a scheme is flexible, since any amount of redundancy may be
 389   // encapsulated within a single packet.  There is, however, a small overhead
 390   // since each encapsulated payload must be preceded by a header indicating
 391   // the type of data enclosed.
 392   //
 393   // Input:
 394   //   -enable_red         : if true RED is enabled, otherwise RED is
 395   //                         disabled.
 396   //
 397   // Return value:
 398   //   -1 if failed to set RED status,
 399   //    0 if succeeded.
 400   //
 401   virtual int32_t SetREDStatus(bool enable_red) = 0;
 402
 403   ///////////////////////////////////////////////////////////////////////////
 404   // bool REDStatus()
 405   // Get RED status
 406   //
 407   // Return value:
 408   //   true if RED is enabled,
 409   //   false if RED is disabled.
 410   //
 411   virtual bool REDStatus() const = 0;
 412
 413   ///////////////////////////////////////////////////////////////////////////
 414   // (FEC) Forward Error Correction (codec internal)
 415   //
 416
 417   ///////////////////////////////////////////////////////////////////////////
 418   // int SetCodecFEC()
 419   // Configures codec internal FEC status, i.e. on/off. No effect on codecs that
 420   // do not provide internal FEC.
 421   //
 422   // Input:
 423   //   -enable_codec_fec   : if true FEC will be enabled, otherwise the FEC is
 424   //                         disabled.
 425   //
 426   // Return value:
 427   //   -1 if failed, or the codec does not support FEC
 428   //    0 if succeeded.
 429   //
 430   virtual int SetCodecFEC(bool enable_codec_fec) = 0;
 431
 432   ///////////////////////////////////////////////////////////////////////////
 433   // bool CodecFEC()
 434   // Gets status of codec internal FEC.
 435   //
 436   // Return value:
 437   //   true if FEC is enabled,
 438   //   false if FEC is disabled.
 439   //
 440   virtual bool CodecFEC() const = 0;
 441
 442   ///////////////////////////////////////////////////////////////////////////
 443   // int SetPacketLossRate()
 444   // Sets the expected packet loss rate for encoding. Some encoders provide
 445   // packet-loss-aware encoding to make the stream less sensitive to packet losses,
 446   // through e.g., FEC. No effects on codecs that do not provide such encoding.
 447   //
 448   // Input:
 449   //   -packet_loss_rate   : expected packet loss rate (0 -- 100 inclusive).
 450   //
 451   // Return value
 452   //   -1 if failed to set packet loss rate,
 453   //   0 if succeeded.
 454   //
 455   virtual int SetPacketLossRate(int packet_loss_rate) = 0;
 456
 457   ///////////////////////////////////////////////////////////////////////////
 458   //   (VAD) Voice Activity Detection
 459   //
 460
 461   ///////////////////////////////////////////////////////////////////////////
 462   // int32_t SetVAD()
 463   // If DTX is enabled and the codec does not have internal DTX/VAD,
 464   // WebRtc VAD will be automatically enabled and |enable_vad| is ignored.
 465   //
 466   // If DTX is disabled but VAD is enabled, no DTX packets are sent,
 467   // regardless of whether the codec has internal DTX/VAD or not. In this
 468   // case, WebRtc VAD is running to label frames as active/in-active.
 469   //
 470   // NOTE! VAD/DTX is not supported when sending stereo.
 471   //
 472   // Inputs:
 473   //   -enable_dtx         : if true DTX is enabled,
 474   //                         otherwise DTX is disabled.
 475   //   -enable_vad         : if true VAD is enabled,
 476   //                         otherwise VAD is disabled.
 477   //   -vad_mode           : determines the aggressiveness of VAD. A more
 478   //                         aggressive mode results in more frames labeled
 479   //                         as in-active, c.f. definition of
 480   //                         ACMVADMode in audio_coding_module_typedefs.h
 481   //                         for valid values.
 482   //
 483   // Return value:
 484   //   -1 if failed to set up VAD/DTX,
 485   //    0 if succeeded.
 486   //
 487   virtual int32_t SetVAD(const bool enable_dtx = true,
 488                                const bool enable_vad = false,
 489                                const ACMVADMode vad_mode = VADNormal) = 0;
 490
 491   ///////////////////////////////////////////////////////////////////////////
 492   // int32_t VAD()
 493   // Get VAD status.
 494   //
 495   // Outputs:
 496   //   -dtx_enabled        : is set to true if DTX is enabled, otherwise
 497   //                         is set to false.
 498   //   -vad_enabled        : is set to true if VAD is enabled, otherwise
 499   //                         is set to false.
 500   //   -vad_mode            : is set to the current aggressiveness of VAD.
 501   //
 502   // Return value:
 503   //   -1 if fails to retrieve the setting of DTX/VAD,
 504   //    0 if succeeded.
 505   //
 506   virtual int32_t VAD(bool* dtx_enabled, bool* vad_enabled,
 507                             ACMVADMode* vad_mode) const = 0;
 508
 509   ///////////////////////////////////////////////////////////////////////////
 510   // int32_t ReplaceInternalDTXWithWebRtc()
 511   // Used to replace codec internal DTX scheme with WebRtc. This is only
 512   // supported for G729, where this call replaces AnnexB with WebRtc DTX.
 513   //
 514   // Input:
 515   //   -use_webrtc_dtx     : if false (default) the codec built-in DTX/VAD
 516   //                         scheme is used, otherwise the internal DTX is
 517   //                         replaced with WebRtc DTX/VAD.
 518   //
 519   // Return value:
 520   //   -1 if failed to replace codec internal DTX with WebRtc,
 521   //    0 if succeeded.
 522   //
 523   virtual int32_t ReplaceInternalDTXWithWebRtc(
 524       const bool use_webrtc_dtx = false) = 0;
 525
 526   ///////////////////////////////////////////////////////////////////////////
 527   // int32_t IsInternalDTXReplacedWithWebRtc()
 528   // Get status if the codec internal DTX (when such exists) is replaced with
 529   // WebRtc DTX. This is only supported for G729.
 530   //
 531   // Output:
 532   //   -uses_webrtc_dtx    : is set to true if the codec internal DTX is
 533   //                         replaced with WebRtc DTX/VAD, otherwise it is set
 534   //                         to false.
 535   //
 536   // Return value:
 537   //   -1 if failed to determine if codec internal DTX is replaced with WebRtc,
 538   //    0 if succeeded.
 539   //
 540   virtual int32_t IsInternalDTXReplacedWithWebRtc(
 541       bool* uses_webrtc_dtx) = 0;
 542
 543   ///////////////////////////////////////////////////////////////////////////
 544   // int32_t RegisterVADCallback()
 545   // Call this method to register a callback function which is called
 546   // any time that ACM encounters an empty frame. That is a frame which is
 547   // recognized inactive. Depending on the codec WebRtc VAD or internal codec
 548   // VAD is employed to identify a frame as active/inactive.
 549   //
 550   // Input:
 551   //   -vad_callback        : pointer to a callback function.
 552   //
 553   // Return value:
 554   //   -1 if failed to register the callback function.
 555   //    0 if the callback function is registered successfully.
 556   //
 557   virtual int32_t RegisterVADCallback(ACMVADCallback* vad_callback) = 0;
 558
 559   ///////////////////////////////////////////////////////////////////////////
 560   //   Receiver
 561   //
 562
 563   ///////////////////////////////////////////////////////////////////////////
 564   // int32_t InitializeReceiver()
 565   // Any decoder-related state of ACM will be initialized to the
 566   // same state as when ACM was created. This will not interrupt or
 567   // affect the encoding functionality of ACM. ACM will lose all the
 568   // decoding-related settings by calling this function.
 569   // For instance, all registered codecs are deleted and have to be
 570   // registered again.
 571   //
 572   // Return value:
 573   //   -1 if failed to initialize,
 574   //    0 if succeeded.
 575   //
 576   virtual int32_t InitializeReceiver() = 0;
 577
 578   ///////////////////////////////////////////////////////////////////////////
 579   // int32_t ResetDecoder()
 580   // This API resets the states of decoders. ACM will not lose any
 581   // decoder-related settings, such as registered codecs.
 582   //
 583   // Return value:
 584   //   -1 if failed to initialize,
 585   //    0 if succeeded.
 586   //
 587   virtual int32_t ResetDecoder() = 0;
 588
 589   ///////////////////////////////////////////////////////////////////////////
 590   // int32_t ReceiveFrequency()
 591   // Get sampling frequency of the last received payload.
 592   //
 593   // Return value:
 594   //   non-negative the sampling frequency in Hertz.
 595   //   -1 if an error has occurred.
 596   //
 597   virtual int32_t ReceiveFrequency() const = 0;
 598
 599   ///////////////////////////////////////////////////////////////////////////
 600   // int32_t PlayoutFrequency()
 601   // Get sampling frequency of audio played out.
 602   //
 603   // Return value:
 604   //   the sampling frequency in Hertz.
 605   //
 606   virtual int32_t PlayoutFrequency() const = 0;
 607
 608   ///////////////////////////////////////////////////////////////////////////
 609   // int32_t RegisterReceiveCodec()
 610   // Register possible decoders, can be called multiple times for
 611   // codecs, CNG-NB, CNG-WB, CNG-SWB, AVT and RED.
 612   //
 613   // Input:
 614   //   -receive_codec      : parameters of the codec to be registered, c.f.
 615   //                         common_types.h for the definition of
 616   //                         CodecInst.
 617   //
 618   // Return value:
 619   //   -1 if failed to register the codec
 620   //    0 if the codec registered successfully.
 621   //
 622   virtual int32_t RegisterReceiveCodec(
 623       const CodecInst& receive_codec) = 0;
 624
 625   ///////////////////////////////////////////////////////////////////////////
 626   // int UnregisterReceiveCodec()
 627   // Unregister the codec currently registered with a specific payload type
 628   // from the list of possible receive codecs.
 629   //
 630   // Input:
 631   //   -payload_type        : The number representing the payload type to
 632   //                         unregister.
 633   //
 634   // Return value:
 635   //   -1 if fails to unregister.
 636   //    0 if the given codec is successfully unregistered.
 637   //
 638   virtual int UnregisterReceiveCodec(
 639       uint8_t payload_type) = 0;
 640
 641   ///////////////////////////////////////////////////////////////////////////
 642   // int32_t ReceiveCodec()
 643   // Get the codec associated with last received payload.
 644   //
 645   // Output:
 646   //   -curr_receive_codec : parameters of the codec associated with the last
 647   //                         received payload, c.f. common_types.h for
 648   //                         the definition of CodecInst.
 649   //
 650   // Return value:
 651   //   -1 if failed to retrieve the codec,
 652   //    0 if the codec is successfully retrieved.
 653   //
 654   virtual int32_t ReceiveCodec(CodecInst* curr_receive_codec) const = 0;
 655
 656   ///////////////////////////////////////////////////////////////////////////
 657   // int32_t IncomingPacket()
 658   // Call this function to insert a parsed RTP packet into ACM.
 659   //
 660   // Inputs:
 661   //   -incoming_payload   : received payload.
 662   //   -payload_len_bytes  : the length of payload in bytes.
 663   //   -rtp_info           : the relevant information retrieved from RTP
 664   //                         header.
 665   //
 666   // Return value:
 667   //   -1 if failed to push in the payload
 668   //    0 if payload is successfully pushed in.
 669   //
 670   virtual int32_t IncomingPacket(const uint8_t* incoming_payload,
 671                                        const int32_t payload_len_bytes,
 672                                        const WebRtcRTPHeader& rtp_info) = 0;
 673
 674   ///////////////////////////////////////////////////////////////////////////
 675   // int32_t IncomingPayload()
 676   // Call this API to push incoming payloads when there is no rtp-info.
 677   // The rtp-info will be created in ACM. One usage for this API is when
 678   // pre-encoded files are pushed in ACM
 679   //
 680   // Inputs:
 681   //   -incoming_payload   : received payload.
 682   //   -payload_len_byte   : the length, in bytes, of the received payload.
 683   //   -payload_type       : the payload-type. This specifies which codec has
 684   //                         to be used to decode the payload.
 685   //   -timestamp          : send timestamp of the payload. ACM starts with
 686   //                         a random value and increments it by the
 687   //                         packet-size, which is given when the codec in
 688   //                         question is registered by RegisterReceiveCodec().
 689   //                         Therefore, it is essential to have the timestamp
 690   //                         if the frame-size differs from the registered
 691   //                         value or if the incoming payload contains DTX
 692   //                         packets.
 693   //
 694   // Return value:
 695   //   -1 if failed to push in the payload
 696   //    0 if payload is successfully pushed in.
 697   //
 698   virtual int32_t IncomingPayload(const uint8_t* incoming_payload,
 699                                         const int32_t payload_len_byte,
 700                                         const uint8_t payload_type,
 701                                         const uint32_t timestamp = 0) = 0;
 702
 703   ///////////////////////////////////////////////////////////////////////////
 704   // int SetMinimumPlayoutDelay()
 705   // Set a minimum for the playout delay, used for lip-sync. NetEq maintains
 706   // such a delay unless channel condition yields to a higher delay.
 707   //
 708   // Input:
 709   //   -time_ms            : minimum delay in milliseconds.
 710   //
 711   // Return value:
 712   //   -1 if failed to set the delay,
 713   //    0 if the minimum delay is set.
 714   //
 715   virtual int SetMinimumPlayoutDelay(int time_ms) = 0;
 716
 717   ///////////////////////////////////////////////////////////////////////////
 718   // int SetMaximumPlayoutDelay()
 719   // Set a maximum for the playout delay
 720   //
 721   // Input:
 722   //   -time_ms            : maximum delay in milliseconds.
 723   //
 724   // Return value:
 725   //   -1 if failed to set the delay,
 726   //    0 if the maximum delay is set.
 727   //
 728   virtual int SetMaximumPlayoutDelay(int time_ms) = 0;
 729
 730   //
 731   // The shortest latency, in milliseconds, required by jitter buffer. This
 732   // is computed based on inter-arrival times and playout mode of NetEq. The
 733   // actual delay is the maximum of least-required-delay and the minimum-delay
 734   // specified by the SetMinimumPlayoutDelay() API.
 735   //
 736   virtual int LeastRequiredDelayMs() const = 0;
 737
 738   ///////////////////////////////////////////////////////////////////////////
 739   // int32_t SetDtmfPlayoutStatus()
 740   // Configure DTMF playout, i.e. whether out-of-band
 741   // DTMF tones are played or not.
 742   //
 743   // Input:
 744   //   -enable             : true to enable playout of out-of-band DTMF tones,
 745   //                         false to disable.
 746   //
 747   // Return value:
 748   //   -1 if the method fails, e.g. DTMF playout is not supported.
 749   //    0 if the status is set successfully.
 750   //
 751   virtual int32_t SetDtmfPlayoutStatus(const bool enable) = 0;
 752
 753   ///////////////////////////////////////////////////////////////////////////
 754   // bool DtmfPlayoutStatus()
 755   // Get Dtmf playout status.
 756   //
 757   // Return value:
 758   //   true if out-of-band Dtmf tones are played,
 759   //   false if playout of Dtmf tones is disabled.
 760   //
 761   virtual bool DtmfPlayoutStatus() const = 0;
 762
  ///////////////////////////////////////////////////////////////////////////
  // int32_t PlayoutTimestamp()
  // The send timestamp of an RTP packet is associated with the decoded
  // audio of the packet in question. This function returns the timestamp of
  // the latest audio obtained by calling PlayoutData10Ms().
  //
  // Output:
  //   -timestamp          : pointer to a uint32_t that receives the
  //                         timestamp.
  // Return value:
  //    0 if the output is a correct timestamp.
  //   -1 if failed to output the correct timestamp.
  //
  // TODO(tlegrand): Change function to return the timestamp.
  virtual int32_t PlayoutTimestamp(uint32_t* timestamp) = 0;
 778
  ///////////////////////////////////////////////////////////////////////////
  // int32_t DecoderEstimatedBandwidth()
  // Get the estimate of the bandwidth, in bits/second, based on the incoming
  // stream. This API is useful in one-way communication scenarios, where
  // the bandwidth information is sent in an out-of-band fashion.
  // Currently only supported if iSAC is registered as a receiver.
  //
  // Return value:
  //   >0 bandwidth in bits/second.
  //   -1 if failed to get a bandwidth estimate.
  //
  virtual int32_t DecoderEstimatedBandwidth() const = 0;
 791
  ///////////////////////////////////////////////////////////////////////////
  // int32_t SetPlayoutMode()
  // Call this API to set the playout mode. Playout mode could be optimized
  // for i) voice, ii) FAX or iii) streaming. In voice mode, NetEQ is
  // optimized to deliver the highest audio quality while maintaining a
  // minimum delay. In FAX mode, NetEQ is optimized to have as few delay
  // changes as possible and to maintain a constant delay, perhaps large
  // relative to voice mode, to avoid PLC. In streaming mode, we tolerate a
  // little more delay to achieve better jitter robustness.
  //
  // Input:
  //   -mode               : playout mode, an AudioPlayoutMode value (see
  //                         audio_coding_module_typedefs.h) selecting one of
  //                         voice, fax or streaming.
  //
  // Return value:
  //   -1 if failed to set the mode,
  //    0 if succeeding.
  //
  virtual int32_t SetPlayoutMode(const AudioPlayoutMode mode) = 0;
 813
  ///////////////////////////////////////////////////////////////////////////
  // AudioPlayoutMode PlayoutMode()
  // Get the playout mode, i.e. whether it is voice, FAX or streaming. See
  // audio_coding_module_typedefs.h for the definition of AudioPlayoutMode.
  //
  // Return value:
  //   voice:       a mode for voice output.
  //   fax:         a mode that is optimized for receiving FAX signals.
  //                In this mode NetEq tries to maintain a constant high
  //                delay to avoid PLC if possible.
  //   streaming:   a mode that is suitable for streaming. In this mode we
  //                accept longer delay to improve jitter robustness.
  //
  virtual AudioPlayoutMode PlayoutMode() const = 0;
 828
  ///////////////////////////////////////////////////////////////////////////
  // int32_t PlayoutData10Ms()
  // Get 10 milliseconds of raw audio data for playout, at the given sampling
  // frequency. ACM will perform a resampling if required.
  //
  // Input:
  //   -desired_freq_hz    : the desired sampling frequency, in Hertz, of the
  //                         output audio. If set to -1, the function returns
  //                         the audio at the current sampling frequency.
  //
  // Output:
  //   -audio_frame        : output audio frame which contains raw audio data
  //                         and other relevant parameters, c.f.
  //                         module_common_types.h for the definition of
  //                         AudioFrame.
  //
  // Return value:
  //   -1 if the function fails,
  //    0 if the function succeeds.
  //
  virtual int32_t PlayoutData10Ms(int32_t desired_freq_hz,
                                        AudioFrame* audio_frame) = 0;
 851
 852   ///////////////////////////////////////////////////////////////////////////
 853   //   Codec specific
 854   //
 855
  ///////////////////////////////////////////////////////////////////////////
  // int SetISACMaxRate()
  // Set the maximum instantaneous rate of iSAC. For a payload of B bits
  // with a frame-size of T sec the instantaneous rate is B/T bits per
  // second. Therefore, (B/T < |max_rate_bps|) and
  // (B < |max_payload_len_bytes| * 8) are always satisfied for iSAC payloads,
  // c.f. SetISACMaxPayloadSize().
  //
  // Input:
  //   -max_rate_bps       : maximum instantaneous bit-rate given in bits/sec.
  //
  // Return value:
  //   -1 if failed to set the maximum rate.
  //    0 if the maximum rate is set successfully.
  //
  virtual int SetISACMaxRate(int max_rate_bps) = 0;
 872
  ///////////////////////////////////////////////////////////////////////////
  // int SetISACMaxPayloadSize()
  // Set the maximum payload size of iSAC packets. No iSAC payload,
  // regardless of its frame-size, may exceed the given limit. For
  // an iSAC payload of size B bits and frame-size T seconds we have:
  // (B < |max_payload_len_bytes| * 8) and (B/T < |max_rate_bps|), c.f.
  // SetISACMaxRate().
  //
  // Input:
  //   -max_payload_len_bytes : maximum payload size in bytes.
  //
  // Return value:
  //   -1 if failed to set the maximum payload-size.
  //    0 if the given length is set successfully.
  //
  virtual int SetISACMaxPayloadSize(int max_payload_len_bytes) = 0;
 889
  ///////////////////////////////////////////////////////////////////////////
  // int32_t ConfigISACBandwidthEstimator()
  // Call this function to configure the bandwidth estimator of iSAC.
  // During the adaptation of bit-rate, iSAC automatically adjusts the
  // frame-size (either 30 or 60 ms) to save on RTP header. The initial
  // frame-size can be specified by the first argument. The configuration also
  // regards the initial estimate of bandwidths. The estimator starts from
  // this point and converges to the actual bottleneck. This is given by the
  // second parameter. Furthermore, it is also possible to control the
  // adaptation of frame-size. This is specified by the last parameter.
  //
  // Input:
  //   -init_frame_size_ms : initial frame-size in milliseconds. For iSAC-wb
  //                         30 ms and 60 ms (default) are acceptable values,
  //                         and for iSAC-swb 30 ms is the only acceptable
  //                         value. Zero indicates default value.
  //   -init_rate_bps      : initial estimate of the bandwidth. Values
  //                         between 10000 and 58000 are acceptable.
  //   -enforce_frame_size : if true, the frame-size will not be adapted.
  //                         Defaults to false.
  //
  // Return value:
  //   -1 if failed to configure the bandwidth estimator,
  //    0 if the configuration was successfully applied.
  //
  virtual int32_t ConfigISACBandwidthEstimator(
      int init_frame_size_ms,
      int init_rate_bps,
      bool enforce_frame_size = false) = 0;
 918
  ///////////////////////////////////////////////////////////////////////////
  // int SetOpusMaxPlaybackRate()
  // If the current send codec is Opus, informs it about the maximum playback
  // rate the receiver will render. Opus can use this information to optimize
  // the bit rate and increase the computation efficiency.
  //
  // Input:
  //   -frequency_hz            : maximum playback rate in Hz.
  //
  // Return value:
  //   -1 if the current send codec is not Opus or
  //      an error occurred in setting the maximum playback rate,
  //    0 if the maximum playback rate is set successfully.
  //
  virtual int SetOpusMaxPlaybackRate(int frequency_hz) = 0;
 934
 935   ///////////////////////////////////////////////////////////////////////////
 936   //   statistics
 937   //
 938
  ///////////////////////////////////////////////////////////////////////////
  // int32_t NetworkStatistics()
  // Get network statistics. Note that the internal statistics of NetEq are
  // reset by this call.
  //
  // Output:
  //   -network_statistics : pointer to a structure that receives the network
  //                         statistics.
  //
  // Return value:
  //   -1 if failed to fill in the network statistics,
  //    0 if the statistics are filled in successfully.
  //
  virtual int32_t NetworkStatistics(
      ACMNetworkStatistics* network_statistics) = 0;
 953
  //
  // Set an initial delay for playout.
  // With an initial delay, ACM plays out silence until the equivalent of
  // |delay_ms| of audio payload is accumulated in the NetEq jitter buffer.
  // Thereafter, ACM pulls audio from NetEq in its regular fashion, and the
  // given delay is maintained throughout the call, unless channel conditions
  // lead to a higher jitter buffer delay.
  //
  // Input:
  //   -delay_ms           : delay in milliseconds.
  //
  // Return values:
  //   -1 if failed to set the delay.
  //    0 if delay is set successfully.
  //
  virtual int SetInitialPlayoutDelay(int delay_ms) = 0;
 970
  //
  // Enable NACK and set the maximum size of the NACK list. If NACK is already
  // enabled then the maximum NACK list size is modified accordingly.
  //
  // If the sequence number of the last received packet is N, the sequence
  // numbers of the NACK list are in the range of
  // [N - |max_nack_list_size|, N).
  //
  // |max_nack_list_size| should be positive (non-zero) and less than or
  // equal to |Nack::kNackListSizeLimit|. Otherwise, no change is applied and
  // -1 is returned. 0 is returned on success.
  //
  virtual int EnableNack(size_t max_nack_list_size) = 0;
 983
  // Disable NACK (see EnableNack()).
  virtual void DisableNack() = 0;
 986
  //
  // Get a list of packets to be retransmitted. |round_trip_time_ms| is an
  // estimate of the round-trip-time (in milliseconds). Missing packets which
  // will be played out in a shorter time than the round-trip-time (with
  // respect to the time this API is called) will not be included in the list.
  //
  // A negative |round_trip_time_ms| results in an error message, and an empty
  // list is returned.
  //
  virtual std::vector<uint16_t> GetNackList(int round_trip_time_ms) const = 0;
 997
  //
  // Get statistics about decoding calls; the result is written to
  // |call_stats|.
  // NOTE(review): the AudioCoding interface later in this file describes its
  // counterpart as "timing statistics for calls to Get10MsAudio"; presumably
  // this one covers calls to PlayoutData10Ms() -- confirm.
  //
  virtual void GetDecodingCallStatistics(
      AudioDecodingCallStats* call_stats) const = 0;
1000 };
1001
// Forward declarations of types that the AudioCoding interface below only
// refers to through pointers.
class AudioEncoder;
class ReceiverInfo;
1004
1005 class AudioCoding {
1006  public:
1007   struct Config {
1008     Config()
1009         : neteq_config(),
1010           clock(Clock::GetRealTimeClock()),
1011           transport(NULL),
1012           vad_callback(NULL),
1013           play_dtmf(true),
1014           initial_playout_delay_ms(0),
1015           playout_channels(1),
1016           playout_frequency_hz(32000) {}
1017
1018     AudioCodingModule::Config ToOldConfig() const {
1019       AudioCodingModule::Config old_config;
1020       old_config.id = 0;
1021       old_config.neteq_config = neteq_config;
1022       old_config.clock = clock;
1023       return old_config;
1024     }
1025
1026     NetEq::Config neteq_config;
1027     Clock* clock;
1028     AudioPacketizationCallback* transport;
1029     ACMVADCallback* vad_callback;
1030     bool play_dtmf;
1031     int initial_playout_delay_ms;
1032     int playout_channels;
1033     int playout_frequency_hz;
1034   };
1035
1036   static AudioCoding* Create(const Config& config);
1037   virtual ~AudioCoding() {};
1038
1039   // Registers a codec, specified by |send_codec|, as sending codec.
1040   // This API can be called multiple times. The last codec registered overwrites
1041   // the previous ones. Returns true if successful, false if not.
1042   //
1043   // Note: If a stereo codec is registered as send codec, VAD/DTX will
1044   // automatically be turned off, since it is not supported for stereo sending.
1045   virtual bool RegisterSendCodec(AudioEncoder* send_codec) = 0;
1046
1047   // Temporary solution to be used during refactoring:
1048   // |encoder_type| should be from the anonymous enum in acm2::ACMCodecDB.
1049   virtual bool RegisterSendCodec(int encoder_type,
1050                                  uint8_t payload_type,
1051                                  int frame_size_samples = 0) = 0;
1052
1053   // Returns the encoder object currently in use. This is the same as the
1054   // codec that was registered in the latest call to RegisterSendCodec().
1055   virtual const AudioEncoder* GetSenderInfo() const = 0;
1056
1057   // Temporary solution to be used during refactoring.
1058   virtual const CodecInst* GetSenderCodecInst() = 0;
1059
1060   // Adds 10 ms of raw (PCM) audio data to the encoder. If the sampling
1061   // frequency of the audio does not match the sampling frequency of the
1062   // current encoder, ACM will resample the audio.
1063   //
1064   // Return value:
1065   //      0   successfully added the frame.
1066   //     -1   some error occurred and data is not added.
1067   //   < -1   to add the frame to the buffer n samples had to be
1068   //          overwritten, -n is the return value in this case.
1069   // TODO(henrik.lundin): Make a better design for the return values. This one
1070   // is just a copy of the old API.
1071   virtual int Add10MsAudio(const AudioFrame& audio_frame) = 0;
1072
1073   // Returns a combined info about the currently used decoder(s).
1074   virtual const ReceiverInfo* GetReceiverInfo() const = 0;
1075
1076   // Registers a codec, specified by |receive_codec|, as receiving codec.
1077   // This API can be called multiple times. If registering with a payload type
1078   // that was already registered in a previous call, the latest call will
1079   // override previous calls. Returns true if successful, false if not.
1080   virtual bool RegisterReceiveCodec(AudioDecoder* receive_codec) = 0;
1081
1082   // Temporary solution:
1083   // |decoder_type| should be from the anonymous enum in acm2::ACMCodecDB.
1084   virtual bool RegisterReceiveCodec(int decoder_type, uint8_t payload_type) = 0;
1085
1086   // The following two methods both inserts a new packet to the receiver.
1087   // InsertPacket takes an RTP header input in |rtp_info|, while InsertPayload
1088   // only requires a payload type and a timestamp. The latter assumes that the
1089   // payloads come in the right order, and without any losses. In both cases,
1090   // |incoming_payload| contains the RTP payload after the RTP header. Return
1091   // true if successful, false if not.
1092   virtual bool InsertPacket(const uint8_t* incoming_payload,
1093                             int32_t payload_len_bytes,
1094                             const WebRtcRTPHeader& rtp_info) = 0;
1095
1096   // TODO(henrik.lundin): Remove this method?
1097   virtual bool InsertPayload(const uint8_t* incoming_payload,
1098                              int32_t payload_len_byte,
1099                              uint8_t payload_type,
1100                              uint32_t timestamp) = 0;
1101
1102   // These two methods set a minimum and maximum jitter buffer delay in
1103   // milliseconds. The pupose is mainly to adjust the delay to synchronize
1104   // audio and video. The preferred jitter buffer size, computed by NetEq based
1105   // on the current channel conditions, is clamped from below and above by these
1106   // two methods. The given delay limits must be non-negative, less than
1107   // 10000 ms, and the minimum must be strictly smaller than the maximum.
1108   // Further, the maximum must be at lest one frame duration. If these
1109   // conditions are not met, false is returned. Giving the value 0 effectively
1110   // unsets the minimum or maximum delay limits.
1111   // Note that calling these methods is optional. If not called, NetEq will
1112   // determine the optimal buffer size based on the network conditions.
1113   virtual bool SetMinimumPlayoutDelay(int time_ms) = 0;
1114
1115   virtual bool SetMaximumPlayoutDelay(int time_ms) = 0;
1116
1117   // Returns the current value of the jitter buffer's preferred latency. This
1118   // is computed based on inter-arrival times and playout mode of NetEq. The
1119   // actual target delay is this value clamped from below and above by the
1120   // values specified through SetMinimumPlayoutDelay() and
1121   // SetMaximumPlayoutDelay(), respectively, if provided.
1122   // TODO(henrik.lundin) Rename to PreferredDelayMs?
1123   virtual int LeastRequiredDelayMs() const = 0;
1124
1125   // The send timestamp of an RTP packet is associated with the decoded
1126   // audio of the packet in question. This function returns the timestamp of
1127   // the latest audio delivered by Get10MsAudio(). Returns false if no timestamp
1128   // can be provided, true otherwise.
1129   virtual bool PlayoutTimestamp(uint32_t* timestamp) = 0;
1130
1131   // Delivers 10 ms of audio in |audio_frame|. Returns true if successful,
1132   // false otherwise.
1133   virtual bool Get10MsAudio(AudioFrame* audio_frame) = 0;
1134
1135   // Returns the network statistics. Note that the internal statistics of NetEq
1136   // are reset by this call. Returns true if successful, false otherwise.
1137   virtual bool NetworkStatistics(ACMNetworkStatistics* network_statistics) = 0;
1138
1139   // Enables NACK and sets the maximum size of the NACK list. If NACK is already
1140   // enabled then the maximum NACK list size is modified accordingly. Returns
1141   // true if successful, false otherwise.
1142   //
1143   // If the sequence number of last received packet is N, the sequence numbers
1144   // of NACK list are in the range of [N - |max_nack_list_size|, N).
1145   //
1146   // |max_nack_list_size| should be positive and less than or equal to
1147   // |Nack::kNackListSizeLimit|.
1148   virtual bool EnableNack(size_t max_nack_list_size) = 0;
1149
1150   // Disables NACK.
1151   virtual void DisableNack() = 0;
1152
1153
1154   // Temporary solution to be used during refactoring.
1155   // If DTX is enabled and the codec does not have internal DTX/VAD
1156   // WebRtc VAD will be automatically enabled and |enable_vad| is ignored.
1157   //
1158   // If DTX is disabled but VAD is enabled no DTX packets are sent,
1159   // regardless of whether the codec has internal DTX/VAD or not. In this
1160   // case, WebRtc VAD is running to label frames as active/in-active.
1161   //
1162   // NOTE! VAD/DTX is not supported when sending stereo.
1163   //
1164   // Return true if successful, false otherwise.
1165   virtual bool SetVad(bool enable_dtx,
1166                       bool enable_vad,
1167                       ACMVADMode vad_mode) = 0;
1168
1169   // Returns a list of packets to request retransmission of.
1170   // |round_trip_time_ms| is an estimate of the round-trip-time (in
1171   // milliseconds). Missing packets which will be decoded sooner than the
1172   // round-trip-time (with respect to the time this API is called) will not be
1173   // included in the list.
1174   // |round_trip_time_ms| must be non-negative.
1175   virtual std::vector<uint16_t> GetNackList(int round_trip_time_ms) const = 0;
1176
1177   // Returns the timing statistics for calls to Get10MsAudio.
1178   virtual void GetDecodingCallStatistics(
1179       AudioDecodingCallStats* call_stats) const = 0;
1180 };
1181
1182 }  // namespace webrtc
1183
1184 #endif  // WEBRTC_MODULES_AUDIO_CODING_MAIN_INTERFACE_AUDIO_CODING_MODULE_H_