2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
11 #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_INTERFACE_AUDIO_CODING_MODULE_H_
12 #define WEBRTC_MODULES_AUDIO_CODING_MAIN_INTERFACE_AUDIO_CODING_MODULE_H_
16 #include "webrtc/common_types.h"
17 #include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h"
18 #include "webrtc/modules/audio_coding/main/interface/audio_coding_module_typedefs.h"
19 #include "webrtc/modules/audio_coding/neteq/interface/neteq.h"
20 #include "webrtc/modules/interface/module.h"
21 #include "webrtc/system_wrappers/interface/clock.h"
22 #include "webrtc/typedefs.h"
26 // forward declarations
28 struct WebRtcRTPHeader;
30 class RTPFragmentationHeader;
32 #define WEBRTC_10MS_PCM_AUDIO 960 // 16 bits super wideband 48 kHz
34 // Callback class used for sending data ready to be packetized
35 class AudioPacketizationCallback {
37 virtual ~AudioPacketizationCallback() {}
39 virtual int32_t SendData(
43 const uint8_t* payload_data,
44 uint16_t payload_len_bytes,
45 const RTPFragmentationHeader* fragmentation) = 0;
48 // Callback class used for inband Dtmf detection
49 class AudioCodingFeedback {
51 virtual ~AudioCodingFeedback() {}
53 virtual int32_t IncomingDtmf(const uint8_t digit_dtmf,
57 // Callback class used for reporting VAD decision
58 class ACMVADCallback {
60 virtual ~ACMVADCallback() {}
62 virtual int32_t InFrameType(int16_t frameType) = 0;
65 // Callback class used for reporting receiver statistics
66 class ACMVQMonCallback {
68 virtual ~ACMVQMonCallback() {}
70 virtual int32_t NetEqStatistics(
71 const int32_t id, // current ACM id
72 const uint16_t MIUsValid, // valid voice duration in ms
73 const uint16_t MIUsReplaced, // concealed voice duration in ms
74 const uint8_t eventFlags, // concealed voice flags
75 const uint16_t delayMS) = 0; // average delay in ms
78 class AudioCodingModule: public Module {
80 AudioCodingModule() {}
87 clock(Clock::GetRealTimeClock()) {}
90 NetEq::Config neteq_config;
94 ///////////////////////////////////////////////////////////////////////////
95 // Creation and destruction of an ACM.
97 // The second method is used for testing where a simulated clock can be
98 // injected into ACM. ACM will take the ownership of the object clock and
99 // delete it when destroyed.
101 static AudioCodingModule* Create(int id);
102 static AudioCodingModule* Create(int id, Clock* clock);
103 virtual ~AudioCodingModule() {};
105 ///////////////////////////////////////////////////////////////////////////
109 ///////////////////////////////////////////////////////////////////////////
110 // int NumberOfCodecs()
111 // Returns the number of supported codecs.
114 // number of supported codecs.
116 static int NumberOfCodecs();
118 ///////////////////////////////////////////////////////////////////////////
120 // Get supported codec with list number.
123 // -list_id : list number.
126 // -codec : a structure where the parameters of the codec,
127 // given by list number is written to.
130 // -1 if the list number (list_id) is invalid.
133 static int Codec(int list_id, CodecInst* codec);
135 ///////////////////////////////////////////////////////////////////////////
137 // Get supported codec with the given codec name, sampling frequency, and
138 // a given number of channels.
141 // -payload_name : name of the codec.
142 // -sampling_freq_hz : sampling frequency of the codec. Note! for RED
143 // a sampling frequency of -1 is a valid input.
144 // -channels : number of channels ( 1 - mono, 2 - stereo).
147 // -codec : a structure where the function returns the
148 // default parameters of the codec.
151 // -1 if no codec matches the given parameters.
154 static int Codec(const char* payload_name, CodecInst* codec,
155 int sampling_freq_hz, int channels);
157 ///////////////////////////////////////////////////////////////////////////
160 // Returns the list number of the given codec name, sampling frequency, and
161 // a given number of channels.
164 // -payload_name : name of the codec.
165 // -sampling_freq_hz : sampling frequency of the codec. Note! for RED
166 // a sampling frequency of -1 is a valid input.
167 // -channels : number of channels ( 1 - mono, 2 - stereo).
170 // if the codec is found, the index of the codec in the list,
171 // -1 if the codec is not found.
173 static int Codec(const char* payload_name, int sampling_freq_hz,
176 ///////////////////////////////////////////////////////////////////////////
177 // bool IsCodecValid()
178 // Checks the validity of the parameters of the given codec.
181 // -codec : the structure which keeps the parameters of the
185 // true if the parameters are valid,
186 // false if any parameter is not valid.
188 static bool IsCodecValid(const CodecInst& codec);
190 ///////////////////////////////////////////////////////////////////////////
194 ///////////////////////////////////////////////////////////////////////////
195 // int32_t InitializeSender()
196 // Any encoder-related state of ACM will be initialized to the
197 // same state when ACM is created. This will not interrupt or
198 // affect decoding functionality of ACM. ACM will lose all the
199 // encoding-related settings by calling this function.
200 // For instance, a send codec has to be registered again.
203 // -1 if failed to initialize,
206 virtual int32_t InitializeSender() = 0;
208 ///////////////////////////////////////////////////////////////////////////
209 // int32_t ResetEncoder()
210 // This API resets the states of encoder. All the encoder settings, such as
211 // send-codec or VAD/DTX, will be preserved.
214 // -1 if failed to initialize,
217 virtual int32_t ResetEncoder() = 0;
219 ///////////////////////////////////////////////////////////////////////////
220 // int32_t RegisterSendCodec()
221 // Registers a codec, specified by |send_codec|, as sending codec.
222 // This API can be called multiple times to register a codec. The last codec
223 // registered overwrites the previous ones.
224 // The API can also be used to change payload type for CNG and RED, which are
225 // registered by default to default payload types.
226 // Note that registering CNG and RED won't overwrite speech codecs.
227 // This API can be called to set/change the send payload-type, frame-size
228 // or encoding rate (if applicable for the codec).
230 // Note: If a stereo codec is registered as send codec, VAD/DTX will
231 // automatically be turned off, since it is not supported for stereo sending.
233 // Note: If a secondary encoder is already registered, and the new send-codec
234 // has a sampling rate that does not match the secondary encoder, the
235 // secondary encoder will be unregistered.
238 // -send_codec : Parameters of the codec to be registered, c.f.
239 // common_types.h for the definition of
243 // -1 if failed to initialize,
246 virtual int32_t RegisterSendCodec(const CodecInst& send_codec) = 0;
248 ///////////////////////////////////////////////////////////////////////////
249 // int RegisterSecondarySendCodec()
250 // Register a secondary encoder to enable dual-streaming. If a secondary
251 // codec is already registered, it will be removed before the new one is
254 // Note: The secondary encoder will be unregistered if a primary codec
255 // is set with a sampling rate which does not match that of the existing
259 // -send_codec : Parameters of the codec to be registered, c.f.
260 // common_types.h for the definition of
264 // -1 if failed to register,
267 virtual int RegisterSecondarySendCodec(const CodecInst& send_codec) = 0;
269 ///////////////////////////////////////////////////////////////////////////
270 // void UnregisterSecondarySendCodec()
271 // Unregister the secondary encoder to disable dual-streaming.
273 virtual void UnregisterSecondarySendCodec() = 0;
275 ///////////////////////////////////////////////////////////////////////////
276 // int32_t SendCodec()
277 // Get parameters for the codec currently registered as send codec.
280 // -current_send_codec : parameters of the send codec.
283 // -1 if failed to get send codec,
286 virtual int32_t SendCodec(CodecInst* current_send_codec) const = 0;
288 ///////////////////////////////////////////////////////////////////////////
289 // int SecondarySendCodec()
290 // Get the codec parameters for the current secondary send codec.
293 // -secondary_codec : parameters of the secondary send codec.
296 // -1 if failed to get send codec,
299 virtual int SecondarySendCodec(CodecInst* secondary_codec) const = 0;
301 ///////////////////////////////////////////////////////////////////////////
302 // int32_t SendFrequency()
303 // Get the sampling frequency of the current encoder in Hertz.
306 // positive; sampling frequency [Hz] of the current encoder.
307 // -1 if an error has happened.
309 virtual int32_t SendFrequency() const = 0;
311 ///////////////////////////////////////////////////////////////////////////
313 // Get encoding bit-rate in bits per second.
316 // positive; encoding rate in bits/sec,
317 // -1 if an error has happened.
319 virtual int32_t SendBitrate() const = 0;
321 ///////////////////////////////////////////////////////////////////////////
322 // int32_t SetReceivedEstimatedBandwidth()
323 // Set available bandwidth [bits/sec] of the up-link channel.
324 // This information is used for traffic shaping, and is currently only
325 // supported if iSAC is the send codec.
328 // -bw : bandwidth in bits/sec estimated for
331 // -1 if error occurred in setting the bandwidth,
332 // 0 bandwidth is set successfully.
334 // TODO(henrik.lundin) Unused. Remove?
335 virtual int32_t SetReceivedEstimatedBandwidth(
336 const int32_t bw) = 0;
338 ///////////////////////////////////////////////////////////////////////////
339 // int32_t RegisterTransportCallback()
340 // Register a transport callback which will be called to deliver
341 // the encoded buffers whenever Process() is called and a
342 // bit-stream is ready.
345 // -transport : pointer to the callback class
346 // transport->SendData() is called whenever
347 // Process() is called and bit-stream is ready
351 // -1 if the transport callback could not be registered
352 // 0 if registration is successful.
354 virtual int32_t RegisterTransportCallback(
355 AudioPacketizationCallback* transport) = 0;
357 ///////////////////////////////////////////////////////////////////////////
358 // int32_t Add10MsData()
359 // Add 10MS of raw (PCM) audio data to the encoder. If the sampling
360 // frequency of the audio does not match the sampling frequency of the
361 // current encoder ACM will resample the audio.
364 // -audio_frame : the input audio frame, containing raw audio
365 // sampling frequency etc.,
366 // c.f. module_common_types.h for definition of
370 // 0 successfully added the frame.
371 // -1 some error occurred and data is not added.
372 // < -1 to add the frame to the buffer n samples had to be
373 // overwritten, -n is the return value in this case.
375 virtual int32_t Add10MsData(const AudioFrame& audio_frame) = 0;
377 ///////////////////////////////////////////////////////////////////////////
378 // (RED) Redundant Coding
381 ///////////////////////////////////////////////////////////////////////////
382 // int32_t SetREDStatus()
383 // configure RED status i.e. on/off.
385 // RFC 2198 describes a solution which has a single payload type which
386 // signifies a packet with redundancy. That packet then becomes a container,
387 // encapsulating multiple payloads into a single RTP packet.
388 // Such a scheme is flexible, since any amount of redundancy may be
389 // encapsulated within a single packet. There is, however, a small overhead
390 // since each encapsulated payload must be preceded by a header indicating
391 // the type of data enclosed.
394 // -enable_red : if true RED is enabled, otherwise RED is
398 // -1 if failed to set RED status,
401 virtual int32_t SetREDStatus(bool enable_red) = 0;
403 ///////////////////////////////////////////////////////////////////////////
408 // true if RED is enabled,
409 // false if RED is disabled.
411 virtual bool REDStatus() const = 0;
413 ///////////////////////////////////////////////////////////////////////////
414 // (FEC) Forward Error Correction (codec internal)
417 ///////////////////////////////////////////////////////////////////////////
418 // int SetCodecFEC()
419 // Configures codec internal FEC status i.e. on/off. Has no effect on codecs
420 // that do not provide internal FEC.
423 // -enable_codec_fec : if true FEC will be enabled otherwise the FEC is
427 // -1 if failed, or the codec does not support FEC
430 virtual int SetCodecFEC(bool enable_codec_fec) = 0;
432 ///////////////////////////////////////////////////////////////////////////
434 // Gets status of codec internal FEC.
437 // true if FEC is enabled,
438 // false if FEC is disabled.
440 virtual bool CodecFEC() const = 0;
442 ///////////////////////////////////////////////////////////////////////////
443 // int SetPacketLossRate()
444 // Sets expected packet loss rate for encoding. Some encoders provide packet-
445 // loss-aware encoding to make the stream less sensitive to packet losses,
446 // through e.g., FEC. No effect on codecs that do not provide such encoding.
449 // -packet_loss_rate : expected packet loss rate (0 -- 100 inclusive).
452 // -1 if failed to set packet loss rate,
455 virtual int SetPacketLossRate(int packet_loss_rate) = 0;
457 ///////////////////////////////////////////////////////////////////////////
458 // (VAD) Voice Activity Detection
461 ///////////////////////////////////////////////////////////////////////////
463 // If DTX is enabled & the codec does not have internal DTX/VAD
464 // WebRtc VAD will be automatically enabled and |enable_vad| is ignored.
466 // If DTX is disabled but VAD is enabled no DTX packets are sent,
467 // regardless of whether the codec has internal DTX/VAD or not. In this
468 // case, WebRtc VAD is running to label frames as active/in-active.
470 // NOTE! VAD/DTX is not supported when sending stereo.
473 // -enable_dtx : if true DTX is enabled,
474 // otherwise DTX is disabled.
475 // -enable_vad : if true VAD is enabled,
476 // otherwise VAD is disabled.
477 // -vad_mode : determines the aggressiveness of VAD. A more
478 // aggressive mode results in more frames labeled
479 // as in-active, c.f. definition of
480 // ACMVADMode in audio_coding_module_typedefs.h
484 // -1 if failed to set up VAD/DTX,
487 virtual int32_t SetVAD(const bool enable_dtx = true,
488 const bool enable_vad = false,
489 const ACMVADMode vad_mode = VADNormal) = 0;
491 ///////////////////////////////////////////////////////////////////////////
496 // -dtx_enabled : is set to true if DTX is enabled, otherwise
498 // -vad_enabled : is set to true if VAD is enabled, otherwise
500 // -vad_mode : is set to the current aggressiveness of VAD.
503 // -1 if fails to retrieve the setting of DTX/VAD,
506 virtual int32_t VAD(bool* dtx_enabled, bool* vad_enabled,
507 ACMVADMode* vad_mode) const = 0;
509 ///////////////////////////////////////////////////////////////////////////
510 // int32_t ReplaceInternalDTXWithWebRtc()
511 // Used to replace codec internal DTX scheme with WebRtc. This is only
512 // supported for G729, where this call replaces AnnexB with WebRtc DTX.
515 // -use_webrtc_dtx : if false (default) the codec built-in DTX/VAD
516 // scheme is used, otherwise the internal DTX is
517 // replaced with WebRtc DTX/VAD.
520 // -1 if failed to replace codec internal DTX with WebRtc,
523 virtual int32_t ReplaceInternalDTXWithWebRtc(
524 const bool use_webrtc_dtx = false) = 0;
526 ///////////////////////////////////////////////////////////////////////////
527 // int32_t IsInternalDTXReplacedWithWebRtc()
528 // Get status if the codec internal DTX (when such exists) is replaced with
529 // WebRtc DTX. This is only supported for G729.
532 // -uses_webrtc_dtx : is set to true if the codec internal DTX is
533 // replaced with WebRtc DTX/VAD, otherwise it is set
537 // -1 if failed to determine if codec internal DTX is replaced with WebRtc,
540 virtual int32_t IsInternalDTXReplacedWithWebRtc(
541 bool* uses_webrtc_dtx) = 0;
543 ///////////////////////////////////////////////////////////////////////////
544 // int32_t RegisterVADCallback()
545 // Call this method to register a callback function which is called
546 // any time that ACM encounters an empty frame. That is a frame which is
547 // recognized inactive. Depending on the codec WebRtc VAD or internal codec
548 // VAD is employed to identify a frame as active/inactive.
551 // -vad_callback : pointer to a callback function.
554 // -1 if failed to register the callback function.
555 // 0 if the callback function is registered successfully.
557 virtual int32_t RegisterVADCallback(ACMVADCallback* vad_callback) = 0;
559 ///////////////////////////////////////////////////////////////////////////
563 ///////////////////////////////////////////////////////////////////////////
564 // int32_t InitializeReceiver()
565 // Any decoder-related state of ACM will be initialized to the
566 // same state when ACM is created. This will not interrupt or
567 // affect encoding functionality of ACM. ACM would lose all the
568 // decoding-related settings by calling this function.
569 // For instance, all registered codecs are deleted and have to be
573 // -1 if failed to initialize,
576 virtual int32_t InitializeReceiver() = 0;
578 ///////////////////////////////////////////////////////////////////////////
579 // int32_t ResetDecoder()
580 // This API resets the states of decoders. ACM will not lose any
581 // decoder-related settings, such as registered codecs.
584 // -1 if failed to initialize,
587 virtual int32_t ResetDecoder() = 0;
589 ///////////////////////////////////////////////////////////////////////////
590 // int32_t ReceiveFrequency()
591 // Get sampling frequency of the last received payload.
594 // non-negative the sampling frequency in Hertz.
595 // -1 if an error has occurred.
597 virtual int32_t ReceiveFrequency() const = 0;
599 ///////////////////////////////////////////////////////////////////////////
600 // int32_t PlayoutFrequency()
601 // Get sampling frequency of audio played out.
604 // the sampling frequency in Hertz.
606 virtual int32_t PlayoutFrequency() const = 0;
608 ///////////////////////////////////////////////////////////////////////////
609 // int32_t RegisterReceiveCodec()
610 // Register possible decoders, can be called multiple times for
611 // codecs, CNG-NB, CNG-WB, CNG-SWB, AVT and RED.
614 // -receive_codec : parameters of the codec to be registered, c.f.
615 // common_types.h for the definition of
619 // -1 if failed to register the codec
620 // 0 if the codec registered successfully.
622 virtual int32_t RegisterReceiveCodec(
623 const CodecInst& receive_codec) = 0;
625 ///////////////////////////////////////////////////////////////////////////
626 // int UnregisterReceiveCodec()
627 // Unregister the codec currently registered with a specific payload type
628 // from the list of possible receive codecs.
631 // -payload_type : The number representing the payload type to
635 // -1 if it fails to unregister.
636 // 0 if the given codec is successfully unregistered.
638 virtual int UnregisterReceiveCodec(
639 uint8_t payload_type) = 0;
641 ///////////////////////////////////////////////////////////////////////////
642 // int32_t ReceiveCodec()
643 // Get the codec associated with last received payload.
646 // -curr_receive_codec : parameters of the codec associated with the last
647 // received payload, c.f. common_types.h for
648 // the definition of CodecInst.
651 // -1 if failed to retrieve the codec,
652 // 0 if the codec is successfully retrieved.
654 virtual int32_t ReceiveCodec(CodecInst* curr_receive_codec) const = 0;
656 ///////////////////////////////////////////////////////////////////////////
657 // int32_t IncomingPacket()
658 // Call this function to insert a parsed RTP packet into ACM.
661 // -incoming_payload : received payload.
662 // -payload_len_bytes : the length of payload in bytes.
663 // -rtp_info : the relevant information retrieved from RTP
667 // -1 if failed to push in the payload
668 // 0 if payload is successfully pushed in.
670 virtual int32_t IncomingPacket(const uint8_t* incoming_payload,
671 const int32_t payload_len_bytes,
672 const WebRtcRTPHeader& rtp_info) = 0;
674 ///////////////////////////////////////////////////////////////////////////
675 // int32_t IncomingPayload()
676 // Call this API to push incoming payloads when there is no rtp-info.
677 // The rtp-info will be created in ACM. One usage for this API is when
678 // pre-encoded files are pushed in ACM
681 // -incoming_payload : received payload.
682 // -payload_len_byte : the length, in bytes, of the received payload.
683 // -payload_type : the payload-type. This specifies which codec has
684 // to be used to decode the payload.
685 // -timestamp : send timestamp of the payload. ACM starts with
686 // a random value and increments it by the
687 // packet-size, which is given when the codec in
688 // question is registered by RegisterReceiveCodec().
689 // Therefore, it is essential to have the timestamp
690 // if the frame-size differs from the registered
691 // value or if the incoming payload contains DTX
695 // -1 if failed to push in the payload
696 // 0 if payload is successfully pushed in.
698 virtual int32_t IncomingPayload(const uint8_t* incoming_payload,
699 const int32_t payload_len_byte,
700 const uint8_t payload_type,
701 const uint32_t timestamp = 0) = 0;
703 ///////////////////////////////////////////////////////////////////////////
704 // int SetMinimumPlayoutDelay()
705 // Set a minimum for the playout delay, used for lip-sync. NetEq maintains
706 // such a delay unless channel conditions require a higher delay.
709 // -time_ms : minimum delay in milliseconds.
712 // -1 if failed to set the delay,
713 // 0 if the minimum delay is set.
715 virtual int SetMinimumPlayoutDelay(int time_ms) = 0;
717 ///////////////////////////////////////////////////////////////////////////
718 // int SetMaximumPlayoutDelay()
719 // Set a maximum for the playout delay
722 // -time_ms : maximum delay in milliseconds.
725 // -1 if failed to set the delay,
726 // 0 if the maximum delay is set.
728 virtual int SetMaximumPlayoutDelay(int time_ms) = 0;
731 // The shortest latency, in milliseconds, required by the jitter buffer. This
732 // is computed based on inter-arrival times and playout mode of NetEq. The
733 // actual delay is the maximum of least-required-delay and the minimum-delay
734 // specified by the SetMinimumPlayoutDelay() API.
736 virtual int LeastRequiredDelayMs() const = 0;
738 ///////////////////////////////////////////////////////////////////////////
739 // int32_t SetDtmfPlayoutStatus()
740 // Configure DTMF playout, i.e. whether out-of-band
741 // DTMF tones are played or not.
744 // -enable : if true, enables playout of out-of-band DTMF tones,
748 // -1 if the method fails, e.g. DTMF playout is not supported.
749 // 0 if the status is set successfully.
751 virtual int32_t SetDtmfPlayoutStatus(const bool enable) = 0;
753 ///////////////////////////////////////////////////////////////////////////
754 // bool DtmfPlayoutStatus()
755 // Get Dtmf playout status.
758 // true if out-of-band Dtmf tones are played,
759 // false if playout of Dtmf tones is disabled.
761 virtual bool DtmfPlayoutStatus() const = 0;
763 ///////////////////////////////////////////////////////////////////////////
764 // int32_t PlayoutTimestamp()
765 // The send timestamp of an RTP packet is associated with the decoded
766 // audio of the packet in question. This function returns the timestamp of
767 // the latest audio obtained by calling PlayoutData10Ms().
770 // -timestamp : a pointer to a uint32_t to receive the
773 // 0 if the output is a correct timestamp.
774 // -1 if failed to output the correct timestamp.
776 // TODO(tlegrand): Change function to return the timestamp.
777 virtual int32_t PlayoutTimestamp(uint32_t* timestamp) = 0;
779 ///////////////////////////////////////////////////////////////////////////
780 // int32_t DecoderEstimatedBandwidth()
781 // Get the estimate of the Bandwidth, in bits/second, based on the incoming
782 // stream. This API is useful in one-way communication scenarios, where
783 // the bandwidth information is sent in an out-of-band fashion.
784 // Currently only supported if iSAC is registered as a receiver.
787 // >0 bandwidth in bits/second.
788 // -1 if failed to get a bandwidth estimate.
790 virtual int32_t DecoderEstimatedBandwidth() const = 0;
792 ///////////////////////////////////////////////////////////////////////////
793 // int32_t SetPlayoutMode()
794 // Call this API to set the playout mode. Playout mode could be optimized
795 // for i) voice, ii) FAX or iii) streaming. In Voice mode, NetEQ is
796 // optimized to deliver highest audio quality while maintaining a minimum
797 // delay. In FAX mode, NetEQ is optimized to have as few delay changes as
798 // possible and maintain a constant delay, perhaps large relative to voice
799 // mode, to avoid PLC. In streaming mode, we tolerate a little more delay
800 // to achieve better jitter robustness.
803 // -mode : playout mode. Possible inputs are:
809 // -1 if failed to set the mode,
812 virtual int32_t SetPlayoutMode(const AudioPlayoutMode mode) = 0;
814 ///////////////////////////////////////////////////////////////////////////
815 // AudioPlayoutMode PlayoutMode()
816 // Get playout mode, i.e. whether it is speech, FAX or streaming. See
817 // audio_coding_module_typedefs.h for definition of AudioPlayoutMode.
820 // voice: is for voice output,
821 // fax: a mode that is optimized for receiving FAX signals.
822 // In this mode NetEq tries to maintain a constant high
823 // delay to avoid PLC if possible.
824 // streaming: a mode that is suitable for streaming. In this mode we
825 // accept longer delay to improve jitter robustness.
827 virtual AudioPlayoutMode PlayoutMode() const = 0;
829 ///////////////////////////////////////////////////////////////////////////
830 // int32_t PlayoutData10Ms()
831 // Get 10 milliseconds of raw audio data for playout, at the given sampling
832 // frequency. ACM will perform a resampling if required.
835 // -desired_freq_hz : the desired sampling frequency, in Hertz, of the
836 // output audio. If set to -1, the function returns
837 // the audio at the current sampling frequency.
840 // -audio_frame : output audio frame which contains raw audio data
841 // and other relevant parameters, c.f.
842 // module_common_types.h for the definition of
846 // -1 if the function fails,
847 // 0 if the function succeeds.
849 virtual int32_t PlayoutData10Ms(int32_t desired_freq_hz,
850 AudioFrame* audio_frame) = 0;
852 ///////////////////////////////////////////////////////////////////////////
856 ///////////////////////////////////////////////////////////////////////////
857 // int SetISACMaxRate()
858 // Set the maximum instantaneous rate of iSAC. For a payload of B bits
859 // with a frame-size of T sec the instantaneous rate is B/T bits per
860 // second. Therefore, (B/T < |max_rate_bps|) and
861 // (B < |max_payload_len_bytes| * 8) are always satisfied for iSAC payloads,
862 // c.f. SetISACMaxPayloadSize().
865 // -max_rate_bps : maximum instantaneous bit-rate given in bits/sec.
868 // -1 if failed to set the maximum rate.
869 // 0 if the maximum rate is set successfully.
871 virtual int SetISACMaxRate(int max_rate_bps) = 0;
873 ///////////////////////////////////////////////////////////////////////////
874 // int SetISACMaxPayloadSize()
875 // Set the maximum payload size of iSAC packets. No iSAC payload,
876 // regardless of its frame-size, may exceed the given limit. For
877 // an iSAC payload of size B bits and frame-size T seconds we have;
878 // (B < |max_payload_len_bytes| * 8) and (B/T < |max_rate_bps|), c.f.
882 // -max_payload_len_bytes : maximum payload size in bytes.
885 // -1 if failed to set the maximum payload-size.
886 // 0 if the given length is set successfully.
888 virtual int SetISACMaxPayloadSize(int max_payload_len_bytes) = 0;
890 ///////////////////////////////////////////////////////////////////////////
891 // int32_t ConfigISACBandwidthEstimator()
892 // Call this function to configure the bandwidth estimator of ISAC.
893 // During the adaptation of bit-rate, iSAC automatically adjusts the
894 // frame-size (either 30 or 60 ms) to save on RTP header. The initial
895 // frame-size can be specified by the first argument. The configuration also
896 // regards the initial estimate of bandwidths. The estimator starts from
897 // this point and converges to the actual bottleneck. This is given by the
898 // second parameter. Furthermore, it is also possible to control the
899 // adaptation of frame-size. This is specified by the last parameter.
902 // -init_frame_size_ms : initial frame-size in milliseconds. For iSAC-wb
903 // 30 ms and 60 ms (default) are acceptable values,
904 // and for iSAC-swb 30 ms is the only acceptable
905 // value. Zero indicates default value.
906 // -init_rate_bps : initial estimate of the bandwidth. Values
907 // between 10000 and 58000 are acceptable.
908 // -enforce_frame_size : if true, the frame-size will not be adapted.
911 // -1 if failed to configure the bandwidth estimator,
912 // 0 if the configuration was successfully applied.
914 virtual int32_t ConfigISACBandwidthEstimator(
915 int init_frame_size_ms,
917 bool enforce_frame_size = false) = 0;
919 ///////////////////////////////////////////////////////////////////////////
920 // int SetOpusMaxPlaybackRate()
921 // If current send codec is Opus, informs it about maximum playback rate the
922 // receiver will render. Opus can use this information to optimize the bit
923 // rate and increase the computation efficiency.
926 // -frequency_hz : maximum playback rate in Hz.
929 // -1 if current send codec is not Opus or
930 // error occurred in setting the maximum playback rate,
931 // 0 maximum bandwidth is set successfully.
933 virtual int SetOpusMaxPlaybackRate(int frequency_hz) = 0;
935 ///////////////////////////////////////////////////////////////////////////
939 ///////////////////////////////////////////////////////////////////////////
940 // int32_t NetworkStatistics()
941 // Get network statistics. Note that the internal statistics of NetEq are
942 // reset by this call.
945 // -network_statistics : a structure that contains network statistics.
948 // -1 if failed to set the network statistics,
949 // 0 if statistics are set successfully.
951 virtual int32_t NetworkStatistics(
952 ACMNetworkStatistics* network_statistics) = 0;
955 // Set an initial delay for playout.
956 // An initial delay yields ACM playout silence until equivalent of |delay_ms|
957 // audio payload is accumulated in NetEq jitter. Thereafter, ACM pulls audio
958 // from NetEq in its regular fashion, and the given delay is maintained
959 // throughout the call, unless channel conditions require a higher jitter
963 // -delay_ms : delay in milliseconds.
966 // -1 if failed to set the delay.
967 // 0 if delay is set successfully.
969 virtual int SetInitialPlayoutDelay(int delay_ms) = 0;
972 // Enable NACK and set the maximum size of the NACK list. If NACK is already
973 // enabled then the maximum NACK list size is modified accordingly.
975 // If the sequence number of last received packet is N, the sequence numbers
976 // of NACK list are in the range of [N - |max_nack_list_size|, N).
978 // |max_nack_list_size| should be positive (non-zero) and less than or
979 // equal to |Nack::kNackListSizeLimit|. Otherwise, no change is applied and -1
980 // is returned. 0 is returned on success.
982 virtual int EnableNack(size_t max_nack_list_size) = 0;
// Counterpart to EnableNack(): disables NACK. Takes no arguments and returns
// nothing.
985 virtual void DisableNack() = 0;
988 // Gets a list of packets to be retransmitted. |round_trip_time_ms| is an
989 // estimate of the round-trip-time (in milliseconds). Missing packets which
990 // will be played out in a shorter time than the round-trip-time (with respect
991 // to the time this API is called) will not be included in the list.
993 // A negative |round_trip_time_ms| results in an error message and an empty list.
996 virtual std::vector<uint16_t> GetNackList(int round_trip_time_ms) const = 0;
// Reports decoding-call statistics through |call_stats|.
// NOTE(review): |call_stats| looks like an output parameter (non-const pointer,
// void return) — confirm against the implementation.
998 virtual void GetDecodingCallStatistics(
999 AudioDecodingCallStats* call_stats) const = 0;
// NOTE(review): the start of this Config definition (its header and the first
// constructor initializers) is not visible in this listing.
// Visible defaults: real-time clock, no initial playout delay, one playout
// channel, and a 32000 Hz playout frequency.
1010 clock(Clock::GetRealTimeClock()),
1014 initial_playout_delay_ms(0),
1015 playout_channels(1),
1016 playout_frequency_hz(32000) {}
// Builds an AudioCodingModule::Config that carries over |neteq_config| and
// |clock| from this Config.
1018 AudioCodingModule::Config ToOldConfig() const {
1019 AudioCodingModule::Config old_config;
1021 old_config.neteq_config = neteq_config;
1022 old_config.clock = clock;
// NetEq configuration; copied into the old-style config by ToOldConfig().
1026 NetEq::Config neteq_config;
// Callback used for sending data that is ready to be packetized.
1028 AudioPacketizationCallback* transport;
// Callback used for reporting VAD decisions.
1029 ACMVADCallback* vad_callback;
// Playout settings; the constructor initializes them to 0, 1 and 32000.
1031 int initial_playout_delay_ms;
1032 int playout_channels;
1033 int playout_frequency_hz;
// Creates an AudioCoding instance from |config|.
// NOTE(review): returns a raw pointer; presumably the caller takes ownership —
// confirm against the implementation.
1036 static AudioCoding* Create(const Config& config);
// Virtual destructor so that implementations can be destroyed through an
// AudioCoding pointer. (The stray ';' that followed the inline body has been
// removed; it is redundant and triggers -Wextra-semi.)
1037 virtual ~AudioCoding() {}
1039 // Registers the codec specified by |send_codec| as the sending codec.
1040 // This API can be called multiple times; the last codec registered replaces
1041 // the previous ones. Returns true if successful, false if not.
1043 // Note: If a stereo codec is registered as the send codec, VAD/DTX will
1044 // automatically be turned off, since it is not supported for stereo sending.
1045 virtual bool RegisterSendCodec(AudioEncoder* send_codec) = 0;
1047 // Temporary solution to be used during refactoring:
1048 // |encoder_type| should be a value from the anonymous enum in acm2::ACMCodecDB.
// NOTE(review): |frame_size_samples| has a default argument on a virtual
// function. Default arguments are resolved from the static type at the call
// site, so overrides should not declare a different default.
1049 virtual bool RegisterSendCodec(int encoder_type,
1050 uint8_t payload_type,
1051 int frame_size_samples = 0) = 0;
1053 // Returns the encoder object currently in use, i.e. the codec that was
1054 // registered in the latest call to RegisterSendCodec().
1055 virtual const AudioEncoder* GetSenderInfo() const = 0;
1057 // Temporary solution to be used during refactoring.
// NOTE(review): unlike GetSenderInfo(), this accessor is not declared const.
1058 virtual const CodecInst* GetSenderCodecInst() = 0;
1060 // Adds 10 ms of raw (PCM) audio data to the encoder. If the sampling
1061 // frequency of the audio does not match the sampling frequency of the
1062 // current encoder, ACM will resample the audio.
1065 // Returns: 0 if the frame was added successfully.
1066 // -1 if some error occurred and the data was not added.
1067 // < -1 if n samples had to be overwritten to add the frame to the
1068 // buffer; -n is the return value in this case.
1069 // TODO(henrik.lundin): Make a better design for the return values. This one
1070 // is just a copy of the old API.
1071 virtual int Add10MsAudio(const AudioFrame& audio_frame) = 0;
1073 // Returns combined information about the decoder(s) currently in use.
1074 virtual const ReceiverInfo* GetReceiverInfo() const = 0;
1076 // Registers the codec specified by |receive_codec| as a receiving codec.
1077 // This API can be called multiple times. When registering with a payload type
1078 // that was already registered in a previous call, the latest call
1079 // overrides the previous ones. Returns true if successful, false if not.
1080 virtual bool RegisterReceiveCodec(AudioDecoder* receive_codec) = 0;
1082 // Temporary solution:
1083 // |decoder_type| should be a value from the anonymous enum in acm2::ACMCodecDB.
1084 virtual bool RegisterReceiveCodec(int decoder_type, uint8_t payload_type) = 0;
1086 // The following two methods both insert a new packet into the receiver.
1087 // InsertPacket takes an RTP header input in |rtp_info|, while InsertPayload
1088 // only requires a payload type and a timestamp. The latter assumes that the
1089 // payloads arrive in the right order and without any losses. In both cases,
1090 // |incoming_payload| contains the RTP payload after the RTP header. Both
1091 // return true if successful, false if not.
1092 virtual bool InsertPacket(const uint8_t* incoming_payload,
1093 int32_t payload_len_bytes,
1094 const WebRtcRTPHeader& rtp_info) = 0;
1096 // TODO(henrik.lundin): Remove this method?
// NOTE(review): the length parameter is spelled |payload_len_byte| here but
// |payload_len_bytes| in InsertPacket().
1097 virtual bool InsertPayload(const uint8_t* incoming_payload,
1098 int32_t payload_len_byte,
1099 uint8_t payload_type,
1100 uint32_t timestamp) = 0;
1102 // These two methods set a minimum and maximum jitter buffer delay in
1103 // milliseconds. The purpose is mainly to adjust the delay to synchronize
1104 // audio and video. The preferred jitter buffer size, computed by NetEq based
1105 // on the current channel conditions, is clamped from below and above by these
1106 // two limits. The given delay limits must be non-negative, less than
1107 // 10000 ms, and the minimum must be strictly smaller than the maximum.
1108 // Further, the maximum must be at least one frame duration. If these
1109 // conditions are not met, false is returned. Giving the value 0 effectively
1110 // unsets the minimum or maximum delay limit.
1111 // Note that calling these methods is optional. If not called, NetEq will
1112 // determine the optimal buffer size based on the network conditions.
1113 virtual bool SetMinimumPlayoutDelay(int time_ms) = 0;
// Sets the maximum jitter buffer delay in milliseconds. The constraints on
// |time_ms| and the return value are described in the comment that precedes
// SetMinimumPlayoutDelay().
1115 virtual bool SetMaximumPlayoutDelay(int time_ms) = 0;
1117 // Returns the current value of the jitter buffer's preferred latency, which
1118 // is computed based on inter-arrival times and the playout mode of NetEq. The
1119 // actual target delay is this value clamped from below and above by the
1120 // values specified through SetMinimumPlayoutDelay() and
1121 // SetMaximumPlayoutDelay(), respectively, if provided.
1122 // TODO(henrik.lundin) Rename to PreferredDelayMs?
1123 virtual int LeastRequiredDelayMs() const = 0;
1125 // The send timestamp of an RTP packet is associated with the decoded
1126 // audio of the packet in question. This function writes the timestamp of the
1127 // latest audio delivered by Get10MsAudio() to |timestamp|. Returns false if
1128 // no timestamp can be provided, true otherwise.
1129 virtual bool PlayoutTimestamp(uint32_t* timestamp) = 0;
1131 // Delivers 10 ms of audio in |audio_frame|. Returns true if successful,
// false otherwise.
1133 virtual bool Get10MsAudio(AudioFrame* audio_frame) = 0;
1135 // Writes the network statistics to |network_statistics|. Note that the
1136 // internal statistics of NetEq are reset by this call. Returns true if
// successful, false otherwise.
1137 virtual bool NetworkStatistics(ACMNetworkStatistics* network_statistics) = 0;
1139 // Enables NACK and sets the maximum size of the NACK list. If NACK is already
1140 // enabled, the maximum NACK list size is modified accordingly. Returns
1141 // true if successful, false otherwise.
1143 // If the sequence number of the last received packet is N, the sequence
1144 // numbers of the NACK list are in the range of [N - |max_nack_list_size|, N).
1146 // |max_nack_list_size| should be positive and less than or equal to
1147 // |Nack::kNackListSizeLimit|.
1148 virtual bool EnableNack(size_t max_nack_list_size) = 0;
// Counterpart to EnableNack(): disables NACK. Takes no arguments and returns
// nothing.
1151 virtual void DisableNack() = 0;
1154 // Temporary solution to be used during refactoring.
1155 // If DTX is enabled and the codec does not have internal DTX/VAD,
1156 // WebRtc VAD will be automatically enabled and |enable_vad| is ignored.
1158 // If DTX is disabled but VAD is enabled, no DTX packets are sent,
1159 // regardless of whether the codec has internal DTX/VAD or not. In this
1160 // case, WebRtc VAD is running to label frames as active/inactive.
1162 // NOTE! VAD/DTX is not supported when sending stereo.
1164 // Returns true if successful, false otherwise.
// NOTE(review): the |enable_vad| parameter described above is not present in
// the parameter list as it appears in this listing — confirm against the
// original header.
1165 virtual bool SetVad(bool enable_dtx,
1167 ACMVADMode vad_mode) = 0;
1169 // Returns a list of packets for which retransmission should be requested.
1170 // |round_trip_time_ms| is an estimate of the round-trip-time (in
1171 // milliseconds). Missing packets which will be decoded sooner than the
1172 // round-trip-time (with respect to the time this API is called) are not
1173 // included in the list.
1174 // |round_trip_time_ms| must be non-negative.
1175 virtual std::vector<uint16_t> GetNackList(int round_trip_time_ms) const = 0;
1177 // Reports the timing statistics for calls to Get10MsAudio() via |call_stats|.
1178 virtual void GetDecodingCallStatistics(
1179 AudioDecodingCallStats* call_stats) const = 0;
1182 } // namespace webrtc
1184 #endif // WEBRTC_MODULES_AUDIO_CODING_MAIN_INTERFACE_AUDIO_CODING_MODULE_H_