2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
11 #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_SOURCE_AUDIO_CODING_MODULE_IMPL_H_
12 #define WEBRTC_MODULES_AUDIO_CODING_MAIN_SOURCE_AUDIO_CODING_MODULE_IMPL_H_
16 #include "webrtc/common_types.h"
17 #include "webrtc/engine_configurations.h"
18 #include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
19 #include "webrtc/modules/audio_coding/main/source/acm_codec_database.h"
20 #include "webrtc/modules/audio_coding/main/source/acm_neteq.h"
21 #include "webrtc/modules/audio_coding/main/source/acm_resampler.h"
22 #include "webrtc/modules/audio_coding/main/acm2/call_statistics.h"
23 #include "webrtc/system_wrappers/interface/scoped_ptr.h"
// Forward declarations. In the declarations visible in this header these
// types are used only through pointers, references, or scoped_ptr<>.
27 struct WebRtcACMAudioBuff;
28 struct WebRtcACMCodecParams;
29 class CriticalSectionWrapper;
39 class ACMDTMFDetection;
40 class ACMGenericCodec;
// Class derived from AudioCodingModule.
42 class AudioCodingModuleImpl : public AudioCodingModule {
// Constructs the module with identifier |id|.
// NOTE(review): |clock| is presumably the module's time source -- confirm
// against the implementation file.
44 AudioCodingModuleImpl(const int32_t id, Clock* clock);
45 ~AudioCodingModuleImpl();
47 // Change the unique identifier of this object.
48 virtual int32_t ChangeUniqueId(const int32_t id);
50 // Returns the number of milliseconds until the module wants a worker thread
// to call Process() -- NOTE(review): sentence ending presumed; confirm.
52 int32_t TimeUntilNextProcess();
54 // Process any pending tasks such as timeouts.
57 /////////////////////////////////////////
// Sender-side API.
61 // Initialize send codec.
62 int32_t InitializeSender();
// NOTE(review): no comment is present for ResetEncoder() here; presumably it
// resets the send-side encoder state -- confirm in the implementation file.
65 int32_t ResetEncoder();
// Register |send_codec| as a send codec.
67 // Can be called multiple times for Codec, CNG, RED.
68 int32_t RegisterSendCodec(const CodecInst& send_codec);
70 // Register Secondary codec for dual-streaming. Dual-streaming is activated
71 // right after the secondary codec is registered.
72 int RegisterSecondarySendCodec(const CodecInst& send_codec);
74 // Unregister the secondary codec. Dual-streaming is deactivated right after
75 // deregistering secondary codec.
76 void UnregisterSecondarySendCodec();
78 // Get the secondary codec.
// NOTE(review): presumably written to |secondary_codec|; the meaning of the
// int return value is not visible in this header -- confirm.
79 int SecondarySendCodec(CodecInst* secondary_codec) const;
81 // Get current send codec.
// NOTE(review): presumably written to |current_codec| -- confirm.
82 int32_t SendCodec(CodecInst* current_codec) const;
84 // Get current send frequency.
85 int32_t SendFrequency() const;
87 // Get encode bit-rate.
88 // Adaptive rate codecs return their current encode target rate, while other
89 // codecs return their long-term average or their fixed rate.
90 int32_t SendBitrate() const;
92 // Set available bandwidth, inform the encoder about the
93 // estimated bandwidth received from the remote party.
// NOTE(review): the unit of |bw| is not stated in this header -- confirm.
94 virtual int32_t SetReceivedEstimatedBandwidth(const int32_t bw);
96 // Register a transport callback which will be
97 // called to deliver the encoded buffers.
98 int32_t RegisterTransportCallback(AudioPacketizationCallback* transport);
100 // Add 10 ms of raw (PCM) audio data to the encoder.
101 int32_t Add10MsData(const AudioFrame& audio_frame);
103 /////////////////////////////////////////
104 // (FEC) Forward Error Correction
107 // Configure FEC status, i.e., on/off.
108 int32_t SetFECStatus(const bool enable_fec);
// Get FEC status.
111 bool FECStatus() const;
113 /////////////////////////////////////////
114 // (VAD) Voice Activity Detection
116 // (CNG) Comfort Noise Generation
// Set VAD/DTX status. When arguments are omitted the defaults are:
// DTX enabled, VAD disabled, mode VADNormal.
119 int32_t SetVAD(bool enable_dtx = true,
120 bool enable_vad = false,
121 ACMVADMode mode = VADNormal);
// NOTE(review): presumably reports the current DTX/VAD settings through the
// three output pointers -- confirm in the implementation file.
123 int32_t VAD(bool* dtx_enabled, bool* vad_enabled, ACMVADMode* mode) const;
// Register |vad_callback|.
// NOTE(review): when and how the callback is invoked is not visible here.
125 int32_t RegisterVADCallback(ACMVADCallback* vad_callback);
127 /////////////////////////////////////////
// Receiver-side API.
131 // Initialize receiver; resets the codec database, etc.
132 int32_t InitializeReceiver();
134 // Reset the decoder state.
135 int32_t ResetDecoder();
137 // Get current receive frequency.
138 int32_t ReceiveFrequency() const;
140 // Get current playout frequency.
141 int32_t PlayoutFrequency() const;
143 // Register possible receive codecs; can be called multiple times,
144 // for codecs, CNG, DTMF, RED.
145 int32_t RegisterReceiveCodec(const CodecInst& receive_codec);
147 // Get current received codec.
// NOTE(review): presumably written to |current_codec| -- confirm.
148 int32_t ReceiveCodec(CodecInst* current_codec) const;
150 // Incoming packet from the network, parsed and ready for decode.
// |payload_length| is presumably the size of |incoming_payload| in bytes --
// confirm against the implementation file.
151 int32_t IncomingPacket(const uint8_t* incoming_payload,
152 const int32_t payload_length,
153 const WebRtcRTPHeader& rtp_info);
155 // Incoming payloads without rtp-info; the rtp-info will be created in ACM.
156 // One usage for this API is when pre-encoded files are pushed in ACM.
// |timestamp| defaults to 0 when omitted.
157 int32_t IncomingPayload(const uint8_t* incoming_payload,
158 const int32_t payload_length,
159 const uint8_t payload_type,
160 const uint32_t timestamp = 0);
162 // NetEq minimum playout delay (used for lip-sync). The actual target delay
163 // is the max of |time_ms| and the required delay dictated by the channel.
164 int SetMinimumPlayoutDelay(int time_ms);
166 // NetEq maximum playout delay. The actual target delay is the min of
167 // |time_ms| and the required delay dictated by the channel.
168 int SetMaximumPlayoutDelay(int time_ms);
170 // The shortest latency, in milliseconds, required by the jitter buffer. This
171 // is computed based on inter-arrival times and playout mode of NetEq. The
172 // actual delay is the maximum of least-required-delay and the minimum-delay
173 // specified by the SetMinimumPlayoutDelay() API.
175 int LeastRequiredDelayMs() const ;
177 // Configure Dtmf playout status, i.e., on/off playout of the incoming outband Dtmf
179 int32_t SetDtmfPlayoutStatus(const bool enable);
181 // Get Dtmf playout status.
182 bool DtmfPlayoutStatus() const;
184 // Estimate the bandwidth based on the incoming stream, needed
185 // for one-way audio where the RTCP sends the BW estimate.
186 // This is also done in the RTP module.
187 int32_t DecoderEstimatedBandwidth() const;
189 // Set playout mode voice, fax.
190 int32_t SetPlayoutMode(const AudioPlayoutMode mode);
192 // Get playout mode voice, fax.
193 AudioPlayoutMode PlayoutMode() const;
195 // Get playout timestamp.
// NOTE(review): presumably written to |timestamp| -- confirm.
196 int32_t PlayoutTimestamp(uint32_t* timestamp);
198 // Get 10 milliseconds of raw audio data to play out, and
199 // automatically resample to the requested frequency if > 0.
200 int32_t PlayoutData10Ms(int32_t desired_freq_hz,
201 AudioFrame* audio_frame);
203 /////////////////////////////////////////
// Statistics and miscellaneous API.
// NOTE(review): presumably fills |statistics| with network statistics --
// confirm in the implementation file.
207 int32_t NetworkStatistics(ACMNetworkStatistics* statistics);
// NOTE(review): the ownership and type expectations for |inst| are not
// visible in this header.
209 void DestructEncoderInst(void* inst);
211 int16_t AudioBuffer(WebRtcACMAudioBuff& buffer);
213 // Get RED payload for iSAC. The method is called when 'this' ACM is
215 int32_t REDPayloadISAC(const int32_t isac_rate,
216 const int16_t isac_bw_estimate,
218 int16_t* length_bytes);
220 int16_t SetAudioBuffer(WebRtcACMAudioBuff& buffer);
222 uint32_t EarliestTimestamp() const;
// Takes |timestamp| by non-const reference.
// NOTE(review): presumably an output parameter -- confirm.
224 int32_t LastEncodedTimestamp(uint32_t& timestamp) const;
226 int32_t ReplaceInternalDTXWithWebRtc(const bool use_webrtc_dtx);
228 int32_t IsInternalDTXReplacedWithWebRtc(bool* uses_webrtc_dtx);
// iSAC-specific configuration.
// NOTE(review): valid ranges are not visible in this header.
230 int SetISACMaxRate(int max_bit_per_sec);
232 int SetISACMaxPayloadSize(int max_size_bytes);
// |enforce_frame_size| defaults to false when omitted.
234 int32_t ConfigISACBandwidthEstimator(
236 int rate_bit_per_sec,
237 bool enforce_frame_size = false);
239 int UnregisterReceiveCodec(uint8_t payload_type);
// NOTE(review): presumably returns the sequence numbers to be NACKed given
// the round-trip time -- confirm in the implementation file.
241 std::vector<uint16_t> GetNackList(int round_trip_time_ms) const;
// Helper declarations.
// NOTE(review): for the functions below with a "Safe" suffix, the suffix
// presumably follows the convention documented on SetVADSafe() (no lock is
// acquired; call only from inside a critical section) -- confirm.
244 void UnregisterSendCodec();
246 int32_t UnregisterReceiveCodecSafe(const int16_t id);
// NOTE(review): ownership of the returned pointer is not visible here.
248 ACMGenericCodec* CreateCodec(const CodecInst& codec);
// Takes |codec_params| by non-const reference.
// NOTE(review): presumably filled in for the decoder matching
// |payload_type| -- confirm.
250 int16_t DecoderParamByPlType(const uint8_t payload_type,
251 WebRtcACMCodecParams& codec_params) const;
// |frequency| defaults to 0 when omitted.
// NOTE(review): the meaning of 0 is not visible in this header.
253 int16_t DecoderListIDByPlName(
254 const char* name, const uint16_t frequency = 0) const;
256 int32_t InitializeReceiverSafe();
// NOTE(review): |caller_name| is presumably used for diagnostics -- confirm.
258 bool HaveValidEncoder(const char* caller_name) const;
260 int32_t RegisterRecCodecMSSafe(const CodecInst& receive_codec,
263 ACMNetEQ::JitterBuffer jitter_buffer);
265 // Set VAD/DTX status. This function does not acquire a lock, and it is
266 // intended to be called only from inside a critical section.
267 int SetVADSafe(bool enable_dtx, bool enable_vad, ACMVADMode mode);
269 // Process buffered audio when dual-streaming is not enabled. (This function
270 // is still used when RED is enabled.)
271 int ProcessSingleStream();
273 // Process buffered audio when dual-streaming is enabled, i.e. a secondary
274 // send codec is registered.
275 int ProcessDualStream();
277 // Preprocessing of input audio, including resampling and down-mixing if
278 // required, before pushing audio into encoder's buffer.
280 // in_frame: input audio-frame
281 // ptr_out: pointer to output audio_frame. If no preprocessing is required
282 // |ptr_out| will be pointing to |in_frame|, otherwise pointing to
283 // |preprocess_frame_|.
286 // -1: if encountering an error.
288 int PreprocessToAddData(const AudioFrame& in_frame,
289 const AudioFrame** ptr_out);
291 // Set initial playout delay.
292 // -delay_ms: delay in milliseconds.
295 // -1: if cannot set the delay.
296 // 0: if delay set successfully.
297 int SetInitialPlayoutDelay(int delay_ms);
299 // Enable NACK and set the maximum size of the NACK list.
300 int EnableNack(size_t max_nack_list_size);
// NOTE(review): presumably fills |call_stats| with decoding call statistics
// -- confirm in the implementation file.
305 void GetDecodingCallStatistics(AudioDecodingCallStats* call_stats) const;
308 // Change required states after starting to receive the codec corresponding
310 int UpdateUponReceivingCodec(int index);
312 // Remove all slaves and initialize a stereo slave with required codecs
314 int InitStereoSlave();
316 // Returns true if the codec at |index| is registered with the master and
317 // is a stereo codec, RED or CN.
318 bool IsCodecForSlave(int index) const;
320 int EncodeFragmentation(int fragmentation_index, int payload_type,
321 uint32_t current_timestamp,
322 ACMGenericCodec* encoder,
325 void ResetFragmentation(int vector_size);
// NOTE(review): presumably produces a silent |frame| at
// |desired_sample_rate_hz|; the meaning of the bool result is not visible
// in this header -- confirm.
327 bool GetSilence(int desired_sample_rate_hz, AudioFrame* frame);
329 // Push a synchronization packet into NetEq. Such packets result in a frame
330 // of zeros (not decoded by the corresponding decoder). The size of the frame
331 // is the same as last decoding. NetEq has a special payload for this.
332 // Call within the scope of ACM critical section.
333 int PushSyncPacketSafe();
335 // Update the parameters required in the initial phase of buffering, when
336 // initial playout delay is requested. Call within the scope of ACM critical
338 void UpdateBufferingSafe(const WebRtcRTPHeader& rtp_info,
339 int payload_len_bytes);
342 // Return the timestamp of the current time, computed according to the
343 // sampling rate of the codec identified by |codec_id|.
345 uint32_t NowTimestamp(int codec_id);
// Data members.
// NOTE(review): presumably the callback passed to
// RegisterTransportCallback() -- confirm.
347 AudioPacketizationCallback* packetization_callback_;
349 uint32_t last_timestamp_;
350 uint32_t last_in_timestamp_;
351 CodecInst send_codec_inst_;
// NOTE(review): presumably comfort-noise (CNG) payload types for narrowband,
// wideband, super-wideband and fullband respectively -- confirm.
352 uint8_t cng_nb_pltype_;
353 uint8_t cng_wb_pltype_;
354 uint8_t cng_swb_pltype_;
355 uint8_t cng_fb_pltype_;
359 ACMVADMode vad_mode_;
// Arrays with one slot per entry, sized by ACMCodecDB::kMaxNumCodecs.
360 ACMGenericCodec* codecs_[ACMCodecDB::kMaxNumCodecs];
361 ACMGenericCodec* slave_codecs_[ACMCodecDB::kMaxNumCodecs];
362 int16_t mirror_codec_idx_[ACMCodecDB::kMaxNumCodecs];
363 bool stereo_receive_[ACMCodecDB::kMaxNumCodecs];
364 bool stereo_receive_registered_;
366 int prev_received_channel_;
367 int expected_channels_;
368 int32_t current_send_codec_idx_;
369 int current_receive_codec_idx_;
370 bool send_codec_registered_;
371 ACMResampler input_resampler_;
372 ACMResampler output_resampler_;
// NOTE(review): presumably the "ACM critical section" that the *Safe()
// helpers expect to be held -- confirm.
374 CriticalSectionWrapper* acm_crit_sect_;
// NOTE(review): presumably the callback passed to RegisterVADCallback() --
// confirm.
375 ACMVADCallback* vad_callback_;
376 uint8_t last_recv_audio_codec_pltype_;
381 // TODO(turajs): |red_buffer_| is allocated in the constructor, so why keep it
382 // as a pointer and not an array? If concerned about the memory, then make a
383 // set-up function to allocate it only when it is going to be used, i.e.
384 // FEC or Dual-streaming is enabled.
385 uint8_t* red_buffer_;
386 // TODO(turajs): we actually don't need |fragmentation_| as a member variable.
387 // It is sufficient to keep the length & payload type of previous payload in
389 RTPFragmentationHeader fragmentation_;
390 uint32_t last_fec_timestamp_;
391 // If no RED is registered as receive codec this
392 // will have an invalid value.
393 uint8_t receive_red_pltype_;
395 // This is to keep track of CN instances where we can send DTMFs.
396 uint8_t previous_pltype_;
398 // This keeps track of payload types associated with codecs_[].
399 // We define it as a signed variable and initialize with -1 to indicate
401 int16_t registered_pltypes_[ACMCodecDB::kMaxNumCodecs];
403 // Used when payloads are pushed into ACM without any RTP info.
404 // One example is when pre-encoded bit-stream is pushed from
406 WebRtcRTPHeader* dummy_rtp_header_;
407 uint16_t recv_pl_frame_size_smpls_;
409 bool receiver_initialized_;
410 ACMDTMFDetection* dtmf_detector_;
412 AudioCodingFeedback* dtmf_callback_;
413 int16_t last_detected_tone_;
// NOTE(review): presumably guards the callback pointers -- confirm.
414 CriticalSectionWrapper* callback_crit_sect_;
416 AudioFrame audio_frame_;
// Frame that PreprocessToAddData() points |ptr_out| at when preprocessing is
// required (see that function's comment).
417 AudioFrame preprocess_frame_;
// NOTE(review): presumably the codec settings and encoder instance of the
// secondary (dual-streaming) send codec -- confirm.
418 CodecInst secondary_send_codec_inst_;
419 scoped_ptr<ACMGenericCodec> secondary_encoder_;
// NOTE(review): the members from |initial_delay_ms_| to |playout_ts_| are
// presumably the state for the initial-playout-delay buffering phase (see
// SetInitialPlayoutDelay() and UpdateBufferingSafe()) -- confirm.
422 int initial_delay_ms_;
423 int num_packets_accumulated_;
424 int num_bytes_accumulated_;
425 int accumulated_audio_ms_;
426 int first_payload_received_;
427 uint32_t last_incoming_send_timestamp_;
428 bool track_neteq_buffer_;
429 uint32_t playout_ts_;
431 // AV-sync is enabled. In AV-sync mode, sync packet pushed during long packet
435 // Latest send timestamp difference of two consecutive packets.
436 uint32_t last_timestamp_diff_;
437 uint16_t last_sequence_number_;
439 bool last_packet_was_sync_;
440 int64_t last_receive_timestamp_;
// NOTE(review): presumably non-NULL only after EnableNack() -- confirm.
443 scoped_ptr<acm2::Nack> nack_;
// NOTE(review): presumably the source for GetDecodingCallStatistics() --
// confirm.
446 acm2::CallStatistics call_stats_;
451 } // namespace webrtc
453 #endif // WEBRTC_MODULES_AUDIO_CODING_MAIN_SOURCE_AUDIO_CODING_MODULE_IMPL_H_