2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
11 #include "webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.h"
13 #include <assert.h> // assert
14 #include <math.h> // pow()
15 #include <string.h> // memcpy()
17 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
18 #include "webrtc/system_wrappers/interface/logging.h"
19 #include "webrtc/system_wrappers/interface/trace_event.h"
// Factory for the audio receiver strategy: allocates an RTPReceiverAudio with
// `new`, forwarding id and both callbacks unchanged, and returns it through
// the RTPReceiverStrategy return type.
// NOTE(review): the result is a raw pointer from `new`; presumably the caller
// takes ownership and must delete it -- confirm at the call sites.
22 RTPReceiverStrategy* RTPReceiverStrategy::CreateAudioStrategy(
23 int32_t id, RtpData* data_callback,
24 RtpAudioFeedback* incoming_messages_callback) {
25 return new RTPReceiverAudio(id, data_callback, incoming_messages_callback);
// Constructs the audio receiver strategy.
//   data_callback              - passed to the RTPReceiverStrategy base.
//   incoming_messages_callback - stored in cb_audio_feedback_.
// All payload-type members start at -1 and the last received frequency starts
// at 8000.
// NOTE(review): the initializer list has gaps in this view (the embedded line
// numbers skip), so members initialized on the missing lines are not
// documented here -- confirm against the full file.
28 RTPReceiverAudio::RTPReceiverAudio(const int32_t id,
29 RtpData* data_callback,
30 RtpAudioFeedback* incoming_messages_callback)
31 : RTPReceiverStrategy(data_callback),
32 TelephoneEventHandler(),
34 last_received_frequency_(8000),
35 telephone_event_forward_to_decoder_(false),
// -1 presumably means that no payload type has been registered yet; the CNG
// lookup compares cng_payload_type_ against -1 before reporting a change.
36 telephone_event_payload_type_(-1),
37 cng_nb_payload_type_(-1),
38 cng_wb_payload_type_(-1),
39 cng_swb_payload_type_(-1),
40 cng_fb_payload_type_(-1),
41 cng_payload_type_(-1),
42 g722_payload_type_(-1),
43 last_received_g722_(false),
45 current_remote_energy_(),
46 cb_audio_feedback_(incoming_messages_callback) {
// Default to a single audio channel.
47 last_payload_.Audio.channels = 1;
// Explicitly zero the energy buffer (it is also value-initialized above).
48 memset(current_remote_energy_, 0, sizeof(current_remote_energy_));
51 // Outband TelephoneEvent(DTMF) detection
// Stores forward_to_decoder in telephone_event_forward_to_decoder_ while
// holding crit_sect_. ParseAudioCodecSpecific reads this flag to decide
// whether a telephone-event packet is passed on to the data callback.
52 void RTPReceiverAudio::SetTelephoneEventForwardToDecoder(
53 bool forward_to_decoder) {
54 CriticalSectionScoped lock(crit_sect_.get());
55 telephone_event_forward_to_decoder_ = forward_to_decoder;
58 // Is forwarding of outband telephone events turned on/off?
// Returns the current value of telephone_event_forward_to_decoder_, read while
// holding crit_sect_.
59 bool RTPReceiverAudio::TelephoneEventForwardToDecoder() const {
60 CriticalSectionScoped lock(crit_sect_.get());
61 return telephone_event_forward_to_decoder_;
// Returns true when payload_type equals the payload type stored in
// telephone_event_payload_type_ (set by OnNewPayloadTypeCreated), false
// otherwise. The comparison is done while holding crit_sect_.
64 bool RTPReceiverAudio::TelephoneEventPayloadType(
65 int8_t payload_type) const {
66 CriticalSectionScoped lock(crit_sect_.get());
67 return (telephone_event_payload_type_ == payload_type) ? true : false;
// Checks payload_type against the four registered CNG (comfort noise) payload
// types: narrowband, wideband, super-wideband and fullband. On a match the
// matched type is recorded in cng_payload_type_.
//   cng_payload_type_has_changed - reset to false on entry; set to true when a
//       CNG type had already been recorded (cng_payload_type_ != -1) and it
//       differs from the type matched now.
// When no CNG type matches, last_received_g722_ is updated from the comparison
// with g722_payload_type_.
// The whole body runs while holding crit_sect_.
// NOTE(review): one parameter, the return statements, the writes of the output
// frequency and the `else` lines are not visible in this view (the embedded
// line numbers skip). Presumably the function returns true on a CNG match and
// reports the matching sample rate -- confirm against the full file.
70 bool RTPReceiverAudio::CNGPayloadType(int8_t payload_type,
72 bool* cng_payload_type_has_changed) {
73 CriticalSectionScoped lock(crit_sect_.get());
74 *cng_payload_type_has_changed = false;
76 // We can have four CNG on 8000Hz, 16000Hz, 32000Hz and 48000Hz.
// Narrowband CNG.
77 if (cng_nb_payload_type_ == payload_type) {
79 if (cng_payload_type_ != -1 && cng_payload_type_ != cng_nb_payload_type_)
80 *cng_payload_type_has_changed = true;
82 cng_payload_type_ = cng_nb_payload_type_;
// Wideband CNG.
84 } else if (cng_wb_payload_type_ == payload_type) {
85 // if last received codec is G.722 we must use frequency 8000
86 if (last_received_g722_) {
91 if (cng_payload_type_ != -1 && cng_payload_type_ != cng_wb_payload_type_)
92 *cng_payload_type_has_changed = true;
93 cng_payload_type_ = cng_wb_payload_type_;
// Super-wideband CNG.
95 } else if (cng_swb_payload_type_ == payload_type) {
97 if ((cng_payload_type_ != -1) &&
98 (cng_payload_type_ != cng_swb_payload_type_))
99 *cng_payload_type_has_changed = true;
100 cng_payload_type_ = cng_swb_payload_type_;
// Fullband CNG.
102 } else if (cng_fb_payload_type_ == payload_type) {
104 if (cng_payload_type_ != -1 && cng_payload_type_ != cng_fb_payload_type_)
105 *cng_payload_type_has_changed = true;
106 cng_payload_type_ = cng_fb_payload_type_;
// Not a CNG payload type: remember whether this packet is G.722, which the
// wideband branch above and GetPayloadTypeFrequency consult.
110 if (g722_payload_type_ == payload_type) {
111 last_received_g722_ = true;
113 last_received_g722_ = false;
// Returns true unless payload_type is the registered telephone-event payload
// type, i.e. the negation of TelephoneEventPayloadType(payload_type).
119 bool RTPReceiverAudio::ShouldReportCsrcChanges(uint8_t payload_type) const {
120 // Don't do this for DTMF packets, otherwise it's fine.
121 return !TelephoneEventPayloadType(payload_type);
124 // - Sample based or frame based codecs based on RFC 3551
126 // - NOTE! There is one error in the RFC, stating G.722 uses 8 bits/samples.
127 // - The correct rate is 4 bits/sample.
129 // - name of sampling default
130 // - encoding sample/frame bits/sample rate ms/frame ms/packet
132 // - Sample based audio codecs
133 // - DVI4 sample 4 var. 20
134 // - G722 sample 4 16,000 20
135 // - G726-40 sample 5 8,000 20
136 // - G726-32 sample 4 8,000 20
137 // - G726-24 sample 3 8,000 20
138 // - G726-16 sample 2 8,000 20
139 // - L8 sample 8 var. 20
140 // - L16 sample 16 var. 20
141 // - PCMA sample 8 var. 20
142 // - PCMU sample 8 var. 20
144 // - Frame based audio codecs
145 // - G723 frame N/A 8,000 30 30
146 // - G728 frame N/A 8,000 2.5 20
147 // - G729 frame N/A 8,000 10 20
148 // - G729D frame N/A 8,000 10 20
149 // - G729E frame N/A 8,000 10 20
150 // - GSM frame N/A 8,000 20 20
151 // - GSM-EFR frame N/A 8,000 20 20
152 // - LPC frame N/A 8,000 20 20
153 // - MPA frame N/A var. var.
// Records the payload type numbers this receiver treats specially, based on
// the payload name and sample rate of a newly created payload type:
//   - a name matching telephone-event sets telephone_event_payload_type_;
//   - a name matching cn sets one of the four CNG payload types, selected by
//     frequency (8000, 16000, 32000 or 48000 Hz).
// The body runs while holding crit_sect_.
// NOTE(review): one parameter line, the handling of other CN frequencies and
// the return value are not visible in this view -- confirm against the full
// file. The exact matching rules (prefix, case) depend on
// ModuleRTPUtility::StringCompare, which is not shown here.
156 int32_t RTPReceiverAudio::OnNewPayloadTypeCreated(
157 const char payload_name[RTP_PAYLOAD_NAME_SIZE],
159 uint32_t frequency) {
160 CriticalSectionScoped lock(crit_sect_.get());
// The third argument is the length of the name being compared against.
162 if (ModuleRTPUtility::StringCompare(payload_name, "telephone-event", 15)) {
163 telephone_event_payload_type_ = payload_type;
165 if (ModuleRTPUtility::StringCompare(payload_name, "cn", 2)) {
166 // we can have four CNG on 8000Hz, 16000Hz, 32000Hz and 48000Hz
167 if (frequency == 8000) {
168 cng_nb_payload_type_ = payload_type;
169 } else if (frequency == 16000) {
170 cng_wb_payload_type_ = payload_type;
171 } else if (frequency == 32000) {
172 cng_swb_payload_type_ = payload_type;
173 } else if (frequency == 48000) {
174 cng_fb_payload_type_ = payload_type;
// Entry point for a received audio RTP packet. Emits a trace event, mirrors
// the CSRC count of the header into the audio energy count, caches the energy
// values, and then hands the packet to ParseAudioCodecSpecific, whose result
// is returned.
// NOTE(review): one parameter line and some arguments of the final call are
// not visible in this view. timestamp_ms and is_first_packet are not used in
// the visible lines.
183 int32_t RTPReceiverAudio::ParseRtpPacket(WebRtcRTPHeader* rtp_header,
184 const PayloadUnion& specific_payload,
186 const uint8_t* payload,
187 uint16_t payload_length,
188 int64_t timestamp_ms,
189 bool is_first_packet) {
190 TRACE_EVENT2("webrtc_rtp", "Audio::ParseRtp",
191 "seqnum", rtp_header->header.sequenceNumber,
192 "timestamp", rtp_header->header.timestamp);
// One energy value per CSRC.
193 rtp_header->type.Audio.numEnergy = rtp_header->header.numCSRCs;
// NOTE(review): num_energy_ is stored even when it exceeds kRtpCsrcSize, while
// Energy() asserts num_energy_ <= kRtpCsrcSize; no lock is taken in the
// visible lines although Energy() reads these members under crit_sect_ --
// confirm this is intended.
194 num_energy_ = rtp_header->type.Audio.numEnergy;
// Copy the energy bytes only when the count fits the local buffer bound.
195 if (rtp_header->type.Audio.numEnergy > 0 &&
196 rtp_header->type.Audio.numEnergy <= kRtpCsrcSize) {
197 memcpy(current_remote_energy_,
198 rtp_header->type.Audio.arrOfEnergy,
199 rtp_header->type.Audio.numEnergy);
202 return ParseAudioCodecSpecific(rtp_header,
205 specific_payload.Audio,
// Returns the sample rate associated with the most recently received payload,
// read while holding crit_sect_. Normally this is last_received_frequency_;
// when the last packet was G.722 a different value is returned.
// NOTE(review): the G.722 return statement is not visible in this view;
// presumably it returns 8000, matching the G.722 comment in CNGPayloadType --
// confirm against the full file.
209 int RTPReceiverAudio::GetPayloadTypeFrequency() const {
210 CriticalSectionScoped lock(crit_sect_.get());
211 if (last_received_g722_) {
214 return last_received_frequency_;
// Classifies the liveness of the stream from the length of the last received
// payload. Payloads shorter than 10 bytes are treated as likely CNG (our CNG
// is 9 bytes) and take a separate branch from longer payloads.
// NOTE(review): the returned RTPAliveType values are not visible in this view;
// the existing comment mentions kRtpNoRtp for the likely-CNG case -- confirm
// both branches against the full file.
217 RTPAliveType RTPReceiverAudio::ProcessDeadOrAlive(
218 uint16_t last_payload_length) const {
220 // Our CNG is 9 bytes; if it's a likely CNG the receiver needs to check
221 // kRtpNoRtp against NetEq speech_type kOutputPLCtoCNG.
222 if (last_payload_length < 10) { // our CNG is 9 bytes
// Decides how a payload type change should be handled.
//   payload_type            - payload type of the incoming packet.
//   specific_payload        - its Audio.frequency is passed to CNGPayloadType
//                             as an output location.
//   should_reset_statistics - set to true when the active CNG payload type
//                             changed, false otherwise.
//   should_discard_changes  - set to true for telephone-event and CNG payload
//                             types, false otherwise.
// NOTE(review): the lines after each assignment of *should_discard_changes are
// not visible in this view; presumably the telephone-event branch returns
// early -- confirm against the full file.
229 void RTPReceiverAudio::CheckPayloadChanged(int8_t payload_type,
230 PayloadUnion* specific_payload,
231 bool* should_reset_statistics,
232 bool* should_discard_changes) {
233 *should_discard_changes = false;
234 *should_reset_statistics = false;
236 if (TelephoneEventPayloadType(payload_type)) {
237 // Don't do callbacks for DTMF packets.
238 *should_discard_changes = true;
241 // frequency is updated for CNG
242 bool cng_payload_type_has_changed = false;
243 bool is_cng_payload_type = CNGPayloadType(payload_type,
244 &specific_payload->Audio.frequency,
245 &cng_payload_type_has_changed);
247 *should_reset_statistics = cng_payload_type_has_changed;
249 if (is_cng_payload_type) {
250 // Don't do callbacks for CNG packets.
251 *should_discard_changes = true;
// Copies the cached energy values (stored by ParseRtpPacket) into
// array_of_energy while holding crit_sect_. num_energy_ bytes are copied, so
// the destination must have room for at least that many entries; the assert
// checks that the count does not exceed kRtpCsrcSize.
// NOTE(review): the return statement is not visible in this view; presumably
// the number of copied entries is returned -- confirm against the full file.
256 int RTPReceiverAudio::Energy(uint8_t array_of_energy[kRtpCsrcSize]) const {
257 CriticalSectionScoped cs(crit_sect_.get());
259 assert(num_energy_ <= kRtpCsrcSize);
// Nothing is written to the output when no energy values are cached.
261 if (num_energy_ > 0) {
262 memcpy(array_of_energy, current_remote_energy_,
263 sizeof(uint8_t) * num_energy_);
// Asks callback to initialize a decoder for the given payload, passing the
// audio frequency, channel count and rate taken from specific_payload.Audio.
// When the callback returns -1 an error naming the payload is logged.
// NOTE(review): two parameter lines, two call arguments and the return
// statements are not visible in this view; presumably -1 is returned on
// failure and 0 on success -- confirm against the full file.
268 int32_t RTPReceiverAudio::InvokeOnInitializeDecoder(
269 RtpFeedback* callback,
272 const char payload_name[RTP_PAYLOAD_NAME_SIZE],
273 const PayloadUnion& specific_payload) const {
274 if (-1 == callback->OnInitializeDecoder(id,
277 specific_payload.Audio.frequency,
278 specific_payload.Audio.channels,
279 specific_payload.Audio.rate)) {
280 LOG(LS_ERROR) << "Failed to create decoder for payload type: "
281 << payload_name << "/" << payload_type;
287 // We are not allowed to have any critsects when calling data_callback.
// Handles the audio-specific part of a received RTP packet:
//   1. For telephone-event packets, updates telephone_event_reported_ from the
//      4-byte event blocks in the payload.
//   2. Records the last received frequency (non telephone-event packets only)
//      and tags the header as CNG or speech.
//   3. Decides whether a telephone-event packet is forwarded at all.
//   4. Delivers the payload to data_callback_, stripping a one-byte RED header
//      when the packet is RED with a single frame.
// Returns the result of data_callback_->OnReceivedPayloadData for delivered
// packets.
// NOTE(review): one parameter line, several return statements, `else` lines
// and closing braces are not visible in this view (the embedded line numbers
// skip), so the return values of the early exits and the exact lock scopes
// are not documented here -- confirm against the full file.
288 int32_t RTPReceiverAudio::ParseAudioCodecSpecific(
289 WebRtcRTPHeader* rtp_header,
290 const uint8_t* payload_data,
291 uint16_t payload_length,
292 const AudioPayload& audio_specific,
// Empty payloads take an early exit.
295 if (payload_length == 0) {
299 bool telephone_event_packet =
300 TelephoneEventPayloadType(rtp_header->header.payloadType);
301 if (telephone_event_packet) {
302 CriticalSectionScoped lock(crit_sect_.get());
// Layout of one 4-byte telephone event block:
306 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
307 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
308 // | event |E|R| volume | duration |
309 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// The payload must be a whole number of 4-byte event blocks.
311 if (payload_length % 4 != 0) {
// NOTE(review): payload_length is uint16_t, so payload_length / 4 can exceed
// 255 and is narrowed to uint8_t before the clamp below is applied -- confirm
// this cannot drop events for very large payloads.
314 uint8_t number_of_events = payload_length / 4;
// Clamp the number of processed events to the supported maximum.
317 if (number_of_events >= MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS) {
318 number_of_events = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS;
320 for (int n = 0; n < number_of_events; ++n) {
// Byte 1 of each block carries the E (end) flag in its top bit; byte 0 is the
// event code used as the key in telephone_event_reported_.
321 bool end = (payload_data[(4 * n) + 1] & 0x80) ? true : false;
323 std::set<uint8_t>::iterator event =
324 telephone_event_reported_.find(payload_data[4 * n]);
326 if (event != telephone_event_reported_.end()) {
327 // we have already seen this event
329 telephone_event_reported_.erase(payload_data[4 * n]);
333 // don't add if it's a end of a tone
335 telephone_event_reported_.insert(payload_data[4 * n]);
340 // RFC 4733 2.5.1.3 & 2.5.2.3 Long-Duration Events
341 // should not be a problem since we don't care about the duration
343 // RFC 4733 See 2.5.1.5. & 2.5.2.4. Multiple Events in a Packet
347 CriticalSectionScoped lock(crit_sect_.get());
// Telephone events do not change the remembered codec frequency.
349 if (!telephone_event_packet) {
350 last_received_frequency_ = audio_specific.frequency;
353 // Check if this is a CNG packet, receiver might want to know
356 if (CNGPayloadType(rtp_header->header.payloadType,
359 rtp_header->type.Audio.isCNG = true;
360 rtp_header->frameType = kAudioFrameCN;
362 rtp_header->frameType = kAudioFrameSpeech;
363 rtp_header->type.Audio.isCNG = false;
366 // check if it's a DTMF event, hence something we can playout
367 if (telephone_event_packet) {
368 if (!telephone_event_forward_to_decoder_) {
369 // don't forward event to decoder
// std::set iterates in ascending order, so `first` is the smallest event code
// currently reported; only that one is compared against 15.
372 std::set<uint8_t>::iterator first =
373 telephone_event_reported_.begin();
374 if (first != telephone_event_reported_.end() && *first > 15) {
375 // don't forward non DTMF events
380 // TODO(holmer): Break this out to have RED parsing handled generically.
// A clear top bit in the first payload byte presumably marks the last (here:
// only) RED block header, as in RFC 2198 -- TODO confirm.
381 if (is_red && !(payload_data[0] & 0x80)) {
382 // we receive only one frame packed in a RED packet; remove the RED wrapper
383 rtp_header->header.payloadType = payload_data[0];
385 // only one frame in the RED strip the one byte to help NetEq
386 return data_callback_->OnReceivedPayloadData(
387 payload_data + 1, payload_length - 1, rtp_header);
// Regular delivery: pass the channel count along with the unmodified payload.
390 rtp_header->type.Audio.channel = audio_specific.channels;
391 return data_callback_->OnReceivedPayloadData(
392 payload_data, payload_length, rtp_header);
394 } // namespace webrtc