src/third_party/webrtc/modules/rtp_rtcp/source/rtp_sender_audio.cc

   1 /*
   2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include "webrtc/modules/rtp_rtcp/source/rtp_sender_audio.h"
  12
  13 #include <assert.h> //assert
  14 #include <string.h> //memcpy
  15
  16 #include "webrtc/system_wrappers/interface/trace_event.h"
  17
  18 namespace webrtc {
  19 RTPSenderAudio::RTPSenderAudio(const int32_t id, Clock* clock,
  20                                RTPSender* rtpSender) :
  21     _id(id),
  22     _clock(clock),
  23     _rtpSender(rtpSender),
  24     _audioFeedbackCritsect(CriticalSectionWrapper::CreateCriticalSection()),
  25     _audioFeedback(NULL),
  26     _sendAudioCritsect(CriticalSectionWrapper::CreateCriticalSection()),
  27     _frequency(8000),
  28     _packetSizeSamples(160),
  29     _dtmfEventIsOn(false),
  30     _dtmfEventFirstPacketSent(false),
  31     _dtmfPayloadType(-1),
  32     _dtmfTimestamp(0),
  33     _dtmfKey(0),
  34     _dtmfLengthSamples(0),
  35     _dtmfLevel(0),
  36     _dtmfTimeLastSent(0),
  37     _dtmfTimestampLastSent(0),
  38     _REDPayloadType(-1),
  39     _inbandVADactive(false),
  40     _cngNBPayloadType(-1),
  41     _cngWBPayloadType(-1),
  42     _cngSWBPayloadType(-1),
  43     _cngFBPayloadType(-1),
  44     _lastPayloadType(-1),
  45     _audioLevel_dBov(0) {
  46 };
  47
  48 RTPSenderAudio::~RTPSenderAudio()
  49 {
  50     delete _sendAudioCritsect;
  51     delete _audioFeedbackCritsect;
  52 }
  53
  54 int32_t
  55 RTPSenderAudio::RegisterAudioCallback(RtpAudioFeedback* messagesCallback)
  56 {
  57     CriticalSectionScoped cs(_audioFeedbackCritsect);
  58     _audioFeedback = messagesCallback;
  59     return 0;
  60 }
  61
  62 void
  63 RTPSenderAudio::SetAudioFrequency(const uint32_t f)
  64 {
  65     CriticalSectionScoped cs(_sendAudioCritsect);
  66     _frequency = f;
  67 }
  68
  69 int
  70 RTPSenderAudio::AudioFrequency() const
  71 {
  72     CriticalSectionScoped cs(_sendAudioCritsect);
  73     return _frequency;
  74 }
  75
  76     // set audio packet size, used to determine when it's time to send a DTMF packet in silence (CNG)
  77 int32_t
  78 RTPSenderAudio::SetAudioPacketSize(const uint16_t packetSizeSamples)
  79 {
  80     CriticalSectionScoped cs(_sendAudioCritsect);
  81
  82     _packetSizeSamples = packetSizeSamples;
  83     return 0;
  84 }
  85
  86 int32_t RTPSenderAudio::RegisterAudioPayload(
  87     const char payloadName[RTP_PAYLOAD_NAME_SIZE],
  88     const int8_t payloadType,
  89     const uint32_t frequency,
  90     const uint8_t channels,
  91     const uint32_t rate,
  92     RtpUtility::Payload*& payload) {
  93   CriticalSectionScoped cs(_sendAudioCritsect);
  94
  95   if (RtpUtility::StringCompare(payloadName, "cn", 2)) {
  96     //  we can have multiple CNG payload types
  97     if (frequency == 8000) {
  98       _cngNBPayloadType = payloadType;
  99
 100     } else if (frequency == 16000) {
 101       _cngWBPayloadType = payloadType;
 102
 103     } else if (frequency == 32000) {
 104       _cngSWBPayloadType = payloadType;
 105
 106     } else if (frequency == 48000) {
 107       _cngFBPayloadType = payloadType;
 108
 109     } else {
 110       return -1;
 111     }
 112   }
 113   if (RtpUtility::StringCompare(payloadName, "telephone-event", 15)) {
 114     // Don't add it to the list
 115     // we dont want to allow send with a DTMF payloadtype
 116     _dtmfPayloadType = payloadType;
 117     return 0;
 118     // The default timestamp rate is 8000 Hz, but other rates may be defined.
 119   }
 120   payload = new RtpUtility::Payload;
 121   payload->typeSpecific.Audio.frequency = frequency;
 122   payload->typeSpecific.Audio.channels = channels;
 123   payload->typeSpecific.Audio.rate = rate;
 124   payload->audio = true;
 125   payload->name[RTP_PAYLOAD_NAME_SIZE - 1] = 0;
 126   strncpy(payload->name, payloadName, RTP_PAYLOAD_NAME_SIZE - 1);
 127   return 0;
 128 }
 129
 130 bool
 131 RTPSenderAudio::MarkerBit(const FrameType frameType,
 132                           const int8_t payloadType)
 133 {
 134     CriticalSectionScoped cs(_sendAudioCritsect);
 135
 136     // for audio true for first packet in a speech burst
 137     bool markerBit = false;
 138     if(_lastPayloadType != payloadType)
 139     {
 140         if(_cngNBPayloadType != -1)
 141         {
 142             // we have configured NB CNG
 143             if(_cngNBPayloadType == payloadType)
 144             {
 145                 // only set a marker bit when we change payload type to a non CNG
 146                 return false;
 147             }
 148         }
 149         if(_cngWBPayloadType != -1)
 150         {
 151             // we have configured WB CNG
 152             if(_cngWBPayloadType == payloadType)
 153             {
 154                 // only set a marker bit when we change payload type to a non CNG
 155                 return false;
 156             }
 157         }
 158         if(_cngSWBPayloadType != -1)
 159         {
 160             // we have configured SWB CNG
 161             if(_cngSWBPayloadType == payloadType)
 162             {
 163                 // only set a marker bit when we change payload type to a non CNG
 164                 return false;
 165             }
 166         }
 167         if(_cngFBPayloadType != -1)
 168         {
 169             // we have configured SWB CNG
 170             if(_cngFBPayloadType == payloadType)
 171             {
 172                 // only set a marker bit when we change payload type to a non CNG
 173                 return false;
 174             }
 175         }
 176         // payloadType differ
 177         if(_lastPayloadType == -1)
 178         {
 179             if(frameType != kAudioFrameCN)
 180             {
 181                 // first packet and NOT CNG
 182                 return true;
 183
 184             }else
 185             {
 186                 // first packet and CNG
 187                 _inbandVADactive = true;
 188                 return false;
 189             }
 190         }
 191         // not first packet AND
 192         // not CNG AND
 193         // payloadType changed
 194
 195         // set a marker bit when we change payload type
 196         markerBit = true;
 197     }
 198
 199     // For G.723 G.729, AMR etc we can have inband VAD
 200     if(frameType == kAudioFrameCN)
 201     {
 202         _inbandVADactive = true;
 203
 204     } else if(_inbandVADactive)
 205     {
 206         _inbandVADactive = false;
 207         markerBit = true;
 208     }
 209     return markerBit;
 210 }
 211
 212 bool
 213 RTPSenderAudio::SendTelephoneEventActive(int8_t& telephoneEvent) const
 214 {
 215     if(_dtmfEventIsOn)
 216     {
 217         telephoneEvent = _dtmfKey;
 218         return true;
 219     }
 220     int64_t delaySinceLastDTMF = _clock->TimeInMilliseconds() -
 221         _dtmfTimeLastSent;
 222     if(delaySinceLastDTMF < 100)
 223     {
 224         telephoneEvent = _dtmfKey;
 225         return true;
 226     }
 227     telephoneEvent = -1;
 228     return false;
 229 }
 230
 231 int32_t RTPSenderAudio::SendAudio(
 232     const FrameType frameType,
 233     const int8_t payloadType,
 234     const uint32_t captureTimeStamp,
 235     const uint8_t* payloadData,
 236     const uint32_t dataSize,
 237     const RTPFragmentationHeader* fragmentation) {
 238   // TODO(pwestin) Breakup function in smaller functions.
 239   uint16_t payloadSize = static_cast<uint16_t>(dataSize);
 240   uint16_t maxPayloadLength = _rtpSender->MaxPayloadLength();
 241   bool dtmfToneStarted = false;
 242   uint16_t dtmfLengthMS = 0;
 243   uint8_t key = 0;
 244
 245   // Check if we have pending DTMFs to send
 246   if (!_dtmfEventIsOn && PendingDTMF()) {
 247     CriticalSectionScoped cs(_sendAudioCritsect);
 248
 249     int64_t delaySinceLastDTMF = _clock->TimeInMilliseconds() -
 250         _dtmfTimeLastSent;
 251
 252     if (delaySinceLastDTMF > 100) {
 253       // New tone to play
 254       _dtmfTimestamp = captureTimeStamp;
 255       if (NextDTMF(&key, &dtmfLengthMS, &_dtmfLevel) >= 0) {
 256         _dtmfEventFirstPacketSent = false;
 257         _dtmfKey = key;
 258         _dtmfLengthSamples = (_frequency / 1000) * dtmfLengthMS;
 259         dtmfToneStarted = true;
 260         _dtmfEventIsOn = true;
 261       }
 262     }
 263   }
 264   if (dtmfToneStarted) {
 265     CriticalSectionScoped cs(_audioFeedbackCritsect);
 266     if (_audioFeedback) {
 267       _audioFeedback->OnPlayTelephoneEvent(_id, key, dtmfLengthMS, _dtmfLevel);
 268     }
 269   }
 270
 271   // A source MAY send events and coded audio packets for the same time
 272   // but we don't support it
 273   {
 274     _sendAudioCritsect->Enter();
 275
 276     if (_dtmfEventIsOn) {
 277       if (frameType == kFrameEmpty) {
 278         // kFrameEmpty is used to drive the DTMF when in CN mode
 279         // it can be triggered more frequently than we want to send the
 280         // DTMF packets.
 281         if (_packetSizeSamples > (captureTimeStamp - _dtmfTimestampLastSent)) {
 282           // not time to send yet
 283           _sendAudioCritsect->Leave();
 284           return 0;
 285         }
 286       }
 287       _dtmfTimestampLastSent = captureTimeStamp;
 288       uint32_t dtmfDurationSamples = captureTimeStamp - _dtmfTimestamp;
 289       bool ended = false;
 290       bool send = true;
 291
 292       if (_dtmfLengthSamples > dtmfDurationSamples) {
 293         if (dtmfDurationSamples <= 0) {
 294           // Skip send packet at start, since we shouldn't use duration 0
 295           send = false;
 296         }
 297       } else {
 298         ended = true;
 299         _dtmfEventIsOn = false;
 300         _dtmfTimeLastSent = _clock->TimeInMilliseconds();
 301       }
 302       // don't hold the critsect while calling SendTelephoneEventPacket
 303       _sendAudioCritsect->Leave();
 304       if (send) {
 305         if (dtmfDurationSamples > 0xffff) {
 306           // RFC 4733 2.5.2.3 Long-Duration Events
 307           SendTelephoneEventPacket(ended, _dtmfTimestamp,
 308                                    static_cast<uint16_t>(0xffff), false);
 309
 310           // set new timestap for this segment
 311           _dtmfTimestamp = captureTimeStamp;
 312           dtmfDurationSamples -= 0xffff;
 313           _dtmfLengthSamples -= 0xffff;
 314
 315           return SendTelephoneEventPacket(
 316               ended,
 317               _dtmfTimestamp,
 318               static_cast<uint16_t>(dtmfDurationSamples),
 319               false);
 320         } else {
 321           // set markerBit on the first packet in the burst
 322           _dtmfEventFirstPacketSent = true;
 323           return SendTelephoneEventPacket(
 324               ended,
 325               _dtmfTimestamp,
 326               static_cast<uint16_t>(dtmfDurationSamples),
 327               !_dtmfEventFirstPacketSent);
 328         }
 329       }
 330       return 0;
 331     }
 332     _sendAudioCritsect->Leave();
 333   }
 334   if (payloadSize == 0 || payloadData == NULL) {
 335     if (frameType == kFrameEmpty) {
 336       // we don't send empty audio RTP packets
 337       // no error since we use it to drive DTMF when we use VAD
 338       return 0;
 339     }
 340     return -1;
 341   }
 342   uint8_t dataBuffer[IP_PACKET_SIZE];
 343   bool markerBit = MarkerBit(frameType, payloadType);
 344
 345   int32_t rtpHeaderLength = 0;
 346   uint16_t timestampOffset = 0;
 347
 348   if (_REDPayloadType >= 0 && fragmentation && !markerBit &&
 349       fragmentation->fragmentationVectorSize > 1) {
 350     // have we configured RED? use its payload type
 351     // we need to get the current timestamp to calc the diff
 352     uint32_t oldTimeStamp = _rtpSender->Timestamp();
 353     rtpHeaderLength = _rtpSender->BuildRTPheader(dataBuffer, _REDPayloadType,
 354                                                  markerBit, captureTimeStamp,
 355                                                  _clock->TimeInMilliseconds());
 356
 357     timestampOffset = uint16_t(_rtpSender->Timestamp() - oldTimeStamp);
 358   } else {
 359     rtpHeaderLength = _rtpSender->BuildRTPheader(dataBuffer, payloadType,
 360                                                  markerBit, captureTimeStamp,
 361                                                  _clock->TimeInMilliseconds());
 362   }
 363   if (rtpHeaderLength <= 0) {
 364     return -1;
 365   }
 366   if (maxPayloadLength < (rtpHeaderLength + payloadSize)) {
 367     // Too large payload buffer.
 368     return -1;
 369   }
 370   {
 371     CriticalSectionScoped cs(_sendAudioCritsect);
 372     if (_REDPayloadType >= 0 &&  // Have we configured RED?
 373         fragmentation &&
 374         fragmentation->fragmentationVectorSize > 1 &&
 375         !markerBit) {
 376       if (timestampOffset <= 0x3fff) {
 377         if(fragmentation->fragmentationVectorSize != 2) {
 378           // we only support 2 codecs when using RED
 379           return -1;
 380         }
 381         // only 0x80 if we have multiple blocks
 382         dataBuffer[rtpHeaderLength++] = 0x80 +
 383             fragmentation->fragmentationPlType[1];
 384         uint32_t blockLength = fragmentation->fragmentationLength[1];
 385
 386         // sanity blockLength
 387         if(blockLength > 0x3ff) {  // block length 10 bits 1023 bytes
 388           return -1;
 389         }
 390         uint32_t REDheader = (timestampOffset << 10) + blockLength;
 391         RtpUtility::AssignUWord24ToBuffer(dataBuffer + rtpHeaderLength,
 392                                           REDheader);
 393         rtpHeaderLength += 3;
 394
 395         dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
 396         // copy the RED data
 397         memcpy(dataBuffer+rtpHeaderLength,
 398                payloadData + fragmentation->fragmentationOffset[1],
 399                fragmentation->fragmentationLength[1]);
 400
 401         // copy the normal data
 402         memcpy(dataBuffer+rtpHeaderLength +
 403                fragmentation->fragmentationLength[1],
 404                payloadData + fragmentation->fragmentationOffset[0],
 405                fragmentation->fragmentationLength[0]);
 406
 407         payloadSize = static_cast<uint16_t>(
 408             fragmentation->fragmentationLength[0] +
 409             fragmentation->fragmentationLength[1]);
 410       } else {
 411         // silence for too long send only new data
 412         dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
 413         memcpy(dataBuffer+rtpHeaderLength,
 414                payloadData + fragmentation->fragmentationOffset[0],
 415                fragmentation->fragmentationLength[0]);
 416
 417         payloadSize = static_cast<uint16_t>(
 418             fragmentation->fragmentationLength[0]);
 419       }
 420     } else {
 421       if (fragmentation && fragmentation->fragmentationVectorSize > 0) {
 422         // use the fragment info if we have one
 423         dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
 424         memcpy( dataBuffer+rtpHeaderLength,
 425                 payloadData + fragmentation->fragmentationOffset[0],
 426                 fragmentation->fragmentationLength[0]);
 427
 428         payloadSize = static_cast<uint16_t>(
 429             fragmentation->fragmentationLength[0]);
 430       } else {
 431         memcpy(dataBuffer+rtpHeaderLength, payloadData, payloadSize);
 432       }
 433     }
 434     _lastPayloadType = payloadType;
 435
 436     // Update audio level extension, if included.
 437     {
 438       uint16_t packetSize = payloadSize + rtpHeaderLength;
 439       RtpUtility::RtpHeaderParser rtp_parser(dataBuffer, packetSize);
 440       RTPHeader rtp_header;
 441       rtp_parser.Parse(rtp_header);
 442       _rtpSender->UpdateAudioLevel(dataBuffer, packetSize, rtp_header,
 443                                    (frameType == kAudioFrameSpeech),
 444                                    _audioLevel_dBov);
 445     }
 446   }  // end critical section
 447   TRACE_EVENT_ASYNC_END2("webrtc", "Audio", captureTimeStamp,
 448                          "timestamp", _rtpSender->Timestamp(),
 449                          "seqnum", _rtpSender->SequenceNumber());
 450   return _rtpSender->SendToNetwork(dataBuffer,
 451                                    payloadSize,
 452                                    static_cast<uint16_t>(rtpHeaderLength),
 453                                    -1,
 454                                    kAllowRetransmission,
 455                                    PacedSender::kHighPriority);
 456 }
 457
 458     // Audio level magnitude and voice activity flag are set for each RTP packet
 459 int32_t
 460 RTPSenderAudio::SetAudioLevel(const uint8_t level_dBov)
 461 {
 462     if (level_dBov > 127)
 463     {
 464         return -1;
 465     }
 466     CriticalSectionScoped cs(_sendAudioCritsect);
 467     _audioLevel_dBov = level_dBov;
 468     return 0;
 469 }
 470
 471     // Set payload type for Redundant Audio Data RFC 2198
 472 int32_t
 473 RTPSenderAudio::SetRED(const int8_t payloadType)
 474 {
 475     if(payloadType < -1 )
 476     {
 477         return -1;
 478     }
 479     _REDPayloadType = payloadType;
 480     return 0;
 481 }
 482
 483     // Get payload type for Redundant Audio Data RFC 2198
 484 int32_t
 485 RTPSenderAudio::RED(int8_t& payloadType) const
 486 {
 487     if(_REDPayloadType == -1)
 488     {
 489         // not configured
 490         return -1;
 491     }
 492     payloadType = _REDPayloadType;
 493     return 0;
 494 }
 495
 496 // Send a TelephoneEvent tone using RFC 2833 (4733)
 497 int32_t
 498 RTPSenderAudio::SendTelephoneEvent(const uint8_t key,
 499                                    const uint16_t time_ms,
 500                                    const uint8_t level)
 501 {
 502     // DTMF is protected by its own critsect
 503     if(_dtmfPayloadType < 0)
 504     {
 505         // TelephoneEvent payloadtype not configured
 506         return -1;
 507     }
 508     return AddDTMF(key, time_ms, level);
 509 }
 510
 511 int32_t
 512 RTPSenderAudio::SendTelephoneEventPacket(const bool ended,
 513                                          const uint32_t dtmfTimeStamp,
 514                                          const uint16_t duration,
 515                                          const bool markerBit)
 516 {
 517     uint8_t dtmfbuffer[IP_PACKET_SIZE];
 518     uint8_t sendCount = 1;
 519     int32_t retVal = 0;
 520
 521     if(ended)
 522     {
 523         // resend last packet in an event 3 times
 524         sendCount = 3;
 525     }
 526     do
 527     {
 528         _sendAudioCritsect->Enter();
 529
 530         //Send DTMF data
 531         _rtpSender->BuildRTPheader(dtmfbuffer, _dtmfPayloadType, markerBit,
 532                                    dtmfTimeStamp, _clock->TimeInMilliseconds());
 533
 534         // reset CSRC and X bit
 535         dtmfbuffer[0] &= 0xe0;
 536
 537         //Create DTMF data
 538         /*    From RFC 2833:
 539
 540          0                   1                   2                   3
 541          0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 542         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 543         |     event     |E|R| volume    |          duration             |
 544         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 545         */
 546         // R bit always cleared
 547         uint8_t R = 0x00;
 548         uint8_t volume = _dtmfLevel;
 549
 550         // First packet un-ended
 551           uint8_t E = 0x00;
 552
 553         if(ended)
 554         {
 555             E = 0x80;
 556         }
 557
 558         // First byte is Event number, equals key number
 559         dtmfbuffer[12] = _dtmfKey;
 560         dtmfbuffer[13] = E|R|volume;
 561         RtpUtility::AssignUWord16ToBuffer(dtmfbuffer + 14, duration);
 562
 563         _sendAudioCritsect->Leave();
 564         TRACE_EVENT_INSTANT2("webrtc_rtp",
 565                              "Audio::SendTelephoneEvent",
 566                              "timestamp", dtmfTimeStamp,
 567                              "seqnum", _rtpSender->SequenceNumber());
 568         retVal = _rtpSender->SendToNetwork(dtmfbuffer, 4, 12, -1,
 569                                            kAllowRetransmission,
 570                                            PacedSender::kHighPriority);
 571         sendCount--;
 572
 573     }while (sendCount > 0 && retVal == 0);
 574
 575     return retVal;
 576 }
 577 }  // namespace webrtc