libavcodec/g722enc.c

   1 /*
   2  * Copyright (c) CMU 1993 Computer Science, Speech Group
   3  *                        Chengxiang Lu and Alex Hauptmann
   4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
   5  * Copyright (c) 2009 Kenan Gillet
   6  * Copyright (c) 2010 Martin Storsjo
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * G.722 ADPCM audio encoder
  28  */
  29
  30 #include "avcodec.h"
  31 #include "internal.h"
  32 #include "g722.h"
  33 #include "libavutil/common.h"
  34
  35 #define FREEZE_INTERVAL 128
  36
  37 /* This is an arbitrary value. Allowing insanely large values leads to strange
  38    problems, so we limit it to a reasonable value */
  39 #define MAX_FRAME_SIZE 32768
  40
  41 /* We clip the value of avctx->trellis to prevent data type overflows and
  42    undefined behavior. Using larger values is insanely slow anyway. */
  43 #define MIN_TRELLIS 0
  44 #define MAX_TRELLIS 16
  45
  46 static av_cold int g722_encode_close(AVCodecContext *avctx)
  47 {
  48     G722Context *c = avctx->priv_data;
  49     int i;
  50     for (i = 0; i < 2; i++) {
  51         av_freep(&c->paths[i]);
  52         av_freep(&c->node_buf[i]);
  53         av_freep(&c->nodep_buf[i]);
  54     }
  55     return 0;
  56 }
  57
  58 static av_cold int g722_encode_init(AVCodecContext * avctx)
  59 {
  60     G722Context *c = avctx->priv_data;
  61     int ret;
  62
  63     if (avctx->channels != 1) {
  64         av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
  65         return AVERROR_INVALIDDATA;
  66     }
  67
  68     c->band[0].scale_factor = 8;
  69     c->band[1].scale_factor = 2;
  70     c->prev_samples_pos = 22;
  71
  72     if (avctx->trellis) {
  73         int frontier = 1 << avctx->trellis;
  74         int max_paths = frontier * FREEZE_INTERVAL;
  75         int i;
  76         for (i = 0; i < 2; i++) {
  77             c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
  78             c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
  79             c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
  80             if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
  81                 ret = AVERROR(ENOMEM);
  82                 goto error;
  83             }
  84         }
  85     }
  86
  87     if (avctx->frame_size) {
  88         /* validate frame size */
  89         if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
  90             int new_frame_size;
  91
  92             if (avctx->frame_size == 1)
  93                 new_frame_size = 2;
  94             else if (avctx->frame_size > MAX_FRAME_SIZE)
  95                 new_frame_size = MAX_FRAME_SIZE;
  96             else
  97                 new_frame_size = avctx->frame_size - 1;
  98
  99             av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
 100                    "allowed. Using %d instead of %d\n", new_frame_size,
 101                    avctx->frame_size);
 102             avctx->frame_size = new_frame_size;
 103         }
 104     } else {
 105         /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
 106            a common packet size for VoIP applications */
 107         avctx->frame_size = 320;
 108     }
 109     avctx->initial_padding = 22;
 110
 111     if (avctx->trellis) {
 112         /* validate trellis */
 113         if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
 114             int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
 115             av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
 116                    "allowed. Using %d instead of %d\n", new_trellis,
 117                    avctx->trellis);
 118             avctx->trellis = new_trellis;
 119         }
 120     }
 121
 122     ff_g722dsp_init(&c->dsp);
 123
 124     return 0;
 125 error:
 126     g722_encode_close(avctx);
 127     return ret;
 128 }
 129
 130 static const int16_t low_quant[33] = {
 131       35,   72,  110,  150,  190,  233,  276,  323,
 132      370,  422,  473,  530,  587,  650,  714,  786,
 133      858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
 134     1765, 1980, 2195, 2557, 2919
 135 };
 136
 137 static inline void filter_samples(G722Context *c, const int16_t *samples,
 138                                   int *xlow, int *xhigh)
 139 {
 140     int xout[2];
 141     c->prev_samples[c->prev_samples_pos++] = samples[0];
 142     c->prev_samples[c->prev_samples_pos++] = samples[1];
 143     c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
 144     *xlow  = xout[0] + xout[1] >> 14;
 145     *xhigh = xout[0] - xout[1] >> 14;
 146     if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
 147         memmove(c->prev_samples,
 148                 c->prev_samples + c->prev_samples_pos - 22,
 149                 22 * sizeof(c->prev_samples[0]));
 150         c->prev_samples_pos = 22;
 151     }
 152 }
 153
 154 static inline int encode_high(const struct G722Band *state, int xhigh)
 155 {
 156     int diff = av_clip_int16(xhigh - state->s_predictor);
 157     int pred = 141 * state->scale_factor >> 8;
 158            /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
 159     return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
 160 }
 161
 162 static inline int encode_low(const struct G722Band* state, int xlow)
 163 {
 164     int diff  = av_clip_int16(xlow - state->s_predictor);
 165            /* = diff >= 0 ? diff : -(diff + 1) */
 166     int limit = diff ^ (diff >> (sizeof(diff)*8-1));
 167     int i = 0;
 168     limit = limit + 1 << 10;
 169     if (limit > low_quant[8] * state->scale_factor)
 170         i = 9;
 171     while (i < 29 && limit > low_quant[i] * state->scale_factor)
 172         i++;
 173     return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
 174 }
 175
 176 static void g722_encode_trellis(G722Context *c, int trellis,
 177                                 uint8_t *dst, int nb_samples,
 178                                 const int16_t *samples)
 179 {
 180     int i, j, k;
 181     int frontier = 1 << trellis;
 182     struct TrellisNode **nodes[2];
 183     struct TrellisNode **nodes_next[2];
 184     int pathn[2] = {0, 0}, froze = -1;
 185     struct TrellisPath *p[2];
 186
 187     for (i = 0; i < 2; i++) {
 188         nodes[i] = c->nodep_buf[i];
 189         nodes_next[i] = c->nodep_buf[i] + frontier;
 190         memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
 191         nodes[i][0] = c->node_buf[i] + frontier;
 192         nodes[i][0]->ssd = 0;
 193         nodes[i][0]->path = 0;
 194         nodes[i][0]->state = c->band[i];
 195     }
 196
 197     for (i = 0; i < nb_samples >> 1; i++) {
 198         int xlow, xhigh;
 199         struct TrellisNode *next[2];
 200         int heap_pos[2] = {0, 0};
 201
 202         for (j = 0; j < 2; j++) {
 203             next[j] = c->node_buf[j] + frontier*(i & 1);
 204             memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
 205         }
 206
 207         filter_samples(c, &samples[2*i], &xlow, &xhigh);
 208
 209         for (j = 0; j < frontier && nodes[0][j]; j++) {
 210             /* Only k >> 2 affects the future adaptive state, therefore testing
 211              * small steps that don't change k >> 2 is useless, the original
 212              * value from encode_low is better than them. Since we step k
 213              * in steps of 4, make sure range is a multiple of 4, so that
 214              * we don't miss the original value from encode_low. */
 215             int range = j < frontier/2 ? 4 : 0;
 216             struct TrellisNode *cur_node = nodes[0][j];
 217
 218             int ilow = encode_low(&cur_node->state, xlow);
 219
 220             for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
 221                 int decoded, dec_diff, pos;
 222                 uint32_t ssd;
 223                 struct TrellisNode* node;
 224
 225                 if (k < 0)
 226                     continue;
 227
 228                 decoded = av_clip_intp2((cur_node->state.scale_factor *
 229                                   ff_g722_low_inv_quant6[k] >> 10)
 230                                 + cur_node->state.s_predictor, 14);
 231                 dec_diff = xlow - decoded;
 232
 233 #define STORE_NODE(index, UPDATE, VALUE)\
 234                 ssd = cur_node->ssd + dec_diff*dec_diff;\
 235                 /* Check for wraparound. Using 64 bit ssd counters would \
 236                  * be simpler, but is slower on x86 32 bit. */\
 237                 if (ssd < cur_node->ssd)\
 238                     continue;\
 239                 if (heap_pos[index] < frontier) {\
 240                     pos = heap_pos[index]++;\
 241                     assert(pathn[index] < FREEZE_INTERVAL * frontier);\
 242                     node = nodes_next[index][pos] = next[index]++;\
 243                     node->path = pathn[index]++;\
 244                 } else {\
 245                     /* Try to replace one of the leaf nodes with the new \
 246                      * one, but not always testing the same leaf position */\
 247                     pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
 248                     if (ssd >= nodes_next[index][pos]->ssd)\
 249                         continue;\
 250                     heap_pos[index]++;\
 251                     node = nodes_next[index][pos];\
 252                 }\
 253                 node->ssd = ssd;\
 254                 node->state = cur_node->state;\
 255                 UPDATE;\
 256                 c->paths[index][node->path].value = VALUE;\
 257                 c->paths[index][node->path].prev = cur_node->path;\
 258                 /* Sift the newly inserted node up in the heap to restore \
 259                  * the heap property */\
 260                 while (pos > 0) {\
 261                     int parent = (pos - 1) >> 1;\
 262                     if (nodes_next[index][parent]->ssd <= ssd)\
 263                         break;\
 264                     FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
 265                                                 nodes_next[index][pos]);\
 266                     pos = parent;\
 267                 }
 268                 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
 269             }
 270         }
 271
 272         for (j = 0; j < frontier && nodes[1][j]; j++) {
 273             int ihigh;
 274             struct TrellisNode *cur_node = nodes[1][j];
 275
 276             /* We don't try to get any initial guess for ihigh via
 277              * encode_high - since there's only 4 possible values, test
 278              * them all. Testing all of these gives a much, much larger
 279              * gain than testing a larger range around ilow. */
 280             for (ihigh = 0; ihigh < 4; ihigh++) {
 281                 int dhigh, decoded, dec_diff, pos;
 282                 uint32_t ssd;
 283                 struct TrellisNode* node;
 284
 285                 dhigh = cur_node->state.scale_factor *
 286                         ff_g722_high_inv_quant[ihigh] >> 10;
 287                 decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
 288                 dec_diff = xhigh - decoded;
 289
 290                 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
 291             }
 292         }
 293
 294         for (j = 0; j < 2; j++) {
 295             FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
 296
 297             if (nodes[j][0]->ssd > (1 << 16)) {
 298                 for (k = 1; k < frontier && nodes[j][k]; k++)
 299                     nodes[j][k]->ssd -= nodes[j][0]->ssd;
 300                 nodes[j][0]->ssd = 0;
 301             }
 302         }
 303
 304         if (i == froze + FREEZE_INTERVAL) {
 305             p[0] = &c->paths[0][nodes[0][0]->path];
 306             p[1] = &c->paths[1][nodes[1][0]->path];
 307             for (j = i; j > froze; j--) {
 308                 dst[j] = p[1]->value << 6 | p[0]->value;
 309                 p[0] = &c->paths[0][p[0]->prev];
 310                 p[1] = &c->paths[1][p[1]->prev];
 311             }
 312             froze = i;
 313             pathn[0] = pathn[1] = 0;
 314             memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
 315             memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
 316         }
 317     }
 318
 319     p[0] = &c->paths[0][nodes[0][0]->path];
 320     p[1] = &c->paths[1][nodes[1][0]->path];
 321     for (j = i; j > froze; j--) {
 322         dst[j] = p[1]->value << 6 | p[0]->value;
 323         p[0] = &c->paths[0][p[0]->prev];
 324         p[1] = &c->paths[1][p[1]->prev];
 325     }
 326     c->band[0] = nodes[0][0]->state;
 327     c->band[1] = nodes[1][0]->state;
 328 }
 329
 330 static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
 331                                          const int16_t *samples)
 332 {
 333     int xlow, xhigh, ilow, ihigh;
 334     filter_samples(c, samples, &xlow, &xhigh);
 335     ihigh = encode_high(&c->band[1], xhigh);
 336     ilow  = encode_low (&c->band[0], xlow);
 337     ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
 338                                 ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
 339     ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
 340     *dst = ihigh << 6 | ilow;
 341 }
 342
 343 static void g722_encode_no_trellis(G722Context *c,
 344                                    uint8_t *dst, int nb_samples,
 345                                    const int16_t *samples)
 346 {
 347     int i;
 348     for (i = 0; i < nb_samples; i += 2)
 349         encode_byte(c, dst++, &samples[i]);
 350 }
 351
 352 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 353                              const AVFrame *frame, int *got_packet_ptr)
 354 {
 355     G722Context *c = avctx->priv_data;
 356     const int16_t *samples = (const int16_t *)frame->data[0];
 357     int nb_samples, out_size, ret;
 358
 359     out_size = (frame->nb_samples + 1) / 2;
 360     if ((ret = ff_alloc_packet(avpkt, out_size))) {
 361         av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
 362         return ret;
 363     }
 364
 365     nb_samples = frame->nb_samples - (frame->nb_samples & 1);
 366
 367     if (avctx->trellis)
 368         g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
 369     else
 370         g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
 371
 372     /* handle last frame with odd frame_size */
 373     if (nb_samples < frame->nb_samples) {
 374         int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
 375         encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
 376     }
 377
 378     if (frame->pts != AV_NOPTS_VALUE)
 379         avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
 380     *got_packet_ptr = 1;
 381     return 0;
 382 }
 383
 384 AVCodec ff_adpcm_g722_encoder = {
 385     .name           = "g722",
 386     .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
 387     .type           = AVMEDIA_TYPE_AUDIO,
 388     .id             = AV_CODEC_ID_ADPCM_G722,
 389     .priv_data_size = sizeof(G722Context),
 390     .init           = g722_encode_init,
 391     .close          = g722_encode_close,
 392     .encode2        = g722_encode_frame,
 393     .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME,
 394     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
 395                                                      AV_SAMPLE_FMT_NONE },
 396 };