2 * Copyright (c) CMU 1993 Computer Science, Speech Group
3 * Chengxiang Lu and Alex Hauptmann
4 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5 * Copyright (c) 2009 Kenan Gillet
6 * Copyright (c) 2010 Martin Storsjo
8 * This file is part of Libav.
10 * Libav is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * Libav is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with Libav; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 * G.722 ADPCM audio encoder
33 #include "libavutil/common.h"
35 #define FREEZE_INTERVAL 128
37 /* This is an arbitrary value. Allowing insanely large values leads to strange
38 problems, so we limit it to a reasonable value */
39 #define MAX_FRAME_SIZE 32768
41 /* We clip the value of avctx->trellis to prevent data type overflows and
42 undefined behavior. Using larger values is insanely slow anyway. */
44 #define MAX_TRELLIS 16
46 static av_cold int g722_encode_close(AVCodecContext *avctx)
48 G722Context *c = avctx->priv_data;
50 for (i = 0; i < 2; i++) {
51 av_freep(&c->paths[i]);
52 av_freep(&c->node_buf[i]);
53 av_freep(&c->nodep_buf[i]);
58 static av_cold int g722_encode_init(AVCodecContext * avctx)
60 G722Context *c = avctx->priv_data;
63 if (avctx->channels != 1) {
64 av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
65 return AVERROR_INVALIDDATA;
68 c->band[0].scale_factor = 8;
69 c->band[1].scale_factor = 2;
70 c->prev_samples_pos = 22;
73 int frontier = 1 << avctx->trellis;
74 int max_paths = frontier * FREEZE_INTERVAL;
76 for (i = 0; i < 2; i++) {
77 c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
78 c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
79 c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
80 if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
81 ret = AVERROR(ENOMEM);
87 if (avctx->frame_size) {
88 /* validate frame size */
89 if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
92 if (avctx->frame_size == 1)
94 else if (avctx->frame_size > MAX_FRAME_SIZE)
95 new_frame_size = MAX_FRAME_SIZE;
97 new_frame_size = avctx->frame_size - 1;
99 av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
100 "allowed. Using %d instead of %d\n", new_frame_size,
102 avctx->frame_size = new_frame_size;
105 /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
106 a common packet size for VoIP applications */
107 avctx->frame_size = 320;
109 avctx->initial_padding = 22;
111 if (avctx->trellis) {
112 /* validate trellis */
113 if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
114 int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
115 av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
116 "allowed. Using %d instead of %d\n", new_trellis,
118 avctx->trellis = new_trellis;
122 ff_g722dsp_init(&c->dsp);
126 g722_encode_close(avctx);
/* Low-band quantizer decision levels. encode_low() multiplies an entry by
 * the band scale factor and compares it with the scaled difference signal.
 * Only the first 29 of the 33 slots are given a value; the remaining ones
 * are implicitly zero. */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,  190,  233,  276,  323,  370,  422,
     473,  530,  587,  650,  714,  786,  858,  940, 1023, 1121,
    1219, 1339, 1458, 1612, 1765, 1980, 2195, 2557, 2919
};
137 static inline void filter_samples(G722Context *c, const int16_t *samples,
138 int *xlow, int *xhigh)
141 c->prev_samples[c->prev_samples_pos++] = samples[0];
142 c->prev_samples[c->prev_samples_pos++] = samples[1];
143 c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
144 *xlow = xout[0] + xout[1] >> 14;
145 *xhigh = xout[0] - xout[1] >> 14;
146 if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
147 memmove(c->prev_samples,
148 c->prev_samples + c->prev_samples_pos - 22,
149 22 * sizeof(c->prev_samples[0]));
150 c->prev_samples_pos = 22;
154 static inline int encode_high(const struct G722Band *state, int xhigh)
156 int diff = av_clip_int16(xhigh - state->s_predictor);
157 int pred = 141 * state->scale_factor >> 8;
158 /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
159 return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
162 static inline int encode_low(const struct G722Band* state, int xlow)
164 int diff = av_clip_int16(xlow - state->s_predictor);
165 /* = diff >= 0 ? diff : -(diff + 1) */
166 int limit = diff ^ (diff >> (sizeof(diff)*8-1));
168 limit = limit + 1 << 10;
169 if (limit > low_quant[8] * state->scale_factor)
171 while (i < 29 && limit > low_quant[i] * state->scale_factor)
173 return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
/**
 * Encode a run of samples using a trellis search.
 *
 * For every pair of input samples the function keeps, per band, a frontier
 * of up to (1 << trellis) candidate predictor states stored in a heap
 * ordered by accumulated squared error (ssd). Candidate codes are tried for
 * each surviving state, and the chosen codes are written to dst by tracing
 * back through c->paths[] every FREEZE_INTERVAL steps and once more at the
 * end. The final root state of each band is stored back into c->band[].
 *
 * NOTE(review): this listing appears to have lost a number of short lines
 * (braces, local declarations, one-statement lines); the comments below
 * describe only the statements that are visible here.
 *
 * @param c          encoder context (band states, node/path buffers)
 * @param trellis    trellis depth; the frontier size is 1 << trellis
 * @param dst        output buffer, one byte per pair of input samples
 * @param nb_samples number of input samples
 * @param samples    input samples
 */
176 static void g722_encode_trellis(G722Context *c, int trellis,
177 uint8_t *dst, int nb_samples,
178 const int16_t *samples)
181 int frontier = 1 << trellis;
182 struct TrellisNode **nodes[2];
183 struct TrellisNode **nodes_next[2];
184 int pathn[2] = {0, 0}, froze = -1;
185 struct TrellisPath *p[2];
/* Per band: clear both frontier halves and start from a single root node
 * that carries the current band state with zero accumulated error. */
187 for (i = 0; i < 2; i++) {
188 nodes[i] = c->nodep_buf[i];
189 nodes_next[i] = c->nodep_buf[i] + frontier;
190 memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
191 nodes[i][0] = c->node_buf[i] + frontier;
192 nodes[i][0]->ssd = 0;
193 nodes[i][0]->path = 0;
194 nodes[i][0]->state = c->band[i];
/* One iteration per pair of input samples. */
197 for (i = 0; i < nb_samples >> 1; i++) {
199 struct TrellisNode *next[2];
200 int heap_pos[2] = {0, 0};
/* Select the half of the node pool used for this step (alternates with
 * i & 1) and empty the next frontier. */
202 for (j = 0; j < 2; j++) {
203 next[j] = c->node_buf[j] + frontier*(i & 1);
204 memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
207 filter_samples(c, &samples[2*i], &xlow, &xhigh);
/* Low band: expand every node of the current frontier. */
209 for (j = 0; j < frontier && nodes[0][j]; j++) {
210 /* Only k >> 2 affects the future adaptive state, therefore testing
211 * small steps that don't change k >> 2 is useless, the original
212 * value from encode_low is better than them. Since we step k
213 * in steps of 4, make sure range is a multiple of 4, so that
214 * we don't miss the original value from encode_low. */
215 int range = j < frontier/2 ? 4 : 0;
216 struct TrellisNode *cur_node = nodes[0][j];
218 int ilow = encode_low(&cur_node->state, xlow);
220 for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
221 int decoded, dec_diff, pos;
223 struct TrellisNode* node;
228 decoded = av_clip_intp2((cur_node->state.scale_factor *
229 ff_g722_low_inv_quant6[k] >> 10)
230 + cur_node->state.s_predictor, 14);
231 dec_diff = xlow - decoded;
/* STORE_NODE(index, UPDATE, VALUE): add dec_diff squared to the parent's
 * ssd, place the candidate in nodes_next[index] (taking a fresh node while
 * the frontier has room, otherwise competing with a leaf), copy the parent
 * state, record VALUE and the parent path in c->paths[index], and sift the
 * node up the heap. UPDATE is the predictor-update expression for the new
 * node; its use inside the macro is not visible in this view. */
233 #define STORE_NODE(index, UPDATE, VALUE)\
234 ssd = cur_node->ssd + dec_diff*dec_diff;\
235 /* Check for wraparound. Using 64 bit ssd counters would \
236 * be simpler, but is slower on x86 32 bit. */\
237 if (ssd < cur_node->ssd)\
239 if (heap_pos[index] < frontier) {\
240 pos = heap_pos[index]++;\
241 assert(pathn[index] < FREEZE_INTERVAL * frontier);\
242 node = nodes_next[index][pos] = next[index]++;\
243 node->path = pathn[index]++;\
245 /* Try to replace one of the leaf nodes with the new \
246 * one, but not always testing the same leaf position */\
247 pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
248 if (ssd >= nodes_next[index][pos]->ssd)\
251 node = nodes_next[index][pos];\
254 node->state = cur_node->state;\
256 c->paths[index][node->path].value = VALUE;\
257 c->paths[index][node->path].prev = cur_node->path;\
258 /* Sift the newly inserted node up in the heap to restore \
259 * the heap property */\
261 int parent = (pos - 1) >> 1;\
262 if (nodes_next[index][parent]->ssd <= ssd)\
264 FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
265 nodes_next[index][pos]);\
268 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
/* High band: expand every node of the current frontier. */
272 for (j = 0; j < frontier && nodes[1][j]; j++) {
274 struct TrellisNode *cur_node = nodes[1][j];
276 /* We don't try to get any initial guess for ihigh via
277 * encode_high - since there's only 4 possible values, test
278 * them all. Testing all of these gives a much, much larger
279 * gain than testing a larger range around ilow. */
280 for (ihigh = 0; ihigh < 4; ihigh++) {
281 int dhigh, decoded, dec_diff, pos;
283 struct TrellisNode* node;
285 dhigh = cur_node->state.scale_factor *
286 ff_g722_high_inv_quant[ihigh] >> 10;
287 decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
288 dec_diff = xhigh - decoded;
290 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
/* Make the freshly built frontier the current one. When the root's ssd
 * exceeds 1 << 16, subtract it from every other node and reset it to 0 so
 * that the error counters stay small. */
294 for (j = 0; j < 2; j++) {
295 FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
297 if (nodes[j][0]->ssd > (1 << 16)) {
298 for (k = 1; k < frontier && nodes[j][k]; k++)
299 nodes[j][k]->ssd -= nodes[j][0]->ssd;
300 nodes[j][0]->ssd = 0;
/* FREEZE_INTERVAL steps after the last freeze: walk back from the root node
 * of each band through c->paths[] and write the output bytes (high-band
 * value shifted left by 6, OR-ed with the low-band value), then restart
 * path numbering and keep only the root node of each frontier. */
304 if (i == froze + FREEZE_INTERVAL) {
305 p[0] = &c->paths[0][nodes[0][0]->path];
306 p[1] = &c->paths[1][nodes[1][0]->path];
307 for (j = i; j > froze; j--) {
308 dst[j] = p[1]->value << 6 | p[0]->value;
309 p[0] = &c->paths[0][p[0]->prev];
310 p[1] = &c->paths[1][p[1]->prev];
313 pathn[0] = pathn[1] = 0;
314 memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
315 memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
/* Final flush: trace back the steps that have not been written yet and
 * store the root node's predictor state of each band in the context. */
319 p[0] = &c->paths[0][nodes[0][0]->path];
320 p[1] = &c->paths[1][nodes[1][0]->path];
321 for (j = i; j > froze; j--) {
322 dst[j] = p[1]->value << 6 | p[0]->value;
323 p[0] = &c->paths[0][p[0]->prev];
324 p[1] = &c->paths[1][p[1]->prev];
326 c->band[0] = nodes[0][0]->state;
327 c->band[1] = nodes[1][0]->state;
330 static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
331 const int16_t *samples)
333 int xlow, xhigh, ilow, ihigh;
334 filter_samples(c, samples, &xlow, &xhigh);
335 ihigh = encode_high(&c->band[1], xhigh);
336 ilow = encode_low (&c->band[0], xlow);
337 ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
338 ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
339 ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
340 *dst = ihigh << 6 | ilow;
343 static void g722_encode_no_trellis(G722Context *c,
344 uint8_t *dst, int nb_samples,
345 const int16_t *samples)
348 for (i = 0; i < nb_samples; i += 2)
349 encode_byte(c, dst++, &samples[i]);
352 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
353 const AVFrame *frame, int *got_packet_ptr)
355 G722Context *c = avctx->priv_data;
356 const int16_t *samples = (const int16_t *)frame->data[0];
357 int nb_samples, out_size, ret;
359 out_size = (frame->nb_samples + 1) / 2;
360 if ((ret = ff_alloc_packet(avpkt, out_size))) {
361 av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
365 nb_samples = frame->nb_samples - (frame->nb_samples & 1);
368 g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
370 g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
372 /* handle last frame with odd frame_size */
373 if (nb_samples < frame->nb_samples) {
374 int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
375 encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
378 if (frame->pts != AV_NOPTS_VALUE)
379 avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
384 AVCodec ff_adpcm_g722_encoder = {
386 .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
387 .type = AVMEDIA_TYPE_AUDIO,
388 .id = AV_CODEC_ID_ADPCM_G722,
389 .priv_data_size = sizeof(G722Context),
390 .init = g722_encode_init,
391 .close = g722_encode_close,
392 .encode2 = g722_encode_frame,
393 .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME,
394 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
395 AV_SAMPLE_FMT_NONE },