4 * Copyright (c) 2012 Konstantin Shishkov
6 * This file is part of Libav.
8 * Libav is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "libavutil/opt.h"
24 #include "libavutil/pixdesc.h"
28 #include "bytestream.h"
30 #include "proresdata.h"
/*
 * Values held in ProresContext.chroma_factor. encode_frame() stores the
 * value in the frame flags as (chroma_factor << 6), and the slice code
 * compares it against CFACTOR_Y444 to decide whether chroma planes are
 * full width.
 */
32 #define CFACTOR_Y422 2
33 #define CFACTOR_Y444 3
/* Upper bound of the "mbs_per_slice" option; also sizes the block buffers. */
35 #define MAX_MBS_PER_SLICE 8
/*
 * Profile identifiers, used as the range of the "profile" option and as
 * index into prores_profile_info.
 * NOTE(review): the enclosing declaration and the remaining enumerators are
 * not visible in this listing.
 */
40 PRORES_PROFILE_PROXY = 0,
42 PRORES_PROFILE_STANDARD,
/*
 * Base quantisation matrices, 64 coefficients per matrix.
 *
 * encode_init() selects one of them: the profile's own matrix
 * (profile_info->quant) when quant_sel is -1, otherwise the matrix chosen
 * by the "quant_mat" option. The 40 rows of 8 values below make up five
 * matrices.
 * NOTE(review): the lines that separate and label the individual matrices
 * are not visible in this listing; presumably they follow the QUANT_MAT_*
 * order - confirm against the full file.
 */
55 static const uint8_t prores_quant_matrices[][64] = {
57 4, 7, 9, 11, 13, 14, 15, 63,
58 7, 7, 11, 12, 14, 15, 63, 63,
59 9, 11, 13, 14, 15, 63, 63, 63,
60 11, 11, 13, 14, 63, 63, 63, 63,
61 11, 13, 14, 63, 63, 63, 63, 63,
62 13, 14, 63, 63, 63, 63, 63, 63,
63 13, 63, 63, 63, 63, 63, 63, 63,
64 63, 63, 63, 63, 63, 63, 63, 63,
67 4, 5, 6, 7, 9, 11, 13, 15,
68 5, 5, 7, 8, 11, 13, 15, 17,
69 6, 7, 9, 11, 13, 15, 15, 17,
70 7, 7, 9, 11, 13, 15, 17, 19,
71 7, 9, 11, 13, 14, 16, 19, 23,
72 9, 11, 13, 14, 16, 19, 23, 29,
73 9, 11, 13, 15, 17, 21, 28, 35,
74 11, 13, 16, 17, 21, 28, 35, 41,
77 4, 4, 5, 5, 6, 7, 7, 9,
78 4, 4, 5, 6, 7, 7, 9, 9,
79 5, 5, 6, 7, 7, 9, 9, 10,
80 5, 5, 6, 7, 7, 9, 9, 10,
81 5, 6, 7, 7, 8, 9, 10, 12,
82 6, 7, 7, 8, 9, 10, 12, 15,
83 6, 7, 7, 9, 10, 11, 14, 17,
84 7, 7, 9, 10, 11, 14, 17, 21,
87 4, 4, 4, 4, 4, 4, 4, 4,
88 4, 4, 4, 4, 4, 4, 4, 4,
89 4, 4, 4, 4, 4, 4, 4, 4,
90 4, 4, 4, 4, 4, 4, 4, 5,
91 4, 4, 4, 4, 4, 4, 5, 5,
92 4, 4, 4, 4, 4, 5, 5, 6,
93 4, 4, 4, 4, 5, 5, 6, 7,
94 4, 4, 4, 4, 5, 6, 7, 7,
97 4, 4, 4, 4, 4, 4, 4, 4,
98 4, 4, 4, 4, 4, 4, 4, 4,
99 4, 4, 4, 4, 4, 4, 4, 4,
100 4, 4, 4, 4, 4, 4, 4, 4,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
/*
 * Macroblock-count thresholds. encode_init() compares them against
 * mb_width * mb_height * pictures_per_frame to pick the br_tab column that
 * supplies the default bits-per-macroblock budget of the profile.
 */
108 #define NUM_MB_LIMITS 4
109 static const int prores_mb_limits[NUM_MB_LIMITS] = {
110 1620, // up to 720x576
111 2700, // up to 960x720
112 6075, // up to 1440x1080
113 9216, // up to 2048x1152
/*
 * Per-profile parameters, five entries indexed by ctx->profile (see
 * encode_init()).
 *
 * Members used by the visible code: full_name, tag (copied to
 * avctx->codec_tag), br_tab (default bits per macroblock, one value per
 * prores_mb_limits entry), quant (index into prores_quant_matrices) and
 * min_quant / max_quant (quantiser range searched by find_slice_quant()).
 * NOTE(review): several member declarations and initialisers are not
 * visible in this listing.
 */
116 static const struct prores_profile {
117 const char *full_name;
121 int br_tab[NUM_MB_LIMITS];
123 } prores_profile_info[5] = {
125 .full_name = "proxy",
126 .tag = MKTAG('a', 'p', 'c', 'o'),
129 .br_tab = { 300, 242, 220, 194 },
130 .quant = QUANT_MAT_PROXY,
134 .tag = MKTAG('a', 'p', 'c', 's'),
137 .br_tab = { 720, 560, 490, 440 },
138 .quant = QUANT_MAT_LT,
141 .full_name = "standard",
142 .tag = MKTAG('a', 'p', 'c', 'n'),
145 .br_tab = { 1050, 808, 710, 632 },
146 .quant = QUANT_MAT_STANDARD,
149 .full_name = "high quality",
150 .tag = MKTAG('a', 'p', 'c', 'h'),
153 .br_tab = { 1566, 1216, 1070, 950 },
154 .quant = QUANT_MAT_HQ,
158 .tag = MKTAG('a', 'p', '4', 'h'),
161 .br_tab = { 2350, 1828, 1600, 1425 },
162 .quant = QUANT_MAT_HQ,
/*
 * Stride between consecutive slice columns in the trellis node array:
 * find_slice_quant() reaches the previous column with
 * "trellis_node - TRELLIS_WIDTH" and find_quant_thread() passes
 * (mb + 1) * TRELLIS_WIDTH as the base index of column mb.
 */
#define TRELLIS_WIDTH 16
/*
 * Score that marks a trellis path as unusable. The replacement list is
 * parenthesised so the macro behaves as a single value inside any
 * expression (e.g. "a % SCORE_LIMIT" or "2 * SCORE_LIMIT").
 */
#define SCORE_LIMIT (INT_MAX / 2)
/*
 * Number of pre-scaled quantisation matrices cached in ProresContext.quants;
 * larger quantisers are scaled on demand into a custom_q buffer.
 */
#define MAX_STORED_Q 16
/*
 * Scratch data owned by one worker thread; find_quant_thread() selects its
 * instance with ctx->tdata + threadnr.
 *
 * blocks   - transformed (or, for alpha, raw) samples of the current slice,
 *            one buffer per plane
 * emu_buf  - 16x16 sample buffer used by get_slice_data() for macroblocks
 *            that cross the picture edge
 * custom_q - quantisation matrix built on demand for quantisers that are
 *            not cached in ProresContext.quants
 * nodes    - trellis nodes for one macroblock row (allocated in encode_init())
 *
 * NOTE(review): the closing line of the typedef is not visible in this listing.
 */
178 typedef struct ProresThreadData {
179 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
180 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
181 int16_t custom_q[64];
182 struct TrellisNode *nodes;
/*
 * Encoder private context (avctx->priv_data).
 *
 * NOTE(review): a number of members referenced elsewhere in the file
 * (e.g. fdsp, mbs_per_slice, num_planes, bits_per_mb, force_quant,
 * alpha_bits, profile, slice_q, slices_width, cur_picture_idx, vendor,
 * quant_sel) are declared on lines not visible in this listing.
 */
185 typedef struct ProresContext {
/* slice coefficient and edge-emulation buffers used by encode_slice() */
187 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
188 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* quants[q][j] = quant_mat[j] * q, filled in encode_init() */
189 int16_t quants[MAX_STORED_Q][64];
/* matrix scaled on demand for quantisers >= MAX_STORED_Q */
190 int16_t custom_q[64];
/* selected base matrix (one row of prores_quant_matrices) */
191 const uint8_t *quant_mat;
/* coefficient scan order: interlaced or progressive table */
192 const uint8_t *scantable;
/* forward transform of one 8x8 block; linesize is in bytes */
194 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
195 int linesize, int16_t *block);
/* picture size in macroblocks */
198 int mb_width, mb_height;
200 int num_chroma_blocks, chroma_factor;
/* mb_height * slices_width */
202 int slices_per_picture;
203 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* basis for the output packet size requested in encode_frame() */
213 int frame_size_upper_bound;
/* entry of prores_profile_info selected by the "profile" option */
216 const struct prores_profile *profile_info;
/* one ProresThreadData per thread */
220 ProresThreadData *tdata;
/*
 * Collect the samples of one slice of a plane and run the forward
 * transform over them, writing the coefficients to blocks.
 *
 * For each of the mbs_per_slice macroblocks (4 * blocks_per_mb samples
 * wide, 16 rows high) the source is either the picture itself, when the
 * macroblock lies completely inside the w x h plane, or emu_buf filled with
 * an edge-replicated copy. ctx->fdct is then called once per 8x8 block:
 * "esrc + 8" addresses the right half and "esrc + elinesize * 4" the lower
 * half (elinesize is in bytes while esrc points to 16-bit samples, so this
 * skips 8 rows).
 *
 * linesize is in bytes; x and y are the sample position of the slice inside
 * the plane.
 *
 * NOTE(review): braces, local declarations, several conditions and the
 * statements that advance blocks/x are not visible in this listing; the
 * comments below describe only the lines that are shown.
 */
223 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
224 int linesize, int x, int y, int w, int h,
225 int16_t *blocks, uint16_t *emu_buf,
226 int mbs_per_slice, int blocks_per_mb, int is_chroma)
228 const uint16_t *esrc;
/* macroblock width in samples: 16 with four blocks per MB, 8 with two */
229 const int mb_width = 4 * blocks_per_mb;
233 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* zero the coefficients of all remaining macroblocks of the slice
 * (the guarding condition is on a line not shown here) */
235 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* macroblock fully inside the plane: transform straight from the picture */
239 if (x + mb_width <= w && y + 16 <= h) {
241 elinesize = linesize;
/* otherwise use the 16-samples-per-row emulation buffer */
246 elinesize = 16 * sizeof(*emu_buf);
/* number of valid columns / rows of this macroblock */
248 bw = FFMIN(w - x, mb_width);
249 bh = FFMIN(h - y, 16);
251 for (j = 0; j < bh; j++) {
252 memcpy(emu_buf + j * 16,
253 (const uint8_t*)src + j * linesize,
/* repeat the last valid sample to the right edge of the macroblock */
255 pix = emu_buf[j * 16 + bw - 1];
256 for (k = bw; k < mb_width; k++)
257 emu_buf[j * 16 + k] = pix;
/* repeat the last valid row downwards */
260 memcpy(emu_buf + j * 16,
261 emu_buf + (bh - 1) * 16,
262 mb_width * sizeof(*emu_buf));
/* first block order: top-left, top-right, bottom-left, bottom-right */
265 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
267 if (blocks_per_mb > 2) {
268 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
271 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
273 if (blocks_per_mb > 2) {
274 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/* second block order: top-left, bottom-left, top-right, bottom-right
 * (presumably selected by is_chroma - the branch line is not shown) */
278 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
280 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
282 if (blocks_per_mb > 2) {
283 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
285 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Copy the alpha samples of one slice into blocks as 16 rows of
 * 16 * mbs_per_slice values each, padding to the right with the last valid
 * sample of the row and downwards with the last valid row.
 *
 * linesize is in bytes (src advances by linesize >> 1 samples per row);
 * x, y give the slice position inside the w x h plane.
 *
 * NOTE(review): the lines that choose between the two per-sample
 * conversions (presumably depending on abits) and the body of the first
 * conversion loop are not visible in this listing.
 */
294 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
295 int linesize, int x, int y, int w, int h,
296 int16_t *blocks, int mbs_per_slice, int abits)
298 const int slice_width = 16 * mbs_per_slice;
299 int i, j, copy_w, copy_h;
/* clip the copied area to the part of the slice inside the plane */
301 copy_w = FFMIN(w - x, slice_width);
302 copy_h = FFMIN(h - y, 16);
303 for (i = 0; i < copy_h; i++) {
304 memcpy(blocks, src, copy_w * sizeof(*src));
306 for (j = 0; j < copy_w; j++)
/* widen each sample by 6 bits, filling the low bits from its top bits */
309 for (j = 0; j < copy_w; j++)
310 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
/* horizontal padding with the last valid sample */
311 for (j = copy_w; j < slice_width; j++)
312 blocks[j] = blocks[copy_w - 1];
313 blocks += slice_width;
314 src += linesize >> 1;
/* vertical padding: duplicate the previous row until 16 rows are filled */
316 for (; i < 16; i++) {
317 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
318 blocks += slice_width;
323 * Write an unsigned rice/exp golomb codeword.
/*
 * Write val with the adaptive Rice / exp-Golomb code described by codebook.
 *
 * codebook layout: bits 0-1 hold switch_bits - 1, bits 2-4 the exp-Golomb
 * order and the bits from 5 upwards the Rice order. Values at or above
 * switch_bits << rice_order are written as exp-Golomb, smaller ones as Rice.
 *
 * NOTE(review): a few lines (braces, else, the declaration of exponent and
 * some statements of the Rice branch) are not visible in this listing.
 */
325 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
327 unsigned int rice_order, exp_order, switch_bits, switch_val;
330 /* number of prefix bits to switch between Rice and expGolomb */
331 switch_bits = (codebook & 3) + 1;
332 rice_order = codebook >> 5; /* rice code order */
333 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
335 switch_val = switch_bits << rice_order;
337 if (val >= switch_val) {
/* exp-Golomb branch: zero prefix followed by the value itself */
338 val -= switch_val - (1 << exp_order);
339 exponent = av_log2(val);
341 put_bits(pb, exponent - exp_order + switch_bits, 0);
342 put_bits(pb, exponent + 1, val);
/* Rice branch: quotient as a run of zero bits, then the low bits */
344 exponent = val >> rice_order;
347 put_bits(pb, exponent, 0);
350 put_sbits(pb, rice_order, val);
/*
 * Sign mask of x: -1 for negative values, 0 otherwise.
 * Written as a comparison so the result does not depend on how the
 * implementation right-shifts negative values or on int being 32 bits wide.
 */
#define GET_SIGN(x) (-((x) < 0))
/*
 * Map a signed value to an unsigned code: 0, -1, 1, -2, 2 -> 0, 1, 2, 3, 4.
 * Uses a multiplication instead of "<< 1" because left-shifting a negative
 * value is undefined behaviour in C. Note that x is evaluated twice.
 */
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of all blocks of a slice plane.
 *
 * Each DC is taken as (blocks[0] - 0x4000) / scale. The first one is coded
 * directly with FIRST_DC_CB; the following ones are coded as differences to
 * the previous DC, with the codebook adapted from the previous code
 * (capped at index 3).
 *
 * NOTE(review): the lines that initialise and update sign and prev_dc and
 * that step past the first block are not visible in this listing.
 */
357 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
358 int blocks_per_slice, int scale)
361 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
363 prev_dc = (blocks[0] - 0x4000) / scale;
364 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
369 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
370 dc = (blocks[0] - 0x4000) / scale;
371 delta = dc - prev_dc;
372 new_sign = GET_SIGN(delta);
/* negate the difference when sign is -1 */
373 delta = (delta ^ sign) - sign;
374 code = MAKE_CODE(delta);
375 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* next codebook index: ceil(code / 2), at most 3 */
376 codebook = (code + (code & 1)) >> 1;
377 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of a slice plane as (run, level, sign) triples.
 *
 * Coefficients are visited position by position: for every scan position
 * i = 1..63 the coefficient scan[i] of each block in turn (idx advances by
 * 64). Each one is quantised with qmat[scan[i]]; the run and level
 * codebooks are adapted from the previous run (capped at 15) and level
 * (capped at 9).
 *
 * NOTE(review): the handling of zero levels (run counting), the level
 * argument of the second codeword and the loop-closing lines are not
 * visible in this listing. plane_size_factor is not used by the visible
 * lines.
 */
383 static void encode_acs(PutBitContext *pb, int16_t *blocks,
384 int blocks_per_slice,
385 int plane_size_factor,
386 const uint8_t *scan, const int16_t *qmat)
389 int run, level, run_cb, lev_cb;
390 int max_coeffs, abs_level;
/* total number of coefficients in the slice plane (64 per block) */
392 max_coeffs = blocks_per_slice << 6;
/* initial codebooks */
393 run_cb = ff_prores_run_to_cb_index[4];
394 lev_cb = ff_prores_lev_to_cb_index[2];
397 for (i = 1; i < 64; i++) {
398 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
399 level = blocks[idx] / qmat[scan[i]];
401 abs_level = FFABS(level);
402 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
403 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* one sign bit: GET_SIGN() is -1 for negative levels, 0 otherwise */
405 put_sbits(pb, 1, GET_SIGN(level));
407 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
408 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Encode the DC and AC coefficients of one slice plane into pb.
 *
 * qmat[0] is used as the DC scale. Returns the number of bytes produced,
 * computed from the bit position before and after encoding.
 *
 * NOTE(review): the end of the parameter list (which declares qmat) and at
 * least one statement before the return - presumably a flush of pb - are
 * not visible in this listing.
 */
417 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
418 const uint16_t *src, int linesize,
419 int mbs_per_slice, int16_t *blocks,
420 int blocks_per_mb, int plane_size_factor,
423 int blocks_per_slice, saved_pos;
425 saved_pos = put_bits_count(pb);
426 blocks_per_slice = mbs_per_slice * blocks_per_mb;
428 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
429 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
430 ctx->scantable, qmat);
433 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Write the difference between the current and the previous alpha value.
 *
 * Differences outside [-dsize, dsize], and a zero difference, are written
 * with abits bits; other differences are written as magnitude - 1 in
 * dbits - 1 bits followed by one sign bit. dbits is 4 for 8-bit alpha and
 * 7 otherwise.
 *
 * NOTE(review): the wrap-around adjustments of diff, the flag bits that
 * distinguish the two forms and the else line are not visible in this
 * listing.
 */
436 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
438 const int mask = (1 << abits) - 1;
439 const int dbits = (abits == 8) ? 4 : 7;
/* parsed as 1 << (dbits - 1): binary minus binds tighter than the shift */
440 const int dsize = 1 << dbits - 1;
441 int diff = cur - prev;
444 if (diff >= (1 << abits) - dsize)
446 if (diff < -dsize || diff > dsize || !diff) {
448 put_bits(pb, abits, diff);
451 put_bits(pb, dbits - 1, FFABS(diff) - 1);
452 put_bits(pb, 1, diff < 0);
/*
 * Write the length of a run of identical alpha values, using either a
 * 4-bit or a 15-bit field.
 *
 * NOTE(review): the conditions that select between the two forms and any
 * prefix bits are on lines not visible in this listing.
 */
456 static void put_alpha_run(PutBitContext *pb, int run)
461 put_bits(pb, 4, run);
463 put_bits(pb, 15, run);
469 // todo alpha quantisation for high quants
/*
 * Encode the alpha samples of one slice (mbs_per_slice * 256 values) as
 * value differences (put_alpha_diff) and run lengths (put_alpha_run).
 * The predictor starts at the maximum alpha value (mask).
 *
 * Returns the number of bytes produced, computed from the bit position
 * before and after encoding.
 *
 * NOTE(review): the end of the parameter list, the declarations of idx and
 * run, and most of the loop body are not visible in this listing.
 */
470 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
471 int mbs_per_slice, uint16_t *blocks,
474 const int abits = ctx->alpha_bits;
475 const int mask = (1 << abits) - 1;
476 const int num_coeffs = mbs_per_slice * 256;
477 int saved_pos = put_bits_count(pb);
478 int prev = mask, cur;
483 put_alpha_diff(pb, cur, prev, abits);
488 put_alpha_run (pb, run);
489 put_alpha_diff(pb, cur, prev, abits);
495 } while (idx < num_coeffs);
497 put_alpha_run(pb, run);
499 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Encode all planes of one slice into pb and store the byte size of every
 * plane in sizes[].
 *
 * x, y locate the slice in macroblocks and quant is its quantiser. Colour
 * planes go through get_slice_data() + encode_slice_plane(), the alpha
 * plane through get_alpha_data() + encode_alpha_plane(). An error is
 * logged when the bit writer runs out of space.
 *
 * NOTE(review): part of the parameter list, several local declarations,
 * the per-plane position/size computations, the else lines and the return
 * statements are not visible in this listing.
 */
502 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
504 int sizes[4], int x, int y, int quant,
507 ProresContext *ctx = avctx->priv_data;
511 int slice_width_factor = av_log2(mbs_per_slice);
512 int num_cblocks, pwidth, linesize, line_add;
513 int plane_factor, is_chroma;
/* line_add selects the field line to start from; the value used for a
 * single picture per frame is assigned on a line not shown here */
516 if (ctx->pictures_per_frame == 1)
519 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* pick the quantisation matrix: forced, cached, or scaled on demand */
521 if (ctx->force_quant) {
522 qmat = ctx->quants[0];
523 } else if (quant < MAX_STORED_Q) {
524 qmat = ctx->quants[quant];
526 qmat = ctx->custom_q;
527 for (i = 0; i < 64; i++)
528 qmat[i] = ctx->quant_mat[i] * quant;
531 for (i = 0; i < ctx->num_planes; i++) {
532 is_chroma = (i == 1 || i == 2);
533 plane_factor = slice_width_factor + 2;
535 plane_factor += ctx->chroma_factor - 3;
/* luma, alpha and 4:4:4 chroma use the full picture width ... */
536 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
540 pwidth = avctx->width;
/* ... other chroma planes half of it */
545 pwidth = avctx->width >> 1;
/* with two pictures per frame every other line belongs to this picture */
548 linesize = pic->linesize[i] * ctx->pictures_per_frame;
549 src = (const uint16_t*)(pic->data[i] + yp * linesize +
550 line_add * pic->linesize[i]) + xp;
553 get_slice_data(ctx, src, linesize, xp, yp,
554 pwidth, avctx->height / ctx->pictures_per_frame,
555 ctx->blocks[0], ctx->emu_buf,
556 mbs_per_slice, num_cblocks, is_chroma);
557 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
558 mbs_per_slice, ctx->blocks[0],
559 num_cblocks, plane_factor,
562 get_alpha_data(ctx, src, linesize, xp, yp,
563 pwidth, avctx->height / ctx->pictures_per_frame,
564 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
565 sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
566 ctx->blocks[0], quant);
568 total_size += sizes[i];
/* the packet was sized from frame_size_upper_bound; running out of room
 * means that estimate was too small */
569 if (put_bits_left(pb) < 0) {
570 av_log(avctx, AV_LOG_ERROR,
571 "Underestimated required buffer size.\n");
/*
 * Return the number of bits encode_vlc_codeword() needs for val with the
 * given codebook, without writing anything. The codebook fields are decoded
 * exactly as in encode_vlc_codeword().
 *
 * NOTE(review): braces, the else line and the declaration of exponent are
 * not visible in this listing.
 */
578 static inline int estimate_vlc(unsigned codebook, int val)
580 unsigned int rice_order, exp_order, switch_bits, switch_val;
583 /* number of prefix bits to switch between Rice and expGolomb */
584 switch_bits = (codebook & 3) + 1;
585 rice_order = codebook >> 5; /* rice code order */
586 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
588 switch_val = switch_bits << rice_order;
590 if (val >= switch_val) {
591 val -= switch_val - (1 << exp_order);
592 exponent = av_log2(val);
/* zero prefix (exponent - exp_order + switch_bits) plus exponent + 1 bits */
594 return exponent * 2 - exp_order + switch_bits + 1;
/* Rice: quotient zeros, presumably a stop bit, and rice_order low bits */
596 return (val >> rice_order) + rice_order + 1;
/*
 * Estimate the bit cost of the DC coefficients of a slice plane and add
 * the quantisation remainder of every DC to *error. Mirrors encode_dcs()
 * but calls estimate_vlc() instead of writing bits.
 *
 * NOTE(review): the last parameter (scale), the declarations of i and
 * bits, the sign/prev_dc updates and the return statement are not visible
 * in this listing.
 */
600 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
604 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
607 prev_dc = (blocks[0] - 0x4000) / scale;
608 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
/* remainder lost by dividing the first DC by scale */
612 *error += FFABS(blocks[0] - 0x4000) % scale;
614 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
615 dc = (blocks[0] - 0x4000) / scale;
616 *error += FFABS(blocks[0] - 0x4000) % scale;
617 delta = dc - prev_dc;
618 new_sign = GET_SIGN(delta);
619 delta = (delta ^ sign) - sign;
620 code = MAKE_CODE(delta);
621 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
622 codebook = (code + (code & 1)) >> 1;
623 codebook = FFMIN(codebook, 3);
/*
 * Estimate the bit cost of the AC coefficients of a slice plane and add
 * the quantisation remainder of every coefficient to *error. Visits the
 * coefficients in the same order as encode_acs().
 *
 * NOTE(review): the declarations of i, idx and bits, the zero-level/run
 * handling, the level argument of the second estimate and the return
 * statement are not visible in this listing. plane_size_factor is not used
 * by the visible lines.
 */
631 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
632 int plane_size_factor,
633 const uint8_t *scan, const int16_t *qmat)
636 int run, level, run_cb, lev_cb;
637 int max_coeffs, abs_level;
640 max_coeffs = blocks_per_slice << 6;
641 run_cb = ff_prores_run_to_cb_index[4];
642 lev_cb = ff_prores_lev_to_cb_index[2];
645 for (i = 1; i < 64; i++) {
646 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
647 level = blocks[idx] / qmat[scan[i]];
/* remainder lost by the integer division above */
648 *error += FFABS(blocks[idx]) % qmat[scan[i]];
650 abs_level = FFABS(level);
651 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
652 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
655 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
656 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the size in bits of one slice plane for the matrix qmat, using
 * the coefficients already stored in td->blocks[plane]. The quantisation
 * error is accumulated into *error. The result is rounded up to a whole
 * number of bytes (multiple of 8 bits).
 *
 * NOTE(review): one parameter line and the declaration of bits are not
 * visible in this listing.
 */
667 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
668 const uint16_t *src, int linesize,
670 int blocks_per_mb, int plane_size_factor,
671 const int16_t *qmat, ProresThreadData *td)
673 int blocks_per_slice;
676 blocks_per_slice = mbs_per_slice * blocks_per_mb;
678 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
679 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
680 plane_size_factor, ctx->scantable, qmat);
682 return FFALIGN(bits, 8);
/*
 * Return the number of bits put_alpha_diff() would use for the step from
 * prev to cur; the same thresholds (dbits, dsize) are applied.
 *
 * NOTE(review): the wrap-around adjustments of diff and both return
 * statements are not visible in this listing.
 */
685 static int est_alpha_diff(int cur, int prev, int abits)
687 const int mask = (1 << abits) - 1;
688 const int dbits = (abits == 8) ? 4 : 7;
/* parsed as 1 << (dbits - 1) */
689 const int dsize = 1 << dbits - 1;
690 int diff = cur - prev;
693 if (diff >= (1 << abits) - dsize)
695 if (diff < -dsize || diff > dsize || !diff)
/*
 * Estimate the size of the alpha data of one slice
 * (mbs_per_slice * 256 values), following the structure of
 * encode_alpha_plane() but summing est_alpha_diff() results instead of
 * writing bits.
 *
 * NOTE(review): the last parameter line, the declarations of idx, run and
 * bits, the run-length accounting and the return statement are not visible
 * in this listing.
 */
701 static int estimate_alpha_plane(ProresContext *ctx, int *error,
702 const uint16_t *src, int linesize,
703 int mbs_per_slice, int quant,
706 const int abits = ctx->alpha_bits;
707 const int mask = (1 << abits) - 1;
708 const int num_coeffs = mbs_per_slice * 256;
709 int prev = mask, cur;
716 bits = est_alpha_diff(cur, prev, abits);
727 bits += est_alpha_diff(cur, prev, abits);
733 } while (idx < num_coeffs);
/*
 * Rate control for one slice: estimate size and error for every candidate
 * quantiser and extend the trellis kept in td->nodes by one column.
 *
 * Visible steps:
 *  1. load the slice data of every plane into td->blocks
 *  2. reset the nodes of this column (base index trellis_node)
 *  3. estimate bits and error for q = min_quant .. max_quant
 *  4. fill the extra node max_quant + 1: reuse max_quant if it already
 *     fits into bits_per_mb * mbs_per_slice, otherwise try quantisers up
 *     to 127 ("overquant")
 *  5. link every node to the nodes of the previous column, keeping the
 *     lowest accumulated score
 *  6. select the node with the lowest score in this column
 *
 * x, y locate the slice in macroblocks. The caller uses the result as an
 * index into td->nodes.
 * NOTE(review): many lines (declarations, per-plane position and block
 * count computations, assignments inside the conditionals, break/continue
 * statements and the return) are not visible in this listing.
 */
745 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
746 int trellis_node, int x, int y, int mbs_per_slice,
747 ProresThreadData *td)
749 ProresContext *ctx = avctx->priv_data;
750 int i, q, pq, xp, yp;
752 int slice_width_factor = av_log2(mbs_per_slice);
753 int num_cblocks[MAX_PLANES], pwidth;
754 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
755 const int min_quant = ctx->profile_info->min_quant;
756 const int max_quant = ctx->profile_info->max_quant;
757 int error, bits, bits_limit;
758 int mbs, prev, cur, new_score;
759 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
762 int linesize[4], line_add;
764 if (ctx->pictures_per_frame == 1)
767 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* macroblocks of this row consumed once this slice is included */
768 mbs = x + mbs_per_slice;
/* step 1: fetch and transform the slice data of every plane */
770 for (i = 0; i < ctx->num_planes; i++) {
771 is_chroma[i] = (i == 1 || i == 2);
772 plane_factor[i] = slice_width_factor + 2;
774 plane_factor[i] += ctx->chroma_factor - 3;
775 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
779 pwidth = avctx->width;
784 pwidth = avctx->width >> 1;
787 linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
788 src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
789 line_add * pic->linesize[i]) + xp;
792 get_slice_data(ctx, src, linesize[i], xp, yp,
793 pwidth, avctx->height / ctx->pictures_per_frame,
794 td->blocks[i], td->emu_buf,
795 mbs_per_slice, num_cblocks[i], is_chroma[i]);
797 get_alpha_data(ctx, src, linesize[i], xp, yp,
798 pwidth, avctx->height / ctx->pictures_per_frame,
799 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* step 2: reset this trellis column, including the extra node */
803 for (q = min_quant; q < max_quant + 2; q++) {
804 td->nodes[trellis_node + q].prev_node = -1;
805 td->nodes[trellis_node + q].quant = q;
/* step 3: cost of every regular quantiser */
808 // todo: maybe perform coarser quantising to fit into frame size when needed
809 for (q = min_quant; q <= max_quant; q++) {
/* colour planes first; the alpha plane, if any, is estimated separately */
812 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
813 bits += estimate_slice_plane(ctx, &error, i,
816 num_cblocks[i], plane_factor[i],
820 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
821 mbs_per_slice, q, td->blocks[3]);
822 if (bits > 65000 * 8) {
826 slice_bits[q] = bits;
827 slice_score[q] = error;
/* step 4: the extra node max_quant + 1 */
829 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
830 slice_bits[max_quant + 1] = slice_bits[max_quant];
831 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
832 overquant = max_quant;
/* max_quant does not fit the budget: try larger quantisers */
834 for (q = max_quant + 1; q < 128; q++) {
837 if (q < MAX_STORED_Q) {
838 qmat = ctx->quants[q];
841 for (i = 0; i < 64; i++)
842 qmat[i] = ctx->quant_mat[i] * q;
844 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
845 bits += estimate_slice_plane(ctx, &error, i,
848 num_cblocks[i], plane_factor[i],
852 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
853 mbs_per_slice, q, td->blocks[3]);
854 if (bits <= ctx->bits_per_mb * mbs_per_slice)
858 slice_bits[max_quant + 1] = bits;
859 slice_score[max_quant + 1] = error;
862 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* step 5: connect to the previous column; bits_limit is the budget for
 * all macroblocks of the row up to and including this slice */
864 bits_limit = mbs * ctx->bits_per_mb;
865 for (pq = min_quant; pq < max_quant + 2; pq++) {
866 prev = trellis_node - TRELLIS_WIDTH + pq;
868 for (q = min_quant; q < max_quant + 2; q++) {
869 cur = trellis_node + q;
871 bits = td->nodes[prev].bits + slice_bits[q];
872 error = slice_score[q];
873 if (bits > bits_limit)
/* keep scores below SCORE_LIMIT so the addition stays in range */
876 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
877 new_score = td->nodes[prev].score + error;
879 new_score = SCORE_LIMIT;
880 if (td->nodes[cur].prev_node == -1 ||
881 td->nodes[cur].score >= new_score) {
883 td->nodes[cur].bits = bits;
884 td->nodes[cur].score = new_score;
885 td->nodes[cur].prev_node = prev;
/* step 6: lowest score in this column; on ties the larger q wins */
890 error = td->nodes[trellis_node + min_quant].score;
891 pq = trellis_node + min_quant;
892 for (q = min_quant + 1; q < max_quant + 2; q++) {
893 if (td->nodes[trellis_node + q].score <= error) {
894 error = td->nodes[trellis_node + q].score;
895 pq = trellis_node + q;
/*
 * Worker run through avctx->execute2(): chooses the quantisers for all
 * slices of macroblock row jobnr.
 *
 * The row is processed slice by slice with find_slice_quant(), each slice
 * using trellis column (mb + 1) * TRELLIS_WIDTH. Afterwards the best path
 * is followed backwards through prev_node and the quantiser of every slice
 * is stored in ctx->slice_q.
 *
 * NOTE(review): the statement executed by the while loop, the remaining
 * arguments of find_slice_quant() and the return are not visible in this
 * listing.
 */
902 static int find_quant_thread(AVCodecContext *avctx, void *arg,
903 int jobnr, int threadnr)
905 ProresContext *ctx = avctx->priv_data;
906 ProresThreadData *td = ctx->tdata + threadnr;
907 int mbs_per_slice = ctx->mbs_per_slice;
908 int x, y = jobnr, mb, q = 0;
910 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
/* fewer macroblocks left in the row than the current slice size */
911 while (ctx->mb_width - x < mbs_per_slice)
913 q = find_slice_quant(avctx, avctx->coded_frame,
914 (mb + 1) * TRELLIS_WIDTH, x, y,
/* backtrack from the last column to the first */
918 for (x = ctx->slices_width - 1; x >= 0; x--) {
919 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
920 q = td->nodes[q].prev_node;
/*
 * Encode one frame into pkt.
 *
 * Output written by the visible code: a 4-byte frame size, the frame
 * container ID, the frame header (version, vendor, dimensions, flags,
 * colour description, optional quantisation matrices), and then for every
 * picture (one, or two when interlaced) a picture header, a table of slice
 * sizes and the slices themselves. Size fields are reserved first and
 * patched once the sizes are known.
 *
 * Unless a fixed quantiser is forced, the per-slice quantisers are
 * computed by find_quant_thread() before the slices are written.
 *
 * NOTE(review): several declarations, pointer set-up statements
 * (buf, tmp, slice_sizes, slice_hdr), error-handling returns and the final
 * return are not visible in this listing.
 */
926 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
927 const AVFrame *pic, int *got_packet)
929 ProresContext *ctx = avctx->priv_data;
930 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
931 uint8_t *picture_size_pos;
933 int x, y, i, mb, q = 0;
934 int sizes[4] = { 0 };
/* quantiser byte and header-size byte plus a 16-bit size per plane
 * except the last */
935 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
936 int frame_size, picture_size, slice_size;
/* every frame is an intra-coded key frame */
940 *avctx->coded_frame = *pic;
941 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
942 avctx->coded_frame->key_frame = 1;
944 pkt_size = ctx->frame_size_upper_bound;
946 if ((ret = ff_alloc_packet(pkt, pkt_size + FF_MIN_BUFFER_SIZE)) < 0) {
947 av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
951 orig_buf = pkt->data;
954 orig_buf += 4; // frame size
955 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
960 buf += 2; // frame header size will be stored here
961 bytestream_put_be16 (&buf, 0); // version 1
962 bytestream_put_buffer(&buf, ctx->vendor, 4);
963 bytestream_put_be16 (&buf, avctx->width);
964 bytestream_put_be16 (&buf, avctx->height);
/* chroma format in the top two bits, field order in bits 2-3 */
966 frame_flags = ctx->chroma_factor << 6;
967 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
968 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
969 bytestream_put_byte (&buf, frame_flags);
971 bytestream_put_byte (&buf, 0); // reserved
972 bytestream_put_byte (&buf, avctx->color_primaries);
973 bytestream_put_byte (&buf, avctx->color_trc);
974 bytestream_put_byte (&buf, avctx->colorspace);
/* low bits: alpha_bits / 8, i.e. 0, 1 or 2 */
975 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
976 bytestream_put_byte (&buf, 0); // reserved
977 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
978 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
979 // luma quantisation matrix
980 for (i = 0; i < 64; i++)
981 bytestream_put_byte(&buf, ctx->quant_mat[i]);
982 // chroma quantisation matrix
/* the same matrix is written for luma and chroma */
983 for (i = 0; i < 64; i++)
984 bytestream_put_byte(&buf, ctx->quant_mat[i]);
986 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
988 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
/* one pass per picture: a single frame, or two fields */
990 for (ctx->cur_picture_idx = 0;
991 ctx->cur_picture_idx < ctx->pictures_per_frame;
992 ctx->cur_picture_idx++) {
994 picture_size_pos = buf + 1;
995 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
996 buf += 4; // picture data size will be stored here
997 bytestream_put_be16 (&buf, ctx->slices_per_picture);
998 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1000 // seek table - will be filled during slice encoding
1002 buf += ctx->slices_per_picture * 2;
/* rate control: find a quantiser for every slice */
1005 if (!ctx->force_quant) {
1006 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1012 for (y = 0; y < ctx->mb_height; y++) {
1013 int mbs_per_slice = ctx->mbs_per_slice;
1014 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1015 q = ctx->force_quant ? ctx->force_quant
1016 : ctx->slice_q[mb + y * ctx->slices_width];
/* halve the slice width until it fits into the rest of the row */
1018 while (ctx->mb_width - x < mbs_per_slice)
1019 mbs_per_slice >>= 1;
/* slice header size in bits */
1021 bytestream_put_byte(&buf, slice_hdr_size << 3);
1023 buf += slice_hdr_size - 1;
1024 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1025 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
/* fill in the slice header: quantiser, then the plane sizes except the
 * last one */
1030 bytestream_put_byte(&slice_hdr, q);
1031 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1032 for (i = 0; i < ctx->num_planes - 1; i++) {
1033 bytestream_put_be16(&slice_hdr, sizes[i]);
1034 slice_size += sizes[i];
/* entry in the slice size table of the picture header */
1036 bytestream_put_be16(&slice_sizes, slice_size);
1037 buf += slice_size - slice_hdr_size;
/* patch the picture data size reserved above */
1041 if (ctx->pictures_per_frame == 1)
1042 picture_size = buf - picture_size_pos - 6;
1044 picture_size = buf - picture_size_pos + 1;
1045 bytestream_put_be32(&picture_size_pos, picture_size);
/* patch the total frame size at the start of the packet */
1049 frame_size = buf - orig_buf;
1050 bytestream_put_be32(&orig_buf, frame_size);
1052 pkt->size = frame_size;
1053 pkt->flags |= AV_PKT_FLAG_KEY;
/*
 * Release everything allocated by encode_init(): the coded frame, the
 * per-thread trellis node arrays, the thread data array and the slice
 * quantiser table. Also used by encode_init() on allocation failure.
 *
 * NOTE(review): the declaration of i, any guard around the loop over
 * ctx->tdata and the return statement are not visible in this listing.
 */
1059 static av_cold int encode_close(AVCodecContext *avctx)
1061 ProresContext *ctx = avctx->priv_data;
1064 av_freep(&avctx->coded_frame);
1067 for (i = 0; i < avctx->thread_count; i++)
1068 av_free(ctx->tdata[i].nodes);
1070 av_freep(&ctx->tdata);
1071 av_freep(&ctx->slice_q);
/*
 * Default ctx->fdct implementation: copy an 8x8 group of 16-bit samples
 * from src into block in row-major order. linesize is in bytes, hence the
 * ">> 1" when stepping to the next row.
 *
 * NOTE(review): the declaration of x and y and whatever follows the copy
 * loop - presumably the actual transform call through fdsp - are not
 * visible in this listing.
 */
1076 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1077 int linesize, int16_t *block)
1080 const uint16_t *tsrc = src;
1082 for (y = 0; y < 8; y++) {
1083 for (x = 0; x < 8; x++)
1084 block[y * 8 + x] = tsrc[x];
1085 tsrc += linesize >> 1;
/*
 * Initialise the encoder: validate the options, derive the picture and
 * slice geometry, select the quantisation matrix, prepare either the
 * rate-control state or the fixed-quantiser matrix, and compute the upper
 * bound used to size output packets.
 *
 * Returns a negative AVERROR code on invalid options or allocation
 * failure.
 *
 * NOTE(review): several declarations, else lines, conditions, the end of
 * a few expressions and the final return are not visible in this listing.
 */
1090 static av_cold int encode_init(AVCodecContext *avctx)
1092 ProresContext *ctx = avctx->priv_data;
1095 int min_quant, max_quant;
1096 int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1098 avctx->bits_per_raw_sample = 10;
1099 avctx->coded_frame = av_frame_alloc();
1100 if (!avctx->coded_frame)
1101 return AVERROR(ENOMEM);
1103 ctx->fdct = prores_fdct;
1104 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1105 : ff_prores_progressive_scan;
1106 ff_fdctdsp_init(&ctx->fdsp, avctx);
/* a value with more than one bit set is not a power of two */
1108 mps = ctx->mbs_per_slice;
1109 if (mps & (mps - 1)) {
1110 av_log(avctx, AV_LOG_ERROR,
1111 "there should be an integer power of two MBs per slice\n");
1112 return AVERROR(EINVAL);
/* alpha is only kept for pixel formats that carry an alpha plane */
1114 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1115 if (ctx->alpha_bits & 7) {
1116 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1117 return AVERROR(EINVAL);
1120 ctx->alpha_bits = 0;
1123 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1126 ctx->profile_info = prores_profile_info + ctx->profile;
1127 ctx->num_planes = 3 + !!ctx->alpha_bits;
/* picture size in 16x16 macroblocks; the height has two variants, one
 * aligned to 32 lines (presumably the interlaced case - the condition is
 * not shown) */
1129 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
1132 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1134 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* full-size slices per row, plus one slice for every set bit of the
 * remainder (the row tail is split into power-of-two sized slices) */
1136 ctx->slices_width = ctx->mb_width / mps;
1137 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1138 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1139 ctx->pictures_per_frame = 1 + interlaced;
/* quant_sel == -1 ("auto") takes the matrix of the profile */
1141 if (ctx->quant_sel == -1)
1142 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1144 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
/* encode_frame() writes exactly four vendor bytes */
1146 if (strlen(ctx->vendor) != 4) {
1147 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1148 return AVERROR_INVALIDDATA;
/* a non-zero global quality selects a fixed quantiser */
1151 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1152 if (!ctx->force_quant) {
/* rate-controlled mode: take the default budget from the profile table
 * unless bits_per_mb was set by the user */
1153 if (!ctx->bits_per_mb) {
1154 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1155 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1156 ctx->pictures_per_frame)
1158 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1159 } else if (ctx->bits_per_mb < 128) {
1160 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1161 return AVERROR_INVALIDDATA;
/* cache the scaled matrices for the quantisers below MAX_STORED_Q */
1164 min_quant = ctx->profile_info->min_quant;
1165 max_quant = ctx->profile_info->max_quant;
1166 for (i = min_quant; i < MAX_STORED_Q; i++) {
1167 for (j = 0; j < 64; j++)
1168 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1171 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1172 if (!ctx->slice_q) {
1173 encode_close(avctx);
1174 return AVERROR(ENOMEM);
1177 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1179 encode_close(avctx);
1180 return AVERROR(ENOMEM);
/* one trellis per thread */
1183 for (j = 0; j < avctx->thread_count; j++) {
1184 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1186 * sizeof(*ctx->tdata->nodes));
1187 if (!ctx->tdata[j].nodes) {
1188 encode_close(avctx);
1189 return AVERROR(ENOMEM);
/* column 0 is the start of every path: no predecessor, zero cost */
1191 for (i = min_quant; i < max_quant + 2; i++) {
1192 ctx->tdata[j].nodes[i].prev_node = -1;
1193 ctx->tdata[j].nodes[i].bits = 0;
1194 ctx->tdata[j].nodes[i].score = 0;
/* fixed-quantiser mode */
1200 if (ctx->force_quant > 64) {
1201 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1202 return AVERROR_INVALIDDATA;
/* quants[0] holds the only matrix used; ls accumulates a per-coefficient
 * size estimate from which bits_per_mb is derived below */
1205 for (j = 0; j < 64; j++) {
1206 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1207 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1210 ctx->bits_per_mb = ls * 8;
1211 if (ctx->chroma_factor == CFACTOR_Y444)
1212 ctx->bits_per_mb += ls * 4;
1213 if (ctx->num_planes == 4)
1214 ctx->bits_per_mb += ls * 4;
/* packet size estimate: per slice a header term plus the budgeted
 * payload (the end of this expression is not shown) */
1217 ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1218 ctx->slices_per_picture *
1219 (2 + 2 * ctx->num_planes +
1220 (mps * ctx->bits_per_mb) / 8)
1223 avctx->codec_tag = ctx->profile_info->tag;
1225 av_log(avctx, AV_LOG_DEBUG,
1226 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1227 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1228 interlaced ? "yes" : "no", ctx->bits_per_mb);
1229 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1230 ctx->frame_size_upper_bound);
/* Byte offset of a ProresContext member, for the option table. */
#define OFFSET(x) offsetof(ProresContext, x)
/*
 * Flags shared by all options below. The replacement list is parenthesised
 * so the macro stays a single value when combined with other operators.
 */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
/*
 * Private options of the encoder. Entries of type AV_OPT_TYPE_CONST are
 * named values for the option that shares their unit string ("profile" or
 * "quant_mat").
 *
 * NOTE(review): the terminating entry and closing brace are not visible in
 * this listing.
 */
1238 static const AVOption options[] = {
/* must be a power of two; checked in encode_init() */
1239 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1240 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1241 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1242 { .i64 = PRORES_PROFILE_STANDARD },
1243 PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1244 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1245 0, 0, VE, "profile" },
1246 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1247 0, 0, VE, "profile" },
1248 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1249 0, 0, VE, "profile" },
1250 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1251 0, 0, VE, "profile" },
1252 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1253 0, 0, VE, "profile" },
/* encode_init() rejects strings that are not exactly four bytes long */
1254 { "vendor", "vendor ID", OFFSET(vendor),
1255 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
/* 0 selects the default of the profile; other values below 128 are
 * rejected in encode_init() */
1256 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1257 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
/* -1 ("auto") uses the matrix of the selected profile */
1258 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1259 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1260 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1261 0, 0, VE, "quant_mat" },
1262 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1263 0, 0, VE, "quant_mat" },
1264 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1265 0, 0, VE, "quant_mat" },
1266 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1267 0, 0, VE, "quant_mat" },
1268 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1269 0, 0, VE, "quant_mat" },
1270 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1271 0, 0, VE, "quant_mat" },
/* encode_init() accepts 0, 8 or 16 and forces 0 for formats without alpha */
1272 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1273 { .i64 = 16 }, 0, 16, VE },
/*
 * AVClass of the encoder's private context, referenced by
 * ff_prores_encoder.priv_class.
 * NOTE(review): at least one initialiser line (presumably the link to the
 * option table) and the closing brace are not visible in this listing.
 */
1277 static const AVClass proresenc_class = {
1278 .class_name = "ProRes encoder",
1279 .item_name = av_default_item_name,
1281 .version = LIBAVUTIL_VERSION_INT,
/*
 * Codec registration entry: ties the init / encode / close callbacks and
 * the private class together and lists the supported pixel formats
 * (10-bit 4:2:2, 4:4:4 and 4:4:4 with alpha).
 * NOTE(review): the .name initialiser and the closing lines are not
 * visible in this listing.
 */
1284 AVCodec ff_prores_encoder = {
1286 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1287 .type = AVMEDIA_TYPE_VIDEO,
1288 .id = AV_CODEC_ID_PRORES,
1289 .priv_data_size = sizeof(ProresContext),
1290 .init = encode_init,
1291 .close = encode_close,
1292 .encode2 = encode_frame,
/* rate control runs one job per macroblock row via execute2() */
1293 .capabilities = CODEC_CAP_SLICE_THREADS,
1294 .pix_fmts = (const enum AVPixelFormat[]) {
1295 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1296 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1298 .priv_class = &proresenc_class,