4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on Anatoliy Wasserman's encoder, considering
7 * the similarities in the bugs.
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/mem_internal.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/pixdesc.h"
30 #include "codec_internal.h"
35 #include "bytestream.h"
36 #include "proresdata.h"
/* Chroma-format codes: ctx->chroma_factor is compared against CFACTOR_Y444
 * when choosing plane geometry and is shifted into the frame flags byte. */
38 #define CFACTOR_Y422 2
39 #define CFACTOR_Y444 3
/* Upper limit of the "mbs_per_slice" option; also sizes the per-plane
 * coefficient buffers (64 * 4 * MAX_MBS_PER_SLICE entries). */
41 #define MAX_MBS_PER_SLICE 8
/* Profile identifiers accepted by the "profile" option. AUTO (-1) makes
 * encode_init() choose a profile from the pixel format; the non-negative
 * values are used as an index into prores_profile_info[]. */
46 PRORES_PROFILE_AUTO = -1,
47 PRORES_PROFILE_PROXY = 0,
49 PRORES_PROFILE_STANDARD,
52 PRORES_PROFILE_4444XQ,
/* Quantisation-matrix selector, used as an index into
 * prores_quant_matrices[] (see the proxy profile's .quant_chroma). */
57 QUANT_MAT_PROXY_CHROMA,
/*
 * Base quantisation matrices, 64 entries each (laid out here as eight rows
 * of eight values). encode_init() selects one of them through a QUANT_MAT_*
 * index and multiplies it by the quantiser to build the working matrices.
 */
65 static const uint8_t prores_quant_matrices[][64] = {
67 4, 7, 9, 11, 13, 14, 15, 63,
68 7, 7, 11, 12, 14, 15, 63, 63,
69 9, 11, 13, 14, 15, 63, 63, 63,
70 11, 11, 13, 14, 63, 63, 63, 63,
71 11, 13, 14, 63, 63, 63, 63, 63,
72 13, 14, 63, 63, 63, 63, 63, 63,
73 13, 63, 63, 63, 63, 63, 63, 63,
74 63, 63, 63, 63, 63, 63, 63, 63,
77 4, 7, 9, 11, 13, 14, 63, 63,
78 7, 7, 11, 12, 14, 63, 63, 63,
79 9, 11, 13, 14, 63, 63, 63, 63,
80 11, 11, 13, 14, 63, 63, 63, 63,
81 11, 13, 14, 63, 63, 63, 63, 63,
82 13, 14, 63, 63, 63, 63, 63, 63,
83 13, 63, 63, 63, 63, 63, 63, 63,
84 63, 63, 63, 63, 63, 63, 63, 63
87 4, 5, 6, 7, 9, 11, 13, 15,
88 5, 5, 7, 8, 11, 13, 15, 17,
89 6, 7, 9, 11, 13, 15, 15, 17,
90 7, 7, 9, 11, 13, 15, 17, 19,
91 7, 9, 11, 13, 14, 16, 19, 23,
92 9, 11, 13, 14, 16, 19, 23, 29,
93 9, 11, 13, 15, 17, 21, 28, 35,
94 11, 13, 16, 17, 21, 28, 35, 41,
97 4, 4, 5, 5, 6, 7, 7, 9,
98 4, 4, 5, 6, 7, 7, 9, 9,
99 5, 5, 6, 7, 7, 9, 9, 10,
100 5, 5, 6, 7, 7, 9, 9, 10,
101 5, 6, 7, 7, 8, 9, 10, 12,
102 6, 7, 7, 8, 9, 10, 12, 15,
103 6, 7, 7, 9, 10, 11, 14, 17,
104 7, 7, 9, 10, 11, 14, 17, 21,
107 4, 4, 4, 4, 4, 4, 4, 4,
108 4, 4, 4, 4, 4, 4, 4, 4,
109 4, 4, 4, 4, 4, 4, 4, 4,
110 4, 4, 4, 4, 4, 4, 4, 5,
111 4, 4, 4, 4, 4, 4, 5, 5,
112 4, 4, 4, 4, 4, 5, 5, 6,
113 4, 4, 4, 4, 5, 5, 6, 7,
114 4, 4, 4, 4, 5, 6, 7, 7,
117 2, 2, 2, 2, 2, 2, 2, 2,
118 2, 2, 2, 2, 2, 2, 2, 2,
119 2, 2, 2, 2, 2, 2, 2, 2,
120 2, 2, 2, 2, 2, 2, 2, 3,
121 2, 2, 2, 2, 2, 2, 3, 3,
122 2, 2, 2, 2, 2, 3, 3, 3,
123 2, 2, 2, 2, 3, 3, 3, 4,
124 2, 2, 2, 2, 3, 3, 4, 4,
/* flat matrix: every entry is 4 */
127 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4,
132 4, 4, 4, 4, 4, 4, 4, 4,
133 4, 4, 4, 4, 4, 4, 4, 4,
134 4, 4, 4, 4, 4, 4, 4, 4,
/*
 * Macroblock-count thresholds. encode_init() walks this table and stops at
 * the first entry that is >= mb_width * mb_height * pictures_per_frame; the
 * resulting index selects the matching column of a profile's br_tab[].
 */
138 #define NUM_MB_LIMITS 4
139 static const int prores_mb_limits[NUM_MB_LIMITS] = {
140 1620, // up to 720x576
141 2700, // up to 960x720
142 6075, // up to 1440x1080
143 9216, // up to 2048x1152
/*
 * Per-profile parameters, indexed by the PRORES_PROFILE_* value stored in
 * ctx->profile (encode_init() does prores_profile_info + ctx->profile).
 *
 *   tag          - FourCC copied to avctx->codec_tag
 *   br_tab       - default bits per macroblock, one entry per
 *                  prores_mb_limits[] size class
 *   quant        - index of the luma matrix in prores_quant_matrices[]
 *   quant_chroma - index of the chroma matrix in prores_quant_matrices[]
 */
146 static const struct prores_profile {
147 const char *full_name;
151 int br_tab[NUM_MB_LIMITS];
154 } prores_profile_info[6] = {
156 .full_name = "proxy",
157 .tag = MKTAG('a', 'p', 'c', 'o'),
160 .br_tab = { 300, 242, 220, 194 },
161 .quant = QUANT_MAT_PROXY,
/* the only profile in this table whose chroma matrix differs from luma */
162 .quant_chroma = QUANT_MAT_PROXY_CHROMA,
166 .tag = MKTAG('a', 'p', 'c', 's'),
169 .br_tab = { 720, 560, 490, 440 },
170 .quant = QUANT_MAT_LT,
171 .quant_chroma = QUANT_MAT_LT,
174 .full_name = "standard",
175 .tag = MKTAG('a', 'p', 'c', 'n'),
178 .br_tab = { 1050, 808, 710, 632 },
179 .quant = QUANT_MAT_STANDARD,
180 .quant_chroma = QUANT_MAT_STANDARD,
183 .full_name = "high quality",
184 .tag = MKTAG('a', 'p', 'c', 'h'),
187 .br_tab = { 1566, 1216, 1070, 950 },
188 .quant = QUANT_MAT_HQ,
189 .quant_chroma = QUANT_MAT_HQ,
193 .tag = MKTAG('a', 'p', '4', 'h'),
196 .br_tab = { 2350, 1828, 1600, 1425 },
197 .quant = QUANT_MAT_HQ,
198 .quant_chroma = QUANT_MAT_HQ,
201 .full_name = "4444XQ",
202 .tag = MKTAG('a', 'p', '4', 'x'),
205 .br_tab = { 3525, 2742, 2400, 2137 },
206 .quant = QUANT_MAT_HQ, /* Fix me : use QUANT_MAT_XQ_LUMA */
207 .quant_chroma = QUANT_MAT_HQ,
/* Number of trellis nodes reserved per slice column; also the size of the
 * per-quantiser slice_bits[] / slice_score[] arrays in find_slice_quant(). */
211 #define TRELLIS_WIDTH 16
/* Saturation value for trellis scores.
 * NOTE(review): the expansion is not parenthesised, so it only behaves as a
 * single value where it is used as a whole operand (as in the comparisons
 * and assignments visible in find_slice_quant()). */
212 #define SCORE_LIMIT INT_MAX / 2
/* Number of pre-scaled matrices kept in ctx->quants[]; larger quantisers
 * use matrices computed on the fly. */
221 #define MAX_STORED_Q 16
/* Per-thread scratch state: encode_init() allocates avctx->thread_count of
 * these and find_quant_thread() uses the one indexed by threadnr. */
223 typedef struct ProresThreadData {
/* transform coefficients of the slice being analysed, one buffer per plane */
224 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
/* 16-sample-wide padded copy of a macroblock that crosses the plane edge */
225 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
/* matrices built on the fly for quantisers >= MAX_STORED_Q */
226 int16_t custom_q[64];
227 int16_t custom_chroma_q[64];
/* trellis nodes written by find_slice_quant() */
228 struct TrellisNode *nodes;
/* Encoder private context, reached through avctx->priv_data. */
231 typedef struct ProresContext {
/* coefficient buffers and edge-padding scratch used by encode_slice() */
233 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
234 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* base matrices multiplied by each quantiser; filled by encode_init() */
235 int16_t quants[MAX_STORED_Q][64];
236 int16_t quants_chroma[MAX_STORED_Q][64];
/* matrices computed in encode_slice() for quantisers >= MAX_STORED_Q */
237 int16_t custom_q[64];
238 int16_t custom_chroma_q[64];
/* unscaled base matrices (rows of prores_quant_matrices[]) */
239 const uint8_t *quant_mat;
240 const uint8_t *quant_chroma_mat;
/* coefficient scan order, chosen by encode_init() from the interlace flag */
241 const uint8_t *scantable;
/* block transform hook; encode_init() sets it to prores_fdct */
243 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
244 ptrdiff_t linesize, int16_t *block);
/* picture size in macroblocks */
248 int mb_width, mb_height;
/* chroma_factor is compared against CFACTOR_Y444 to pick plane geometry */
250 int num_chroma_blocks, chroma_factor;
/* mb_height * slices_width */
252 int slices_per_picture;
253 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* packet size requested by encode_frame(); raised there if it proves small */
264 int frame_size_upper_bound;
/* entry of prores_profile_info[] for the selected profile */
267 const struct prores_profile *profile_info;
/* array of avctx->thread_count per-thread scratch blocks */
271 ProresThreadData *tdata;
/*
 * Fetch the samples of one slice of a plane and run ctx->fdct over each
 * 8x8 block, writing the coefficients to `blocks`.
 *
 * src, linesize - samples at the slice origin; linesize is a byte stride
 * x, y, w, h    - slice origin and plane size, used for the edge checks
 * blocks        - destination coefficient buffer
 * emu_buf       - scratch for macroblocks that cross the right/bottom edge
 * mbs_per_slice - number of macroblocks to process
 * blocks_per_mb - 8x8 blocks per macroblock in this plane
 * is_chroma     - presumably selects between the two block orders at the
 *                 end of the loop; the branch is outside this excerpt
 */
274 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
275 ptrdiff_t linesize, int x, int y, int w, int h,
276 int16_t *blocks, uint16_t *emu_buf,
277 int mbs_per_slice, int blocks_per_mb, int is_chroma)
279 const uint16_t *esrc;
/* macroblock width in samples: 4 per block, i.e. 8 or 16 */
280 const int mb_width = 4 * blocks_per_mb;
284 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* zero the coefficients of all remaining macroblocks of the slice
 * (the condition that triggers this is outside this excerpt) */
286 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* macroblock lies completely inside the plane: read the source in place */
290 if (x + mb_width <= w && y + 16 <= h) {
292 elinesize = linesize;
/* otherwise build a padded copy in emu_buf, whose stride is 16 samples
 * expressed in bytes */
297 elinesize = 16 * sizeof(*emu_buf);
299 bw = FFMIN(w - x, mb_width);
300 bh = FFMIN(h - y, 16);
302 for (j = 0; j < bh; j++) {
303 memcpy(emu_buf + j * 16,
304 (const uint8_t*)src + j * linesize,
/* replicate the last valid sample of the row to the right */
306 pix = emu_buf[j * 16 + bw - 1];
307 for (k = bw; k < mb_width; k++)
308 emu_buf[j * 16 + k] = pix;
/* replicate the last valid row downwards */
311 memcpy(emu_buf + j * 16,
312 emu_buf + (bh - 1) * 16,
313 mb_width * sizeof(*emu_buf));
/* First block order: top-left, top-right, bottom-left, bottom-right.
 * elinesize is in bytes while esrc points to uint16_t, so
 * "esrc + elinesize * 4" advances by eight rows. */
316 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
318 if (blocks_per_mb > 2) {
319 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
322 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
324 if (blocks_per_mb > 2) {
325 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/* Second block order: top-left, bottom-left, top-right, bottom-right. */
329 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
331 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
333 if (blocks_per_mb > 2) {
334 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
336 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Copy the alpha samples of one slice into `blocks` as 16 rows of
 * 16 * mbs_per_slice values, converting each sample and padding rows and
 * columns that fall outside the plane.
 *
 * src, linesize - alpha samples at the slice origin; linesize is in bytes
 * x, y, w, h    - slice origin and plane size
 * abits         - target alpha depth; presumably selects between the two
 *                 conversion loops below (the test is outside this excerpt)
 */
345 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
346 ptrdiff_t linesize, int x, int y, int w, int h,
347 int16_t *blocks, int mbs_per_slice, int abits)
349 const int slice_width = 16 * mbs_per_slice;
350 int i, j, copy_w, copy_h;
/* clip the copied area to what is actually inside the plane */
352 copy_w = FFMIN(w - x, slice_width);
353 copy_h = FFMIN(h - y, 16);
354 for (i = 0; i < copy_h; i++) {
355 memcpy(blocks, src, copy_w * sizeof(*src));
/* first conversion loop; its body is outside this excerpt */
357 for (j = 0; j < copy_w; j++)
/* second conversion loop: shift left by 6 and fill the low bits from the
 * sample's upper bits */
360 for (j = 0; j < copy_w; j++)
361 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
/* pad the row to the full slice width with its last valid sample */
362 for (j = copy_w; j < slice_width; j++)
363 blocks[j] = blocks[copy_w - 1];
364 blocks += slice_width;
/* byte stride converted to uint16_t elements */
365 src += linesize >> 1;
/* fill the missing rows by repeating the previous row */
367 for (; i < 16; i++) {
368 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
369 blocks += slice_width;
374 * Write an unsigned rice/exp golomb codeword.
/*
 * pb       - bit writer receiving the codeword
 * codebook - packed descriptor: bits 0-1 hold (switch_bits - 1), bits 2-4
 *            the exp-Golomb order, bits 5 and up the Rice order
 * val      - value to encode
 *
 * Values below switch_val = switch_bits << rice_order take the Rice branch,
 * all others the exp-Golomb branch.
 */
376 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
378 unsigned int rice_order, exp_order, switch_bits, switch_val;
381 /* number of prefix bits to switch between Rice and expGolomb */
382 switch_bits = (codebook & 3) + 1;
383 rice_order = codebook >> 5; /* rice code order */
384 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
386 switch_val = switch_bits << rice_order;
388 if (val >= switch_val) {
/* rebase val so the smallest exp-Golomb value is 1 << exp_order */
389 val -= switch_val - (1 << exp_order);
390 exponent = av_log2(val);
/* zero prefix, then val in (exponent + 1) bits */
392 put_bits(pb, exponent - exp_order + switch_bits, 0);
393 put_bits(pb, exponent + 1, val);
/* Rice branch: quotient as a run of zeros, then a rice_order-bit
 * remainder (the lines between these writes are outside this excerpt) */
395 exponent = val >> rice_order;
398 put_bits(pb, exponent, 0);
401 put_sbits(pb, rice_order, val);
/* GET_SIGN(x): 0 for non-negative x, -1 for negative x; assumes x is a
 * 32-bit int and that >> on a negative value is an arithmetic shift. */
405 #define GET_SIGN(x) ((x) >> 31)
/* MAKE_CODE(x): fold a signed value into a non-negative code:
 * 0, -1, 1, -2, 2, ... become 0, 1, 2, 3, 4, ... */
406 #define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of a slice plane.
 *
 * blocks           - coefficient buffer, 64 entries per block, DC first
 * blocks_per_slice - number of blocks to code
 * scale            - DC quantiser (callers pass qmat[0])
 *
 * Each DC is (blocks[0] - 0x4000) / scale. The first is coded directly with
 * FIRST_DC_CB; the rest are coded as differences from the previous DC with a
 * codebook that adapts to the previous code.
 */
408 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
409 int blocks_per_slice, int scale)
412 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
414 prev_dc = (blocks[0] - 0x4000) / scale;
415 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
420 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
421 dc = (blocks[0] - 0x4000) / scale;
422 delta = dc - prev_dc;
423 new_sign = GET_SIGN(delta);
/* negate delta when `sign` is -1 (how `sign` and prev_dc are updated is
 * outside this excerpt) */
424 delta = (delta ^ sign) - sign;
425 code = MAKE_CODE(delta);
426 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* next codebook index: half of the code rounded up, capped at 3 */
427 codebook = (code + (code & 1)) >> 1;
428 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of a slice plane as (run, level, sign) triples.
 *
 * Coefficients are visited one scan position at a time (i = 1..63), across
 * all blocks of the slice before moving to the next position, so a zero run
 * can span blocks.
 *
 * scan - scan order table; qmat - quantisation matrix indexed by scan[i].
 * plane_size_factor is not referenced in the lines visible here.
 */
434 static void encode_acs(PutBitContext *pb, int16_t *blocks,
435 int blocks_per_slice,
436 int plane_size_factor,
437 const uint8_t *scan, const int16_t *qmat)
440 int run, level, run_cb, lev_cb;
441 int max_coeffs, abs_level;
/* total number of coefficients in the slice: 64 per block */
443 max_coeffs = blocks_per_slice << 6;
/* initial codebook selectors */
444 run_cb = ff_prores_run_to_cb_index[4];
445 lev_cb = ff_prores_lev_to_cb_index[2];
448 for (i = 1; i < 64; i++) {
/* same scan position in every block: step 64 entries at a time */
449 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
450 level = blocks[idx] / qmat[scan[i]];
452 abs_level = FFABS(level);
453 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
454 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* one sign bit */
456 put_sbits(pb, 1, GET_SIGN(level));
/* codebooks for the next pair depend on this run and level, with the
 * table indices clamped to 15 and 9 */
458 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
459 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Entropy-code one already-transformed plane of a slice: DC coefficients
 * first, using qmat[0] as the DC quantiser, then the AC coefficients in
 * ctx->scantable order.
 */
468 static void encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
469 const uint16_t *src, ptrdiff_t linesize,
470 int mbs_per_slice, int16_t *blocks,
471 int blocks_per_mb, int plane_size_factor,
474 int blocks_per_slice = mbs_per_slice * blocks_per_mb;
476 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
477 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
478 ctx->scantable, qmat);
/*
 * Write the difference between two consecutive alpha values.
 *
 * cur, prev - current and previous alpha value
 * abits     - alpha depth; 8 selects a 4-bit short form, anything else a
 *             7-bit short form
 */
481 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
483 const int dbits = (abits == 8) ? 4 : 7;
/* parsed as 1 << (dbits - 1): '-' binds tighter than '<<' */
484 const int dsize = 1 << dbits - 1;
485 int diff = cur - prev;
/* reduce the difference to abits bits (presumably modulo 2^abits) */
487 diff = av_mod_uintp2(diff, abits);
/* values in the top dsize of the range are handled specially; the
 * adjustment itself is outside this excerpt */
488 if (diff >= (1 << abits) - dsize)
/* long form: zero or out-of-range differences are written in abits bits */
490 if (diff < -dsize || diff > dsize || !diff) {
492 put_bits(pb, abits, diff);
/* short form: magnitude minus one, then a sign bit */
495 put_bits(pb, dbits - 1, FFABS(diff) - 1);
496 put_bits(pb, 1, diff < 0);
/*
 * Write a run length for the alpha plane, either in 4 bits or in 15 bits.
 * The conditions that choose between the two forms are outside this excerpt.
 */
500 static void put_alpha_run(PutBitContext *pb, int run)
505 put_bits(pb, 4, run);
507 put_bits(pb, 15, run);
/*
 * Write the alpha plane of one slice as alternating value differences and
 * run lengths over mbs_per_slice * 256 samples. The predictor `prev`
 * starts at the maximum alpha value (all abits bits set).
 */
513 // todo alpha quantisation for high quants
514 static void encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
515 int mbs_per_slice, uint16_t *blocks,
518 const int abits = ctx->alpha_bits;
519 const int mask = (1 << abits) - 1;
520 const int num_coeffs = mbs_per_slice * 256;
521 int prev = mask, cur;
/* first value: coded as a difference from the initial predictor */
526 put_alpha_diff(pb, cur, prev, abits);
/* value change: flush the pending run, then code the new difference */
531 put_alpha_run (pb, run);
532 put_alpha_diff(pb, cur, prev, abits);
538 } while (idx < num_coeffs);
/* trailing run */
540 put_alpha_run(pb, run);
/*
 * Encode every plane of one slice into the bit writer and report the byte
 * size of each plane through sizes[].
 *
 * pic      - source frame
 * sizes    - output: bytes produced per plane
 * x, y     - slice position (used to derive the per-plane xp / yp offsets;
 *            that computation is outside this excerpt)
 * quant    - quantiser for this slice
 */
543 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
545 int sizes[4], int x, int y, int quant,
548 ProresContext *ctx = avctx->priv_data;
552 int slice_width_factor = av_log2(mbs_per_slice);
553 int num_cblocks, pwidth, line_add;
555 int plane_factor, is_chroma;
557 uint16_t *qmat_chroma;
/* line_add: extra line offset used to address the current field when the
 * frame is coded as two pictures */
559 if (ctx->pictures_per_frame == 1)
562 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* Matrix selection: a forced quantiser uses slot 0 (see encode_init),
 * stored quantisers use their precomputed slot, larger ones are computed
 * here. */
564 if (ctx->force_quant) {
565 qmat = ctx->quants[0];
566 qmat_chroma = ctx->quants_chroma[0];
567 } else if (quant < MAX_STORED_Q) {
568 qmat = ctx->quants[quant];
569 qmat_chroma = ctx->quants_chroma[quant];
571 qmat = ctx->custom_q;
572 qmat_chroma = ctx->custom_chroma_q;
573 for (i = 0; i < 64; i++) {
574 qmat[i] = ctx->quant_mat[i] * quant;
575 qmat_chroma[i] = ctx->quant_chroma_mat[i] * quant;
579 for (i = 0; i < ctx->num_planes; i++) {
/* planes 1 and 2 are chroma; plane 0 is luma and plane 3 alpha */
580 is_chroma = (i == 1 || i == 2);
581 plane_factor = slice_width_factor + 2;
583 plane_factor += ctx->chroma_factor - 3;
/* full-width plane (luma, alpha, or 4:4:4 chroma) versus half-width chroma */
584 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
588 pwidth = avctx->width;
593 pwidth = avctx->width >> 1;
/* with two pictures per frame the stride doubles so only one field's
 * lines are visited */
596 linesize = pic->linesize[i] * ctx->pictures_per_frame;
597 src = (const uint16_t*)(pic->data[i] + yp * linesize +
598 line_add * pic->linesize[i]) + xp;
/* colour planes: transform, then entropy-code with the luma or chroma
 * matrix */
601 get_slice_data(ctx, src, linesize, xp, yp,
602 pwidth, avctx->height / ctx->pictures_per_frame,
603 ctx->blocks[0], ctx->emu_buf,
604 mbs_per_slice, num_cblocks, is_chroma);
605 if (!is_chroma) {/* luma quant */
606 encode_slice_plane(ctx, pb, src, linesize,
607 mbs_per_slice, ctx->blocks[0],
608 num_cblocks, plane_factor, qmat);
609 } else { /* chroma plane */
610 encode_slice_plane(ctx, pb, src, linesize,
611 mbs_per_slice, ctx->blocks[0],
612 num_cblocks, plane_factor, qmat_chroma);
/* alpha plane: copied and run-length coded, no transform */
615 get_alpha_data(ctx, src, linesize, xp, yp,
616 pwidth, avctx->height / ctx->pictures_per_frame,
617 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
618 encode_alpha_plane(ctx, pb, mbs_per_slice, ctx->blocks[0], quant);
/* bytes written for this plane = output so far minus the previous total */
621 sizes[i] = put_bytes_output(pb) - total_size;
622 total_size = put_bytes_output(pb);
/*
 * Return the number of bits encode_vlc_codeword() would spend on `val` with
 * the given codebook, without writing anything. The codebook fields are
 * unpacked exactly as in encode_vlc_codeword().
 */
627 static inline int estimate_vlc(unsigned codebook, int val)
629 unsigned int rice_order, exp_order, switch_bits, switch_val;
632 /* number of prefix bits to switch between Rice and expGolomb */
633 switch_bits = (codebook & 3) + 1;
634 rice_order = codebook >> 5; /* rice code order */
635 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
637 switch_val = switch_bits << rice_order;
639 if (val >= switch_val) {
640 val -= switch_val - (1 << exp_order);
641 exponent = av_log2(val);
/* prefix of (exponent - exp_order + switch_bits) bits plus a value of
 * (exponent + 1) bits */
643 return exponent * 2 - exp_order + switch_bits + 1;
/* Rice: quotient bits, one more bit, and a rice_order-bit remainder */
645 return (val >> rice_order) + rice_order + 1;
/*
 * Bit-count counterpart of encode_dcs(): walks the DC coefficients the same
 * way, accumulates the codeword lengths into `bits`, and adds each
 * quantisation remainder to *error.
 */
649 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
653 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
656 prev_dc = (blocks[0] - 0x4000) / scale;
657 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
/* what the integer division discards counts as error */
661 *error += FFABS(blocks[0] - 0x4000) % scale;
663 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
664 dc = (blocks[0] - 0x4000) / scale;
665 *error += FFABS(blocks[0] - 0x4000) % scale;
666 delta = dc - prev_dc;
667 new_sign = GET_SIGN(delta);
/* negate delta when `sign` is -1 */
668 delta = (delta ^ sign) - sign;
669 code = MAKE_CODE(delta);
670 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
/* next codebook index: half of the code rounded up, capped at 3 */
671 codebook = (code + (code & 1)) >> 1;
672 codebook = FFMIN(codebook, 3);
/*
 * Bit-count counterpart of encode_acs(): visits the AC coefficients in the
 * same order, adds up the run/level codeword lengths, and accumulates each
 * coefficient's quantisation remainder into *error.
 * plane_size_factor is not referenced in the lines visible here.
 */
680 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
681 int plane_size_factor,
682 const uint8_t *scan, const int16_t *qmat)
685 int run, level, run_cb, lev_cb;
686 int max_coeffs, abs_level;
689 max_coeffs = blocks_per_slice << 6;
690 run_cb = ff_prores_run_to_cb_index[4];
691 lev_cb = ff_prores_lev_to_cb_index[2];
694 for (i = 1; i < 64; i++) {
/* same scan position in every block of the slice */
695 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
696 level = blocks[idx] / qmat[scan[i]];
697 *error += FFABS(blocks[idx]) % qmat[scan[i]];
699 abs_level = FFABS(level);
700 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
701 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
/* adapt the codebooks exactly as encode_acs() does */
704 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
705 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the coded size of one plane of a slice for the matrix `qmat`,
 * reading the coefficients from td->blocks[plane]. Quantisation error is
 * accumulated into *error. The bit count is passed through FFALIGN(bits, 8)
 * before being returned.
 */
716 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
717 const uint16_t *src, ptrdiff_t linesize,
719 int blocks_per_mb, int plane_size_factor,
720 const int16_t *qmat, ProresThreadData *td)
722 int blocks_per_slice;
725 blocks_per_slice = mbs_per_slice * blocks_per_mb;
727 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
728 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
729 plane_size_factor, ctx->scantable, qmat);
731 return FFALIGN(bits, 8);
/*
 * Size-estimation counterpart of put_alpha_diff(): performs the same
 * difference reduction and the same long-form / short-form test. The
 * returned values are outside this excerpt.
 */
734 static int est_alpha_diff(int cur, int prev, int abits)
736 const int dbits = (abits == 8) ? 4 : 7;
/* parsed as 1 << (dbits - 1) */
737 const int dsize = 1 << dbits - 1;
738 int diff = cur - prev;
740 diff = av_mod_uintp2(diff, abits);
741 if (diff >= (1 << abits) - dsize)
743 if (diff < -dsize || diff > dsize || !diff)
/*
 * Size-estimation counterpart of encode_alpha_plane(): walks
 * mbs_per_slice * 256 alpha samples in `blocks` and sums the cost of each
 * value difference via est_alpha_diff(). The predictor starts at the
 * maximum alpha value. How runs are costed is outside this excerpt.
 */
749 static int estimate_alpha_plane(ProresContext *ctx,
750 const uint16_t *src, ptrdiff_t linesize,
751 int mbs_per_slice, int16_t *blocks)
753 const int abits = ctx->alpha_bits;
754 const int mask = (1 << abits) - 1;
755 const int num_coeffs = mbs_per_slice * 256;
756 int prev = mask, cur;
762 bits = est_alpha_diff(cur, prev, abits);
773 bits += est_alpha_diff(cur, prev, abits);
779 } while (idx < num_coeffs);
/*
 * Rate-control step for one slice: transform the slice into td->blocks,
 * estimate size and error for every candidate quantiser, and extend the
 * per-thread trellis in td->nodes by one column.
 *
 * trellis_node  - index of this slice's first node; the previous slice's
 *                 nodes sit TRELLIS_WIDTH entries earlier
 * x, y          - slice position as passed by find_quant_thread()
 * mbs_per_slice - macroblocks in this slice
 *
 * The caller uses the return value as a node index; the final loop here
 * tracks the lowest-score node of the new column in `pq`.
 */
791 static int find_slice_quant(AVCodecContext *avctx,
792 int trellis_node, int x, int y, int mbs_per_slice,
793 ProresThreadData *td)
795 ProresContext *ctx = avctx->priv_data;
796 int i, q, pq, xp, yp;
798 int slice_width_factor = av_log2(mbs_per_slice);
799 int num_cblocks[MAX_PLANES], pwidth;
800 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
801 const int min_quant = ctx->profile_info->min_quant;
802 const int max_quant = ctx->profile_info->max_quant;
803 int error, bits, bits_limit;
804 int mbs, prev, cur, new_score;
/* indexed by quantiser; slot max_quant + 1 holds the "overquant" result */
805 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
808 uint16_t *qmat_chroma;
809 int linesize[4], line_add;
/* line_add: extra line offset used to address the current field */
812 if (ctx->pictures_per_frame == 1)
815 line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
/* macroblocks of this row up to and including this slice; scales the
 * cumulative bit budget below */
816 mbs = x + mbs_per_slice;
/* load every plane of the slice into td->blocks */
818 for (i = 0; i < ctx->num_planes; i++) {
819 is_chroma[i] = (i == 1 || i == 2);
820 plane_factor[i] = slice_width_factor + 2;
822 plane_factor[i] += ctx->chroma_factor - 3;
823 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
827 pwidth = avctx->width;
832 pwidth = avctx->width >> 1;
835 linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
836 src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
837 line_add * ctx->pic->linesize[i]) + xp;
840 get_slice_data(ctx, src, linesize[i], xp, yp,
841 pwidth, avctx->height / ctx->pictures_per_frame,
842 td->blocks[i], td->emu_buf,
843 mbs_per_slice, num_cblocks[i], is_chroma[i]);
845 get_alpha_data(ctx, src, linesize[i], xp, yp,
846 pwidth, avctx->height / ctx->pictures_per_frame,
847 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* reset this column's nodes, including the extra overquant node */
851 for (q = min_quant; q < max_quant + 2; q++) {
852 td->nodes[trellis_node + q].prev_node = -1;
853 td->nodes[trellis_node + q].quant = q;
/* alpha cost does not depend on the quantiser, so it is estimated once */
857 alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
858 mbs_per_slice, td->blocks[3]);
859 // todo: maybe perform coarser quantising to fit into frame size when needed
/* size and error for each regular quantiser, using the stored matrices */
860 for (q = min_quant; q <= max_quant; q++) {
863 bits += estimate_slice_plane(ctx, &error, 0,
866 num_cblocks[0], plane_factor[0],
867 ctx->quants[q], td); /* estimate luma plane */
868 for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
869 bits += estimate_slice_plane(ctx, &error, i,
872 num_cblocks[i], plane_factor[i],
873 ctx->quants_chroma[q], td);
/* oversized slice check (the action taken is outside this excerpt) */
875 if (bits > 65000 * 8)
878 slice_bits[q] = bits;
879 slice_score[q] = error;
/* If max_quant already fits the per-slice budget, the overquant slot just
 * repeats it with a slightly worse score. */
881 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
882 slice_bits[max_quant + 1] = slice_bits[max_quant];
883 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
884 overquant = max_quant;
/* Otherwise search larger quantisers, up to 127, for one that fits. */
886 for (q = max_quant + 1; q < 128; q++) {
889 if (q < MAX_STORED_Q) {
890 qmat = ctx->quants[q];
891 qmat_chroma = ctx->quants_chroma[q];
894 qmat_chroma = td->custom_chroma_q;
895 for (i = 0; i < 64; i++) {
896 qmat[i] = ctx->quant_mat[i] * q;
897 qmat_chroma[i] = ctx->quant_chroma_mat[i] * q;
900 bits += estimate_slice_plane(ctx, &error, 0,
903 num_cblocks[0], plane_factor[0],
904 qmat, td);/* estimate luma plane */
905 for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
906 bits += estimate_slice_plane(ctx, &error, i,
909 num_cblocks[i], plane_factor[i],
912 if (bits <= ctx->bits_per_mb * mbs_per_slice)
916 slice_bits[max_quant + 1] = bits;
917 slice_score[max_quant + 1] = error;
/* the extra node carries the actual overquant value, not max_quant + 1 */
920 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* Trellis update: try every (previous quantiser, current quantiser) pair.
 * bits_limit is the cumulative budget for the row so far. */
922 bits_limit = mbs * ctx->bits_per_mb;
923 for (pq = min_quant; pq < max_quant + 2; pq++) {
924 prev = trellis_node - TRELLIS_WIDTH + pq;
926 for (q = min_quant; q < max_quant + 2; q++) {
927 cur = trellis_node + q;
929 bits = td->nodes[prev].bits + slice_bits[q];
930 error = slice_score[q];
/* over budget (the penalty applied is outside this excerpt) */
931 if (bits > bits_limit)
/* saturating add so scores never exceed SCORE_LIMIT */
934 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
935 new_score = td->nodes[prev].score + error;
937 new_score = SCORE_LIMIT;
/* keep the best predecessor for each current node */
938 if (td->nodes[cur].prev_node == -1 ||
939 td->nodes[cur].score >= new_score) {
941 td->nodes[cur].bits = bits;
942 td->nodes[cur].score = new_score;
943 td->nodes[cur].prev_node = prev;
/* pick the node with the lowest score; ties go to the higher quantiser */
948 error = td->nodes[trellis_node + min_quant].score;
949 pq = trellis_node + min_quant;
950 for (q = min_quant + 1; q < max_quant + 2; q++) {
951 if (td->nodes[trellis_node + q].score <= error) {
952 error = td->nodes[trellis_node + q].score;
953 pq = trellis_node + q;
/*
 * Worker run through avctx->execute2(): picks the quantiser of every slice
 * in one macroblock row.
 *
 * jobnr    - row index (used as y)
 * threadnr - selects the per-thread scratch block in ctx->tdata
 *
 * The row is scanned left to right to build the trellis, then walked
 * backwards through prev_node links to fill ctx->slice_q for the row.
 */
960 static int find_quant_thread(AVCodecContext *avctx, void *arg,
961 int jobnr, int threadnr)
963 ProresContext *ctx = avctx->priv_data;
964 ProresThreadData *td = ctx->tdata + threadnr;
965 int mbs_per_slice = ctx->mbs_per_slice;
966 int x, y = jobnr, mb, q = 0;
968 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
/* fewer macroblocks left than a full slice; presumably the slice is
 * shrunk as in encode_frame() (the loop body is outside this excerpt) */
969 while (ctx->mb_width - x < mbs_per_slice)
/* column 0 of the trellis is the start state, so slice mb uses
 * column mb + 1 */
971 q = find_slice_quant(avctx,
972 (mb + 1) * TRELLIS_WIDTH, x, y,
/* backtrack from the best final node */
976 for (x = ctx->slices_width - 1; x >= 0; x--) {
977 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
978 q = td->nodes[q].prev_node;
/*
 * Encode one frame into `pkt`.
 *
 * Output layout, as written below: 4-byte frame size, frame container ID,
 * frame header (optionally carrying the luma and chroma quantisation
 * matrices), then for each picture a picture header, a seek table with one
 * 16-bit size per slice, and the slices themselves. Size fields are skipped
 * first and patched once the final sizes are known.
 *
 * avctx      - codec context; priv_data is the ProresContext
 * pkt        - output packet, allocated here and grown if it proves small
 * pic        - source frame
 * got_packet - output flag (written outside this excerpt)
 *
 * Fix relative to the previous revision: the chroma matrix written to the
 * frame header is now ctx->quant_chroma_mat. The header used to repeat
 * ctx->quant_mat, which does not match the matrix the encoder quantises
 * chroma with whenever the two differ (the proxy profile).
 */
984 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
985 const AVFrame *pic, int *got_packet)
987 ProresContext *ctx = avctx->priv_data;
988 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
989 uint8_t *picture_size_pos;
991 int x, y, i, mb, q = 0;
992 int sizes[4] = { 0 };
/* quantiser byte plus a 16-bit size for every plane but the last */
993 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
994 int frame_size, picture_size, slice_size;
/* initial per-slice size assumption derived from the frame upper bound */
996 int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
1000 pkt_size = ctx->frame_size_upper_bound;
1002 if ((ret = ff_alloc_packet(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE)) < 0)
1005 orig_buf = pkt->data;
1008 orig_buf += 4; // frame size
1009 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
1014 buf += 2; // frame header size will be stored here
1015 bytestream_put_be16 (&buf, 0); // version 1
1016 bytestream_put_buffer(&buf, ctx->vendor, 4);
1017 bytestream_put_be16 (&buf, avctx->width);
1018 bytestream_put_be16 (&buf, avctx->height);
/* frame flags: chroma format in the top bits, field order when interlaced */
1020 frame_flags = ctx->chroma_factor << 6;
1021 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
1022 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
1023 bytestream_put_byte (&buf, frame_flags);
1025 bytestream_put_byte (&buf, 0); // reserved
1026 bytestream_put_byte (&buf, pic->color_primaries);
1027 bytestream_put_byte (&buf, pic->color_trc);
1028 bytestream_put_byte (&buf, pic->colorspace);
1029 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
1030 bytestream_put_byte (&buf, 0); // reserved
1031 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
1032 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
1033 // luma quantisation matrix
1034 for (i = 0; i < 64; i++)
1035 bytestream_put_byte(&buf, ctx->quant_mat[i]);
1036 // chroma quantisation matrix
1037 for (i = 0; i < 64; i++)
1038 bytestream_put_byte(&buf, ctx->quant_chroma_mat[i]);
1040 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
1042 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
/* one pass per picture: a single pass for progressive, two for interlaced */
1044 for (ctx->cur_picture_idx = 0;
1045 ctx->cur_picture_idx < ctx->pictures_per_frame;
1046 ctx->cur_picture_idx++) {
/* the picture size field follows the one-byte header size */
1048 picture_size_pos = buf + 1;
1049 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
1050 buf += 4; // picture data size will be stored here
1051 bytestream_put_be16 (&buf, ctx->slices_per_picture);
1052 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1054 // seek table - will be filled during slice encoding
1056 buf += ctx->slices_per_picture * 2;
/* without a forced quantiser, run the per-row quantiser search first */
1059 if (!ctx->force_quant) {
1060 ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1066 for (y = 0; y < ctx->mb_height; y++) {
1067 int mbs_per_slice = ctx->mbs_per_slice;
1068 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1069 q = ctx->force_quant ? ctx->force_quant
1070 : ctx->slice_q[mb + y * ctx->slices_width];
/* at the right edge, halve the slice until it fits the remaining width */
1072 while (ctx->mb_width - x < mbs_per_slice)
1073 mbs_per_slice >>= 1;
1075 bytestream_put_byte(&buf, slice_hdr_size << 3);
1077 buf += slice_hdr_size - 1;
/* Not enough room left for two worst-case slices: grow the packet and
 * rebase every pointer into it, since the data buffer may have moved. */
1078 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1079 uint8_t *start = pkt->data;
1080 // Recompute new size according to max_slice_size
1082 int delta = 200 + (ctx->pictures_per_frame *
1083 ctx->slices_per_picture + 1) *
1084 max_slice_size - pkt_size;
1086 delta = FFMAX(delta, 2 * max_slice_size);
1087 ctx->frame_size_upper_bound += delta;
1090 avpriv_request_sample(avctx,
1091 "Packet too small: is %i,"
1092 " needs %i (slice: %i). "
1093 "Correct allocation",
1094 pkt_size, delta, max_slice_size);
1098 ret = av_grow_packet(pkt, delta);
1104 orig_buf = pkt->data + (orig_buf - start);
1105 buf = pkt->data + (buf - start);
1106 picture_size_pos = pkt->data + (picture_size_pos - start);
1107 slice_sizes = pkt->data + (slice_sizes - start);
1108 slice_hdr = pkt->data + (slice_hdr - start);
1109 tmp = pkt->data + (tmp - start);
1111 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1112 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
/* Fill in the slice header: quantiser, then the size of every plane but
 * the last. */
1117 bytestream_put_byte(&slice_hdr, q);
1118 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1119 for (i = 0; i < ctx->num_planes - 1; i++) {
1120 bytestream_put_be16(&slice_hdr, sizes[i]);
1121 slice_size += sizes[i];
/* seek table entry for this slice */
1123 bytestream_put_be16(&slice_sizes, slice_size);
1124 buf += slice_size - slice_hdr_size;
/* remember the largest slice seen for the growth check above */
1125 if (max_slice_size < slice_size)
1126 max_slice_size = slice_size;
/* patch the picture size, counted from the start of the picture header */
1130 picture_size = buf - (picture_size_pos - 1);
1131 bytestream_put_be32(&picture_size_pos, picture_size);
/* patch the total frame size */
1135 frame_size = buf - orig_buf;
1136 bytestream_put_be32(&orig_buf, frame_size);
1138 pkt->size = frame_size;
/*
 * Release everything encode_init() allocated: each thread's trellis node
 * array, the per-thread data array, and the per-slice quantiser table.
 * Any guard around the per-thread loop is outside this excerpt.
 */
1144 static av_cold int encode_close(AVCodecContext *avctx)
1146 ProresContext *ctx = avctx->priv_data;
1150 for (i = 0; i < avctx->thread_count; i++)
1151 av_freep(&ctx->tdata[i].nodes);
1153 av_freep(&ctx->tdata);
1154 av_freep(&ctx->slice_q);
/*
 * Block transform hook installed in ctx->fdct. The visible part copies an
 * 8x8 block of 16-bit samples from `src` into `block`, row by row.
 * linesize is a byte stride. Any transform call that follows the copy is
 * outside this excerpt.
 */
1159 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1160 ptrdiff_t linesize, int16_t *block)
1163 const uint16_t *tsrc = src;
1165 for (y = 0; y < 8; y++) {
1166 for (x = 0; x < 8; x++)
1167 block[y * 8 + x] = tsrc[x];
/* byte stride converted to uint16_t elements */
1168 tsrc += linesize >> 1;
/*
 * Validate the options, derive the frame geometry, select profile and
 * quantisation matrices, set up either rate-controlled or fixed-quantiser
 * operation, and compute the packet size upper bound.
 *
 * Returns a negative AVERROR code on invalid options or allocation failure.
 */
1173 static av_cold int encode_init(AVCodecContext *avctx)
1175 ProresContext *ctx = avctx->priv_data;
1178 int min_quant, max_quant;
1179 int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
1181 avctx->bits_per_raw_sample = 10;
1183 ctx->fdct = prores_fdct;
1184 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1185 : ff_prores_progressive_scan;
1186 ff_fdctdsp_init(&ctx->fdsp, avctx);
/* mps & (mps - 1) is non-zero unless mps is a power of two */
1188 mps = ctx->mbs_per_slice;
1189 if (mps & (mps - 1)) {
1190 av_log(avctx, AV_LOG_ERROR,
1191 "there should be an integer power of two MBs per slice\n");
1192 return AVERROR(EINVAL);
/* Automatic profile: 4444 when the format has alpha or no chroma
 * subsampling, HQ otherwise. */
1194 if (ctx->profile == PRORES_PROFILE_AUTO) {
1195 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1196 ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1197 !(desc->log2_chroma_w + desc->log2_chroma_h))
1198 ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1199 av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1200 "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1201 ? "4:4:4:4 profile because of the used input colorspace"
1202 : "HQ profile to keep best quality");
1204 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1205 if (ctx->profile != PRORES_PROFILE_4444 &&
1206 ctx->profile != PRORES_PROFILE_4444XQ) {
1207 // only the 4444 profiles keep alpha: warn and disable it
1208 av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1209 "encode alpha. Override with -profile if needed.\n");
1210 ctx->alpha_bits = 0;
/* any value that is not a multiple of 8 is rejected */
1212 if (ctx->alpha_bits & 7) {
1213 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1214 return AVERROR(EINVAL);
1216 avctx->bits_per_coded_sample = 32;
1218 ctx->alpha_bits = 0;
1221 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1224 ctx->profile_info = prores_profile_info + ctx->profile;
/* three colour planes plus an optional alpha plane */
1225 ctx->num_planes = 3 + !!ctx->alpha_bits;
/* Geometry in macroblocks. Two height roundings are visible (to 32 and to
 * 16 lines); the condition choosing between them is outside this excerpt. */
1227 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
1230 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1232 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* Slices per row: full-size slices plus one smaller power-of-two slice
 * for each set bit of the leftover macroblock count. */
1234 ctx->slices_width = ctx->mb_width / mps;
1235 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1236 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1237 ctx->pictures_per_frame = 1 + interlaced;
/* quant_sel == -1 takes the profile's matrices; otherwise the selected
 * matrix is used for both luma and chroma */
1239 if (ctx->quant_sel == -1) {
1240 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1241 ctx->quant_chroma_mat = prores_quant_matrices[ctx->profile_info->quant_chroma];
1243 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1244 ctx->quant_chroma_mat = prores_quant_matrices[ctx->quant_sel];
1247 if (strlen(ctx->vendor) != 4) {
1248 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1249 return AVERROR_INVALIDDATA;
/* a non-zero global_quality selects fixed-quantiser mode */
1252 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1253 if (!ctx->force_quant) {
/* Rate-controlled mode. With no explicit bits_per_mb, take the profile
 * default for the first size class that fits the frame. */
1254 if (!ctx->bits_per_mb) {
1255 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1256 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1257 ctx->pictures_per_frame)
1259 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
/* the default budget is multiplied by 20 when alpha is coded */
1260 if (ctx->alpha_bits)
1261 ctx->bits_per_mb *= 20;
1262 } else if (ctx->bits_per_mb < 128) {
1263 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1264 return AVERROR_INVALIDDATA;
/* pre-scale the base matrices for every stored quantiser */
1267 min_quant = ctx->profile_info->min_quant;
1268 max_quant = ctx->profile_info->max_quant;
1269 for (i = min_quant; i < MAX_STORED_Q; i++) {
1270 for (j = 0; j < 64; j++) {
1271 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1272 ctx->quants_chroma[i][j] = ctx->quant_chroma_mat[j] * i;
/* one chosen quantiser per slice of a picture */
1276 ctx->slice_q = av_malloc_array(ctx->slices_per_picture, sizeof(*ctx->slice_q));
1278 return AVERROR(ENOMEM);
1280 ctx->tdata = av_calloc(avctx->thread_count, sizeof(*ctx->tdata));
1282 return AVERROR(ENOMEM);
/* per-thread trellis: one column per slice of a row plus a start column */
1284 for (j = 0; j < avctx->thread_count; j++) {
1285 ctx->tdata[j].nodes = av_malloc_array(ctx->slices_width + 1,
1287 * sizeof(*ctx->tdata->nodes));
1288 if (!ctx->tdata[j].nodes)
1289 return AVERROR(ENOMEM);
/* start column: no predecessor, zero cost */
1290 for (i = min_quant; i < max_quant + 2; i++) {
1291 ctx->tdata[j].nodes[i].prev_node = -1;
1292 ctx->tdata[j].nodes[i].bits = 0;
1293 ctx->tdata[j].nodes[i].score = 0;
/* Fixed-quantiser mode. */
1300 if (ctx->force_quant > 64) {
1301 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1302 return AVERROR_INVALIDDATA;
/* Slot 0 holds the matrices for the forced quantiser. ls and ls_chroma
 * accumulate a per-coefficient bit estimate. */
1305 for (j = 0; j < 64; j++) {
1306 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1307 ctx->quants_chroma[0][j] = ctx->quant_chroma_mat[j] * ctx->force_quant;
1308 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1309 ls_chroma += av_log2((1 << 11) / ctx->quants_chroma[0][j]) * 2 + 1;
/* four luma blocks plus four chroma blocks, eight chroma blocks for 4:4:4 */
1312 ctx->bits_per_mb = ls * 4 + ls_chroma * 4;
1313 if (ctx->chroma_factor == CFACTOR_Y444)
1314 ctx->bits_per_mb += ls_chroma * 4;
/* Packet size bound: per-slice header plus budgeted payload, for every
 * slice of every picture plus one spare (the tail of this expression is
 * outside this excerpt). */
1317 ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1318 ctx->slices_per_picture + 1) *
1319 (2 + 2 * ctx->num_planes +
1320 (mps * ctx->bits_per_mb) / 8)
1323 if (ctx->alpha_bits) {
1324 // The alpha plane is run-coded and might exceed the bit budget.
1325 ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1326 ctx->slices_per_picture + 1) *
1327 /* num pixels per slice */ (ctx->mbs_per_slice * 256 *
1328 /* bits per pixel */ (1 + ctx->alpha_bits + 1) + 7 >> 3);
1331 avctx->codec_tag = ctx->profile_info->tag;
1333 av_log(avctx, AV_LOG_DEBUG,
1334 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1335 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1336 interlaced ? "yes" : "no", ctx->bits_per_mb);
1337 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1338 ctx->frame_size_upper_bound);
/* OFFSET: byte offset of an option's field inside ProresContext.
 * VE: flags shared by every option below (video + encoding parameter). */
1343 #define OFFSET(x) offsetof(ProresContext, x)
1344 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/*
 * Private options of the encoder. Entries of type AV_OPT_TYPE_CONST are
 * named values for the option whose unit string ("profile", "quant_mat")
 * they share.
 */
1346 static const AVOption options[] = {
/* must be a power of two; encode_init() rejects other values */
1347 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1348 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1349 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1350 { .i64 = PRORES_PROFILE_AUTO },
1351 PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
1352 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1353 0, 0, VE, "profile" },
1354 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1355 0, 0, VE, "profile" },
1356 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1357 0, 0, VE, "profile" },
1358 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1359 0, 0, VE, "profile" },
1360 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1361 0, 0, VE, "profile" },
1362 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1363 0, 0, VE, "profile" },
1364 { "4444xq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
1365 0, 0, VE, "profile" },
/* encode_init() requires exactly four characters */
1366 { "vendor", "vendor ID", OFFSET(vendor),
1367 AV_OPT_TYPE_STRING, { .str = "Lavc" }, 0, 0, VE },
/* 0 lets encode_init() take the profile default; explicit values below
 * 128 are rejected there */
1368 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1369 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
/* -1 uses the profile's matrices; QUANT_MAT_DEFAULT makes encode_frame()
 * signal that no matrices are stored in the frame header */
1370 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1371 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1372 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1373 0, 0, VE, "quant_mat" },
1374 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1375 0, 0, VE, "quant_mat" },
1376 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1377 0, 0, VE, "quant_mat" },
1378 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1379 0, 0, VE, "quant_mat" },
1380 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1381 0, 0, VE, "quant_mat" },
1382 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1383 0, 0, VE, "quant_mat" },
/* encode_init() accepts only multiples of 8 */
1384 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1385 { .i64 = 16 }, 0, 16, VE },
/* AVClass of the encoder's private context, referenced by
 * ff_prores_ks_encoder.p.priv_class. */
1389 static const AVClass proresenc_class = {
1390 .class_name = "ProRes encoder",
1391 .item_name = av_default_item_name,
1393 .version = LIBAVUTIL_VERSION_INT,
/*
 * Codec registration entry for the "prores_ks" encoder: wires up
 * encode_init / encode_frame / encode_close, the private context size and
 * class, and the accepted pixel formats.
 */
1396 const FFCodec ff_prores_ks_encoder = {
1397 .p.name = "prores_ks",
1398 .p.long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1399 .p.type = AVMEDIA_TYPE_VIDEO,
1400 .p.id = AV_CODEC_ID_PRORES,
1401 .priv_data_size = sizeof(ProresContext),
1402 .init = encode_init,
1403 .close = encode_close,
1404 FF_CODEC_ENCODE_CB(encode_frame),
1405 .p.capabilities = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
/* 10-bit planar 4:2:2, 4:4:4 and 4:4:4 with alpha; list ends at
 * AV_PIX_FMT_NONE */
1406 .p.pix_fmts = (const enum AVPixelFormat[]) {
1407 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1408 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1410 .p.priv_class = &proresenc_class,
1411 .p.profiles = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
1412 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,