libavcodec/proresenc_kostya.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
 * This encoder appears to be based on Anatoliy Wasserman's encoder,
 * considering the similarities in the bugs.
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/mem_internal.h"
  27 #include "libavutil/opt.h"
  28 #include "libavutil/pixdesc.h"
  29 #include "avcodec.h"
  30 #include "codec_internal.h"
  31 #include "encode.h"
  32 #include "fdctdsp.h"
  33 #include "put_bits.h"
  34 #include "profiles.h"
  35 #include "bytestream.h"
  36 #include "proresdata.h"
  37
/*
 * Values stored in ProresContext.chroma_factor. When the factor equals
 * CFACTOR_Y444 the chroma planes are handled at full width with 4 blocks per
 * macroblock; otherwise they are handled at half width with 2 blocks per
 * macroblock.
 */
#define CFACTOR_Y422 2
#define CFACTOR_Y444 3

/* Upper bound on macroblocks per slice; sizes the coefficient scratch buffers. */
#define MAX_MBS_PER_SLICE 8

/* Number of per-plane coefficient buffers kept in the contexts. */
#define MAX_PLANES 4
  44
/*
 * Encoder profile identifiers. The non-negative values are listed in the same
 * order as the entries of prores_profile_info[] (presumably they are used as
 * indices into it -- the lookup is not in this part of the file).
 */
enum {
    PRORES_PROFILE_AUTO  = -1, /* NOTE(review): presumably "choose automatically"; confirm in init code */
    PRORES_PROFILE_PROXY = 0,
    PRORES_PROFILE_LT,
    PRORES_PROFILE_STANDARD,
    PRORES_PROFILE_HQ,
    PRORES_PROFILE_4444,
    PRORES_PROFILE_4444XQ,
};
  54
/*
 * Quantisation matrix identifiers, listed in the same order as the rows of
 * prores_quant_matrices[]. They are stored in the quant/quant_chroma fields
 * of struct prores_profile.
 */
enum {
    QUANT_MAT_PROXY = 0,
    QUANT_MAT_PROXY_CHROMA,
    QUANT_MAT_LT,
    QUANT_MAT_STANDARD,
    QUANT_MAT_HQ,
    QUANT_MAT_XQ_LUMA,
    QUANT_MAT_DEFAULT,
};
  64
/*
 * Base quantisation matrices, one 8x8 (64-entry) row per QUANT_MAT_* value
 * and in the same order as that enum. Larger entries quantise the
 * corresponding coefficient more coarsely.
 */
static const uint8_t prores_quant_matrices[][64] = {
    { // proxy
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    { // proxy chromas
        4,  7,  9, 11, 13, 14, 63, 63,
        7,  7, 11, 12, 14, 63, 63, 63,
        9, 11, 13, 14, 63, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63
    },
    { // LT
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    { // standard
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    { // high quality
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    { // XQ luma
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  3,
        2,  2,  2,  2,  2,  2,  3,  3,
        2,  2,  2,  2,  2,  3,  3,  3,
        2,  2,  2,  2,  3,  3,  3,  4,
        2,  2,  2,  2,  3,  3,  4,  4,
    },
    { // codec default
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
 137
/*
 * Frame-size classes expressed as macroblock counts. Each profile's br_tab[]
 * has one entry per class (NUM_MB_LIMITS entries).
 * NOTE(review): the code selecting a class from these limits is outside this
 * part of the file; presumably the first limit not exceeded is chosen.
 */
#define NUM_MB_LIMITS 4
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, // up to 720x576
    2700, // up to 960x720
    6075, // up to 1440x1080
    9216, // up to 2048x1152
};
 145
/*
 * Static description of each supported profile, in PRORES_PROFILE_* order
 * (proxy, LT, standard, HQ, 4444, 4444XQ).
 */
static const struct prores_profile {
    const char *full_name;           /* human-readable profile name */
    uint32_t    tag;                 /* four-character code built with MKTAG() */
    int         min_quant;           /* lowest quantiser tried by find_slice_quant() */
    int         max_quant;           /* highest quantiser tried before falling back to coarser ones */
    int         br_tab[NUM_MB_LIMITS]; /* one value per prores_mb_limits[] class; presumably a bit budget -- confirm in init code */
    int         quant;               /* QUANT_MAT_* id of the luma matrix */
    int         quant_chroma;        /* QUANT_MAT_* id of the chroma matrix */
} prores_profile_info[6] = {
    {
        .full_name = "proxy",
        .tag       = MKTAG('a', 'p', 'c', 'o'),
        .min_quant = 4,
        .max_quant = 8,
        .br_tab    = { 300, 242, 220, 194 },
        .quant     = QUANT_MAT_PROXY,
        .quant_chroma = QUANT_MAT_PROXY_CHROMA,
    },
    {
        .full_name = "LT",
        .tag       = MKTAG('a', 'p', 'c', 's'),
        .min_quant = 1,
        .max_quant = 9,
        .br_tab    = { 720, 560, 490, 440 },
        .quant     = QUANT_MAT_LT,
        .quant_chroma = QUANT_MAT_LT,
    },
    {
        .full_name = "standard",
        .tag       = MKTAG('a', 'p', 'c', 'n'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1050, 808, 710, 632 },
        .quant     = QUANT_MAT_STANDARD,
        .quant_chroma = QUANT_MAT_STANDARD,
    },
    {
        .full_name = "high quality",
        .tag       = MKTAG('a', 'p', 'c', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1566, 1216, 1070, 950 },
        .quant     = QUANT_MAT_HQ,
        .quant_chroma = QUANT_MAT_HQ,
    },
    {
        .full_name = "4444",
        .tag       = MKTAG('a', 'p', '4', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 2350, 1828, 1600, 1425 },
        .quant     = QUANT_MAT_HQ,
        .quant_chroma = QUANT_MAT_HQ,
    },
    {
        .full_name = "4444XQ",
        .tag       = MKTAG('a', 'p', '4', 'x'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 3525, 2742, 2400, 2137 },
        .quant     = QUANT_MAT_HQ, /* Fix me : use QUANT_MAT_XQ_LUMA */
        .quant_chroma = QUANT_MAT_HQ,
    }
};
 210
 211 #define TRELLIS_WIDTH 16
 212 #define SCORE_LIMIT   INT_MAX / 2
 213
/*
 * One candidate (slice, quantiser) state of the rate-control trellis.
 * find_slice_quant() initialises prev_node to -1 and quant to the candidate
 * quantiser for every node of the slice it is processing.
 */
struct TrellisNode {
    int prev_node; /* index of the predecessor node, -1 when none */
    int quant;     /* quantiser this node stands for */
    int bits;      /* NOTE(review): presumably accumulated bit cost along the path -- confirm */
    int score;     /* NOTE(review): presumably accumulated error along the path -- confirm */
};
 220
/*
 * Quantisers below this value have precomputed matrices in
 * ProresContext.quants[] / quants_chroma[]; larger ones are built on demand.
 */
#define MAX_STORED_Q 16

/*
 * Scratch state used by find_slice_quant() so that the estimation pass does
 * not touch the buffers inside ProresContext.
 */
typedef struct ProresThreadData {
    /* per-plane coefficient (or alpha sample) storage for one slice */
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
    /* 16x16 staging area used when a macroblock crosses the picture edge */
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
    /* matrices built on the fly for quantisers >= MAX_STORED_Q */
    int16_t custom_q[64];
    int16_t custom_chroma_q[64];
    /* trellis nodes, indexed as nodes[trellis_node + quantiser] */
    struct TrellisNode *nodes;
} ProresThreadData;
 230
/* Private encoder state (reached through AVCodecContext.priv_data). */
typedef struct ProresContext {
    AVClass *class;
    /* coefficient / alpha scratch used by encode_slice() (only plane 0 is used there) */
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
    /* 16x16 staging area used when a macroblock crosses the picture edge */
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
    /* quantisation matrices indexed by quantiser; entry 0 is used when force_quant is set */
    int16_t quants[MAX_STORED_Q][64];
    int16_t quants_chroma[MAX_STORED_Q][64];
    /* matrices rebuilt in encode_slice() for quantisers >= MAX_STORED_Q */
    int16_t custom_q[64];
    int16_t custom_chroma_q[64];
    /* base matrices that get multiplied by the quantiser */
    const uint8_t *quant_mat;
    const uint8_t *quant_chroma_mat;
    /* coefficient scan order handed to encode_acs() / estimate_acs() */
    const uint8_t *scantable;

    /* forward transform of one 8x8 block; linesize is passed through from the caller */
    void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
                 ptrdiff_t linesize, int16_t *block);
    FDCTDSPContext fdsp;

    const AVFrame *pic;          /* frame read by find_slice_quant() */
    int mb_width, mb_height;
    int mbs_per_slice;
    int num_chroma_blocks, chroma_factor; /* chroma_factor is CFACTOR_Y422 or CFACTOR_Y444 */
    int slices_width;
    int slices_per_picture;
    int pictures_per_frame; // 1 for progressive, 2 for interlaced
    int cur_picture_idx;         /* combined with top_field_first to pick the starting line */
    int num_planes;              /* number of planes processed per slice */
    int bits_per_mb;             /* bit budget per macroblock checked in find_slice_quant() */
    int force_quant;             /* non-zero: always use quants[0] / quants_chroma[0] */
    int alpha_bits;              /* alpha sample depth; 0 disables alpha size estimation */
    int warn;

    char *vendor;
    int quant_sel;

    int frame_size_upper_bound;

    int profile;
    const struct prores_profile *profile_info; /* supplies min_quant / max_quant */

    int *slice_q;

    ProresThreadData *tdata;
} ProresContext;
 273
 274 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 275                            ptrdiff_t linesize, int x, int y, int w, int h,
 276                            int16_t *blocks, uint16_t *emu_buf,
 277                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 278 {
 279     const uint16_t *esrc;
 280     const int mb_width = 4 * blocks_per_mb;
 281     ptrdiff_t elinesize;
 282     int i, j, k;
 283
 284     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 285         if (x >= w) {
 286             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 287                               * sizeof(*blocks));
 288             return;
 289         }
 290         if (x + mb_width <= w && y + 16 <= h) {
 291             esrc      = src;
 292             elinesize = linesize;
 293         } else {
 294             int bw, bh, pix;
 295
 296             esrc      = emu_buf;
 297             elinesize = 16 * sizeof(*emu_buf);
 298
 299             bw = FFMIN(w - x, mb_width);
 300             bh = FFMIN(h - y, 16);
 301
 302             for (j = 0; j < bh; j++) {
 303                 memcpy(emu_buf + j * 16,
 304                        (const uint8_t*)src + j * linesize,
 305                        bw * sizeof(*src));
 306                 pix = emu_buf[j * 16 + bw - 1];
 307                 for (k = bw; k < mb_width; k++)
 308                     emu_buf[j * 16 + k] = pix;
 309             }
 310             for (; j < 16; j++)
 311                 memcpy(emu_buf + j * 16,
 312                        emu_buf + (bh - 1) * 16,
 313                        mb_width * sizeof(*emu_buf));
 314         }
 315         if (!is_chroma) {
 316             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 317             blocks += 64;
 318             if (blocks_per_mb > 2) {
 319                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 320                 blocks += 64;
 321             }
 322             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 323             blocks += 64;
 324             if (blocks_per_mb > 2) {
 325                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 326                 blocks += 64;
 327             }
 328         } else {
 329             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 330             blocks += 64;
 331             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 332             blocks += 64;
 333             if (blocks_per_mb > 2) {
 334                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 335                 blocks += 64;
 336                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 337                 blocks += 64;
 338             }
 339         }
 340
 341         x += mb_width;
 342     }
 343 }
 344
 345 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
 346                            ptrdiff_t linesize, int x, int y, int w, int h,
 347                            int16_t *blocks, int mbs_per_slice, int abits)
 348 {
 349     const int slice_width = 16 * mbs_per_slice;
 350     int i, j, copy_w, copy_h;
 351
 352     copy_w = FFMIN(w - x, slice_width);
 353     copy_h = FFMIN(h - y, 16);
 354     for (i = 0; i < copy_h; i++) {
 355         memcpy(blocks, src, copy_w * sizeof(*src));
 356         if (abits == 8)
 357             for (j = 0; j < copy_w; j++)
 358                 blocks[j] >>= 2;
 359         else
 360             for (j = 0; j < copy_w; j++)
 361                 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
 362         for (j = copy_w; j < slice_width; j++)
 363             blocks[j] = blocks[copy_w - 1];
 364         blocks += slice_width;
 365         src    += linesize >> 1;
 366     }
 367     for (; i < 16; i++) {
 368         memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
 369         blocks += slice_width;
 370     }
 371 }
 372
 373 /**
 374  * Write an unsigned rice/exp golomb codeword.
 375  */
 376 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 377 {
 378     unsigned int rice_order, exp_order, switch_bits, switch_val;
 379     int exponent;
 380
 381     /* number of prefix bits to switch between Rice and expGolomb */
 382     switch_bits = (codebook & 3) + 1;
 383     rice_order  =  codebook >> 5;       /* rice code order */
 384     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 385
 386     switch_val  = switch_bits << rice_order;
 387
 388     if (val >= switch_val) {
 389         val -= switch_val - (1 << exp_order);
 390         exponent = av_log2(val);
 391
 392         put_bits(pb, exponent - exp_order + switch_bits, 0);
 393         put_bits(pb, exponent + 1, val);
 394     } else {
 395         exponent = val >> rice_order;
 396
 397         if (exponent)
 398             put_bits(pb, exponent, 0);
 399         put_bits(pb, 1, 1);
 400         if (rice_order)
 401             put_sbits(pb, rice_order, val);
 402     }
 403 }
 404
/* -1 (all bits set) when x is negative, 0 otherwise; relies on an arithmetic
 * right shift of a 32-bit int. */
#define GET_SIGN(x)  ((x) >> 31)
/* Map a signed value to a non-negative code: x >= 0 -> 2 * x,
 * x < 0 -> -2 * x - 1. The argument is evaluated twice. */
#define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x))
 407
 408 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
 409                        int blocks_per_slice, int scale)
 410 {
 411     int i;
 412     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 413
 414     prev_dc = (blocks[0] - 0x4000) / scale;
 415     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 416     sign     = 0;
 417     codebook = 3;
 418     blocks  += 64;
 419
 420     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 421         dc       = (blocks[0] - 0x4000) / scale;
 422         delta    = dc - prev_dc;
 423         new_sign = GET_SIGN(delta);
 424         delta    = (delta ^ sign) - sign;
 425         code     = MAKE_CODE(delta);
 426         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 427         codebook = (code + (code & 1)) >> 1;
 428         codebook = FFMIN(codebook, 3);
 429         sign     = new_sign;
 430         prev_dc  = dc;
 431     }
 432 }
 433
 434 static void encode_acs(PutBitContext *pb, int16_t *blocks,
 435                        int blocks_per_slice,
 436                        int plane_size_factor,
 437                        const uint8_t *scan, const int16_t *qmat)
 438 {
 439     int idx, i;
 440     int run, level, run_cb, lev_cb;
 441     int max_coeffs, abs_level;
 442
 443     max_coeffs = blocks_per_slice << 6;
 444     run_cb     = ff_prores_run_to_cb_index[4];
 445     lev_cb     = ff_prores_lev_to_cb_index[2];
 446     run        = 0;
 447
 448     for (i = 1; i < 64; i++) {
 449         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 450             level = blocks[idx] / qmat[scan[i]];
 451             if (level) {
 452                 abs_level = FFABS(level);
 453                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 454                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 455                                     abs_level - 1);
 456                 put_sbits(pb, 1, GET_SIGN(level));
 457
 458                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 459                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 460                 run    = 0;
 461             } else {
 462                 run++;
 463             }
 464         }
 465     }
 466 }
 467
 468 static void encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 469                               const uint16_t *src, ptrdiff_t linesize,
 470                               int mbs_per_slice, int16_t *blocks,
 471                               int blocks_per_mb, int plane_size_factor,
 472                               const int16_t *qmat)
 473 {
 474     int blocks_per_slice = mbs_per_slice * blocks_per_mb;
 475
 476     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 477     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 478                ctx->scantable, qmat);
 479 }
 480
 481 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
 482 {
 483     const int dbits = (abits == 8) ? 4 : 7;
 484     const int dsize = 1 << dbits - 1;
 485     int diff = cur - prev;
 486
 487     diff = av_mod_uintp2(diff, abits);
 488     if (diff >= (1 << abits) - dsize)
 489         diff -= 1 << abits;
 490     if (diff < -dsize || diff > dsize || !diff) {
 491         put_bits(pb, 1, 1);
 492         put_bits(pb, abits, diff);
 493     } else {
 494         put_bits(pb, 1, 0);
 495         put_bits(pb, dbits - 1, FFABS(diff) - 1);
 496         put_bits(pb, 1, diff < 0);
 497     }
 498 }
 499
 500 static void put_alpha_run(PutBitContext *pb, int run)
 501 {
 502     if (run) {
 503         put_bits(pb, 1, 0);
 504         if (run < 0x10)
 505             put_bits(pb, 4, run);
 506         else
 507             put_bits(pb, 15, run);
 508     } else {
 509         put_bits(pb, 1, 1);
 510     }
 511 }
 512
 513 // todo alpha quantisation for high quants
 514 static void encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
 515                               int mbs_per_slice, uint16_t *blocks,
 516                               int quant)
 517 {
 518     const int abits = ctx->alpha_bits;
 519     const int mask  = (1 << abits) - 1;
 520     const int num_coeffs = mbs_per_slice * 256;
 521     int prev = mask, cur;
 522     int idx = 0;
 523     int run = 0;
 524
 525     cur = blocks[idx++];
 526     put_alpha_diff(pb, cur, prev, abits);
 527     prev = cur;
 528     do {
 529         cur = blocks[idx++];
 530         if (cur != prev) {
 531             put_alpha_run (pb, run);
 532             put_alpha_diff(pb, cur, prev, abits);
 533             prev = cur;
 534             run  = 0;
 535         } else {
 536             run++;
 537         }
 538     } while (idx < num_coeffs);
 539     if (run)
 540         put_alpha_run(pb, run);
 541 }
 542
 543 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 544                         PutBitContext *pb,
 545                         int sizes[4], int x, int y, int quant,
 546                         int mbs_per_slice)
 547 {
 548     ProresContext *ctx = avctx->priv_data;
 549     int i, xp, yp;
 550     int total_size = 0;
 551     const uint16_t *src;
 552     int slice_width_factor = av_log2(mbs_per_slice);
 553     int num_cblocks, pwidth, line_add;
 554     ptrdiff_t linesize;
 555     int plane_factor, is_chroma;
 556     uint16_t *qmat;
 557     uint16_t *qmat_chroma;
 558
 559     if (ctx->pictures_per_frame == 1)
 560         line_add = 0;
 561     else
 562         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 563
 564     if (ctx->force_quant) {
 565         qmat = ctx->quants[0];
 566         qmat_chroma = ctx->quants_chroma[0];
 567     } else if (quant < MAX_STORED_Q) {
 568         qmat = ctx->quants[quant];
 569         qmat_chroma = ctx->quants_chroma[quant];
 570     } else {
 571         qmat = ctx->custom_q;
 572         qmat_chroma = ctx->custom_chroma_q;
 573         for (i = 0; i < 64; i++) {
 574             qmat[i] = ctx->quant_mat[i] * quant;
 575             qmat_chroma[i] = ctx->quant_chroma_mat[i] * quant;
 576         }
 577     }
 578
 579     for (i = 0; i < ctx->num_planes; i++) {
 580         is_chroma    = (i == 1 || i == 2);
 581         plane_factor = slice_width_factor + 2;
 582         if (is_chroma)
 583             plane_factor += ctx->chroma_factor - 3;
 584         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 585             xp          = x << 4;
 586             yp          = y << 4;
 587             num_cblocks = 4;
 588             pwidth      = avctx->width;
 589         } else {
 590             xp          = x << 3;
 591             yp          = y << 4;
 592             num_cblocks = 2;
 593             pwidth      = avctx->width >> 1;
 594         }
 595
 596         linesize = pic->linesize[i] * ctx->pictures_per_frame;
 597         src = (const uint16_t*)(pic->data[i] + yp * linesize +
 598                                 line_add * pic->linesize[i]) + xp;
 599
 600         if (i < 3) {
 601             get_slice_data(ctx, src, linesize, xp, yp,
 602                            pwidth, avctx->height / ctx->pictures_per_frame,
 603                            ctx->blocks[0], ctx->emu_buf,
 604                            mbs_per_slice, num_cblocks, is_chroma);
 605             if (!is_chroma) {/* luma quant */
 606                 encode_slice_plane(ctx, pb, src, linesize,
 607                                    mbs_per_slice, ctx->blocks[0],
 608                                    num_cblocks, plane_factor, qmat);
 609             } else { /* chroma plane */
 610                 encode_slice_plane(ctx, pb, src, linesize,
 611                                    mbs_per_slice, ctx->blocks[0],
 612                                    num_cblocks, plane_factor, qmat_chroma);
 613             }
 614         } else {
 615             get_alpha_data(ctx, src, linesize, xp, yp,
 616                            pwidth, avctx->height / ctx->pictures_per_frame,
 617                            ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
 618             encode_alpha_plane(ctx, pb, mbs_per_slice, ctx->blocks[0], quant);
 619         }
 620         flush_put_bits(pb);
 621         sizes[i]   = put_bytes_output(pb) - total_size;
 622         total_size = put_bytes_output(pb);
 623     }
 624     return total_size;
 625 }
 626
 627 static inline int estimate_vlc(unsigned codebook, int val)
 628 {
 629     unsigned int rice_order, exp_order, switch_bits, switch_val;
 630     int exponent;
 631
 632     /* number of prefix bits to switch between Rice and expGolomb */
 633     switch_bits = (codebook & 3) + 1;
 634     rice_order  =  codebook >> 5;       /* rice code order */
 635     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 636
 637     switch_val  = switch_bits << rice_order;
 638
 639     if (val >= switch_val) {
 640         val -= switch_val - (1 << exp_order);
 641         exponent = av_log2(val);
 642
 643         return exponent * 2 - exp_order + switch_bits + 1;
 644     } else {
 645         return (val >> rice_order) + rice_order + 1;
 646     }
 647 }
 648
 649 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
 650                         int scale)
 651 {
 652     int i;
 653     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 654     int bits;
 655
 656     prev_dc  = (blocks[0] - 0x4000) / scale;
 657     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 658     sign     = 0;
 659     codebook = 3;
 660     blocks  += 64;
 661     *error  += FFABS(blocks[0] - 0x4000) % scale;
 662
 663     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 664         dc       = (blocks[0] - 0x4000) / scale;
 665         *error  += FFABS(blocks[0] - 0x4000) % scale;
 666         delta    = dc - prev_dc;
 667         new_sign = GET_SIGN(delta);
 668         delta    = (delta ^ sign) - sign;
 669         code     = MAKE_CODE(delta);
 670         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 671         codebook = (code + (code & 1)) >> 1;
 672         codebook = FFMIN(codebook, 3);
 673         sign     = new_sign;
 674         prev_dc  = dc;
 675     }
 676
 677     return bits;
 678 }
 679
 680 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
 681                         int plane_size_factor,
 682                         const uint8_t *scan, const int16_t *qmat)
 683 {
 684     int idx, i;
 685     int run, level, run_cb, lev_cb;
 686     int max_coeffs, abs_level;
 687     int bits = 0;
 688
 689     max_coeffs = blocks_per_slice << 6;
 690     run_cb     = ff_prores_run_to_cb_index[4];
 691     lev_cb     = ff_prores_lev_to_cb_index[2];
 692     run        = 0;
 693
 694     for (i = 1; i < 64; i++) {
 695         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 696             level   = blocks[idx] / qmat[scan[i]];
 697             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 698             if (level) {
 699                 abs_level = FFABS(level);
 700                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 701                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 702                                      abs_level - 1) + 1;
 703
 704                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 705                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 706                 run    = 0;
 707             } else {
 708                 run++;
 709             }
 710         }
 711     }
 712
 713     return bits;
 714 }
 715
 716 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 717                                 const uint16_t *src, ptrdiff_t linesize,
 718                                 int mbs_per_slice,
 719                                 int blocks_per_mb, int plane_size_factor,
 720                                 const int16_t *qmat, ProresThreadData *td)
 721 {
 722     int blocks_per_slice;
 723     int bits;
 724
 725     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 726
 727     bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
 728     bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
 729                          plane_size_factor, ctx->scantable, qmat);
 730
 731     return FFALIGN(bits, 8);
 732 }
 733
 734 static int est_alpha_diff(int cur, int prev, int abits)
 735 {
 736     const int dbits = (abits == 8) ? 4 : 7;
 737     const int dsize = 1 << dbits - 1;
 738     int diff = cur - prev;
 739
 740     diff = av_mod_uintp2(diff, abits);
 741     if (diff >= (1 << abits) - dsize)
 742         diff -= 1 << abits;
 743     if (diff < -dsize || diff > dsize || !diff)
 744         return abits + 1;
 745     else
 746         return dbits + 1;
 747 }
 748
 749 static int estimate_alpha_plane(ProresContext *ctx,
 750                                 const uint16_t *src, ptrdiff_t linesize,
 751                                 int mbs_per_slice, int16_t *blocks)
 752 {
 753     const int abits = ctx->alpha_bits;
 754     const int mask  = (1 << abits) - 1;
 755     const int num_coeffs = mbs_per_slice * 256;
 756     int prev = mask, cur;
 757     int idx = 0;
 758     int run = 0;
 759     int bits;
 760
 761     cur = blocks[idx++];
 762     bits = est_alpha_diff(cur, prev, abits);
 763     prev = cur;
 764     do {
 765         cur = blocks[idx++];
 766         if (cur != prev) {
 767             if (!run)
 768                 bits++;
 769             else if (run < 0x10)
 770                 bits += 4;
 771             else
 772                 bits += 15;
 773             bits += est_alpha_diff(cur, prev, abits);
 774             prev = cur;
 775             run  = 0;
 776         } else {
 777             run++;
 778         }
 779     } while (idx < num_coeffs);
 780
 781     if (run) {
 782         if (run < 0x10)
 783             bits += 4;
 784         else
 785             bits += 15;
 786     }
 787
 788     return bits;
 789 }
 790
 791 static int find_slice_quant(AVCodecContext *avctx,
 792                             int trellis_node, int x, int y, int mbs_per_slice,
 793                             ProresThreadData *td)
 794 {
 795     ProresContext *ctx = avctx->priv_data;
 796     int i, q, pq, xp, yp;
 797     const uint16_t *src;
 798     int slice_width_factor = av_log2(mbs_per_slice);
 799     int num_cblocks[MAX_PLANES], pwidth;
 800     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 801     const int min_quant = ctx->profile_info->min_quant;
 802     const int max_quant = ctx->profile_info->max_quant;
 803     int error, bits, bits_limit;
 804     int mbs, prev, cur, new_score;
 805     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 806     int overquant;
 807     uint16_t *qmat;
 808     uint16_t *qmat_chroma;
 809     int linesize[4], line_add;
 810     int alpha_bits = 0;
 811
 812     if (ctx->pictures_per_frame == 1)
 813         line_add = 0;
 814     else
 815         line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
 816     mbs = x + mbs_per_slice;
 817
 818     for (i = 0; i < ctx->num_planes; i++) {
 819         is_chroma[i]    = (i == 1 || i == 2);
 820         plane_factor[i] = slice_width_factor + 2;
 821         if (is_chroma[i])
 822             plane_factor[i] += ctx->chroma_factor - 3;
 823         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 824             xp             = x << 4;
 825             yp             = y << 4;
 826             num_cblocks[i] = 4;
 827             pwidth         = avctx->width;
 828         } else {
 829             xp             = x << 3;
 830             yp             = y << 4;
 831             num_cblocks[i] = 2;
 832             pwidth         = avctx->width >> 1;
 833         }
 834
 835         linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
 836         src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
 837                                  line_add * ctx->pic->linesize[i]) + xp;
 838
 839         if (i < 3) {
 840             get_slice_data(ctx, src, linesize[i], xp, yp,
 841                            pwidth, avctx->height / ctx->pictures_per_frame,
 842                            td->blocks[i], td->emu_buf,
 843                            mbs_per_slice, num_cblocks[i], is_chroma[i]);
 844         } else {
 845             get_alpha_data(ctx, src, linesize[i], xp, yp,
 846                            pwidth, avctx->height / ctx->pictures_per_frame,
 847                            td->blocks[i], mbs_per_slice, ctx->alpha_bits);
 848         }
 849     }
 850
 851     for (q = min_quant; q < max_quant + 2; q++) {
 852         td->nodes[trellis_node + q].prev_node = -1;
 853         td->nodes[trellis_node + q].quant     = q;
 854     }
 855
 856     if (ctx->alpha_bits)
 857         alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
 858                                           mbs_per_slice, td->blocks[3]);
 859     // todo: maybe perform coarser quantising to fit into frame size when needed
 860     for (q = min_quant; q <= max_quant; q++) {
 861         bits  = alpha_bits;
 862         error = 0;
 863         bits += estimate_slice_plane(ctx, &error, 0,
 864                                      src, linesize[0],
 865                                      mbs_per_slice,
 866                                      num_cblocks[0], plane_factor[0],
 867                                      ctx->quants[q], td); /* estimate luma plane */
 868         for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
 869             bits += estimate_slice_plane(ctx, &error, i,
 870                                          src, linesize[i],
 871                                          mbs_per_slice,
 872                                          num_cblocks[i], plane_factor[i],
 873                                          ctx->quants_chroma[q], td);
 874         }
 875         if (bits > 65000 * 8)
 876             error = SCORE_LIMIT;
 877
 878         slice_bits[q]  = bits;
 879         slice_score[q] = error;
 880     }
 881     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 882         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 883         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 884         overquant = max_quant;
 885     } else {
 886         for (q = max_quant + 1; q < 128; q++) {
 887             bits  = alpha_bits;
 888             error = 0;
 889             if (q < MAX_STORED_Q) {
 890                 qmat = ctx->quants[q];
 891                 qmat_chroma = ctx->quants_chroma[q];
 892             } else {
 893                 qmat = td->custom_q;
 894                 qmat_chroma = td->custom_chroma_q;
 895                 for (i = 0; i < 64; i++) {
 896                     qmat[i] = ctx->quant_mat[i] * q;
 897                     qmat_chroma[i] = ctx->quant_chroma_mat[i] * q;
 898                 }
 899             }
 900             bits += estimate_slice_plane(ctx, &error, 0,
 901                                          src, linesize[0],
 902                                          mbs_per_slice,
 903                                          num_cblocks[0], plane_factor[0],
 904                                          qmat, td);/* estimate luma plane */
 905             for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
 906                 bits += estimate_slice_plane(ctx, &error, i,
 907                                              src, linesize[i],
 908                                              mbs_per_slice,
 909                                              num_cblocks[i], plane_factor[i],
 910                                              qmat_chroma, td);
 911             }
 912             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 913                 break;
 914         }
 915
 916         slice_bits[max_quant + 1]  = bits;
 917         slice_score[max_quant + 1] = error;
 918         overquant = q;
 919     }
 920     td->nodes[trellis_node + max_quant + 1].quant = overquant;
 921
 922     bits_limit = mbs * ctx->bits_per_mb;
 923     for (pq = min_quant; pq < max_quant + 2; pq++) {
 924         prev = trellis_node - TRELLIS_WIDTH + pq;
 925
 926         for (q = min_quant; q < max_quant + 2; q++) {
 927             cur = trellis_node + q;
 928
 929             bits  = td->nodes[prev].bits + slice_bits[q];
 930             error = slice_score[q];
 931             if (bits > bits_limit)
 932                 error = SCORE_LIMIT;
 933
 934             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 935                 new_score = td->nodes[prev].score + error;
 936             else
 937                 new_score = SCORE_LIMIT;
 938             if (td->nodes[cur].prev_node == -1 ||
 939                 td->nodes[cur].score >= new_score) {
 940
 941                 td->nodes[cur].bits      = bits;
 942                 td->nodes[cur].score     = new_score;
 943                 td->nodes[cur].prev_node = prev;
 944             }
 945         }
 946     }
 947
 948     error = td->nodes[trellis_node + min_quant].score;
 949     pq    = trellis_node + min_quant;
 950     for (q = min_quant + 1; q < max_quant + 2; q++) {
 951         if (td->nodes[trellis_node + q].score <= error) {
 952             error = td->nodes[trellis_node + q].score;
 953             pq    = trellis_node + q;
 954         }
 955     }
 956
 957     return pq;
 958 }
 959
 960 static int find_quant_thread(AVCodecContext *avctx, void *arg,
 961                              int jobnr, int threadnr)
 962 {
 963     ProresContext *ctx = avctx->priv_data;
 964     ProresThreadData *td = ctx->tdata + threadnr;
 965     int mbs_per_slice = ctx->mbs_per_slice;
 966     int x, y = jobnr, mb, q = 0;
 967
 968     for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 969         while (ctx->mb_width - x < mbs_per_slice)
 970             mbs_per_slice >>= 1;
 971         q = find_slice_quant(avctx,
 972                              (mb + 1) * TRELLIS_WIDTH, x, y,
 973                              mbs_per_slice, td);
 974     }
 975
 976     for (x = ctx->slices_width - 1; x >= 0; x--) {
 977         ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
 978         q = td->nodes[q].prev_node;
 979     }
 980
 981     return 0;
 982 }
 983
 984 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 985                         const AVFrame *pic, int *got_packet)
 986 {
 987     ProresContext *ctx = avctx->priv_data;
 988     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 989     uint8_t *picture_size_pos;
 990     PutBitContext pb;
 991     int x, y, i, mb, q = 0;
 992     int sizes[4] = { 0 };
 993     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 994     int frame_size, picture_size, slice_size;
 995     int pkt_size, ret;
 996     int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
 997     uint8_t frame_flags;
 998
 999     ctx->pic = pic;
1000     pkt_size = ctx->frame_size_upper_bound;
1001
1002     if ((ret = ff_alloc_packet(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE)) < 0)
1003         return ret;
1004
1005     orig_buf = pkt->data;
1006
1007     // frame atom
1008     orig_buf += 4;                              // frame size
1009     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
1010     buf = orig_buf;
1011
1012     // frame header
1013     tmp = buf;
1014     buf += 2;                                   // frame header size will be stored here
1015     bytestream_put_be16  (&buf, 0);             // version 1
1016     bytestream_put_buffer(&buf, ctx->vendor, 4);
1017     bytestream_put_be16  (&buf, avctx->width);
1018     bytestream_put_be16  (&buf, avctx->height);
1019
1020     frame_flags = ctx->chroma_factor << 6;
1021     if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
1022         frame_flags |= pic->top_field_first ? 0x04 : 0x08;
1023     bytestream_put_byte  (&buf, frame_flags);
1024
1025     bytestream_put_byte  (&buf, 0);             // reserved
1026     bytestream_put_byte  (&buf, pic->color_primaries);
1027     bytestream_put_byte  (&buf, pic->color_trc);
1028     bytestream_put_byte  (&buf, pic->colorspace);
1029     bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
1030     bytestream_put_byte  (&buf, 0);             // reserved
1031     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
1032         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
1033         // luma quantisation matrix
1034         for (i = 0; i < 64; i++)
1035             bytestream_put_byte(&buf, ctx->quant_mat[i]);
1036         // chroma quantisation matrix
1037         for (i = 0; i < 64; i++)
1038             bytestream_put_byte(&buf, ctx->quant_mat[i]);
1039     } else {
1040         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
1041     }
1042     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
1043
1044     for (ctx->cur_picture_idx = 0;
1045          ctx->cur_picture_idx < ctx->pictures_per_frame;
1046          ctx->cur_picture_idx++) {
1047         // picture header
1048         picture_size_pos = buf + 1;
1049         bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
1050         buf += 4;                                   // picture data size will be stored here
1051         bytestream_put_be16  (&buf, ctx->slices_per_picture);
1052         bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1053
1054         // seek table - will be filled during slice encoding
1055         slice_sizes = buf;
1056         buf += ctx->slices_per_picture * 2;
1057
1058         // slices
1059         if (!ctx->force_quant) {
1060             ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1061                                   ctx->mb_height);
1062             if (ret)
1063                 return ret;
1064         }
1065
1066         for (y = 0; y < ctx->mb_height; y++) {
1067             int mbs_per_slice = ctx->mbs_per_slice;
1068             for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1069                 q = ctx->force_quant ? ctx->force_quant
1070                                      : ctx->slice_q[mb + y * ctx->slices_width];
1071
1072                 while (ctx->mb_width - x < mbs_per_slice)
1073                     mbs_per_slice >>= 1;
1074
1075                 bytestream_put_byte(&buf, slice_hdr_size << 3);
1076                 slice_hdr = buf;
1077                 buf += slice_hdr_size - 1;
1078                 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1079                     uint8_t *start = pkt->data;
1080                     // Recompute new size according to max_slice_size
1081                     // and deduce delta
1082                     int delta = 200 + (ctx->pictures_per_frame *
1083                                 ctx->slices_per_picture + 1) *
1084                                 max_slice_size - pkt_size;
1085
1086                     delta = FFMAX(delta, 2 * max_slice_size);
1087                     ctx->frame_size_upper_bound += delta;
1088
1089                     if (!ctx->warn) {
1090                         avpriv_request_sample(avctx,
1091                                               "Packet too small: is %i,"
1092                                               " needs %i (slice: %i). "
1093                                               "Correct allocation",
1094                                               pkt_size, delta, max_slice_size);
1095                         ctx->warn = 1;
1096                     }
1097
1098                     ret = av_grow_packet(pkt, delta);
1099                     if (ret < 0)
1100                         return ret;
1101
1102                     pkt_size += delta;
1103                     // restore pointers
1104                     orig_buf         = pkt->data + (orig_buf         - start);
1105                     buf              = pkt->data + (buf              - start);
1106                     picture_size_pos = pkt->data + (picture_size_pos - start);
1107                     slice_sizes      = pkt->data + (slice_sizes      - start);
1108                     slice_hdr        = pkt->data + (slice_hdr        - start);
1109                     tmp              = pkt->data + (tmp              - start);
1110                 }
1111                 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1112                 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1113                                    mbs_per_slice);
1114                 if (ret < 0)
1115                     return ret;
1116
1117                 bytestream_put_byte(&slice_hdr, q);
1118                 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1119                 for (i = 0; i < ctx->num_planes - 1; i++) {
1120                     bytestream_put_be16(&slice_hdr, sizes[i]);
1121                     slice_size += sizes[i];
1122                 }
1123                 bytestream_put_be16(&slice_sizes, slice_size);
1124                 buf += slice_size - slice_hdr_size;
1125                 if (max_slice_size < slice_size)
1126                     max_slice_size = slice_size;
1127             }
1128         }
1129
1130         picture_size = buf - (picture_size_pos - 1);
1131         bytestream_put_be32(&picture_size_pos, picture_size);
1132     }
1133
1134     orig_buf -= 8;
1135     frame_size = buf - orig_buf;
1136     bytestream_put_be32(&orig_buf, frame_size);
1137
1138     pkt->size   = frame_size;
1139     *got_packet = 1;
1140
1141     return 0;
1142 }
1143
1144 static av_cold int encode_close(AVCodecContext *avctx)
1145 {
1146     ProresContext *ctx = avctx->priv_data;
1147     int i;
1148
1149     if (ctx->tdata) {
1150         for (i = 0; i < avctx->thread_count; i++)
1151             av_freep(&ctx->tdata[i].nodes);
1152     }
1153     av_freep(&ctx->tdata);
1154     av_freep(&ctx->slice_q);
1155
1156     return 0;
1157 }
1158
1159 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1160                         ptrdiff_t linesize, int16_t *block)
1161 {
1162     int x, y;
1163     const uint16_t *tsrc = src;
1164
1165     for (y = 0; y < 8; y++) {
1166         for (x = 0; x < 8; x++)
1167             block[y * 8 + x] = tsrc[x];
1168         tsrc += linesize >> 1;
1169     }
1170     fdsp->fdct(block);
1171 }
1172
1173 static av_cold int encode_init(AVCodecContext *avctx)
1174 {
1175     ProresContext *ctx = avctx->priv_data;
1176     int mps;
1177     int i, j;
1178     int min_quant, max_quant;
1179     int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
1180
1181     avctx->bits_per_raw_sample = 10;
1182
1183     ctx->fdct      = prores_fdct;
1184     ctx->scantable = interlaced ? ff_prores_interlaced_scan
1185                                 : ff_prores_progressive_scan;
1186     ff_fdctdsp_init(&ctx->fdsp, avctx);
1187
1188     mps = ctx->mbs_per_slice;
1189     if (mps & (mps - 1)) {
1190         av_log(avctx, AV_LOG_ERROR,
1191                "there should be an integer power of two MBs per slice\n");
1192         return AVERROR(EINVAL);
1193     }
1194     if (ctx->profile == PRORES_PROFILE_AUTO) {
1195         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1196         ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1197                         !(desc->log2_chroma_w + desc->log2_chroma_h))
1198                      ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1199         av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1200                "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1201                ? "4:4:4:4 profile because of the used input colorspace"
1202                : "HQ profile to keep best quality");
1203     }
1204     if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1205         if (ctx->profile != PRORES_PROFILE_4444 &&
1206             ctx->profile != PRORES_PROFILE_4444XQ) {
1207             // force alpha and warn
1208             av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1209                    "encode alpha. Override with -profile if needed.\n");
1210             ctx->alpha_bits = 0;
1211         }
1212         if (ctx->alpha_bits & 7) {
1213             av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1214             return AVERROR(EINVAL);
1215         }
1216         avctx->bits_per_coded_sample = 32;
1217     } else {
1218         ctx->alpha_bits = 0;
1219     }
1220
1221     ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1222                          ? CFACTOR_Y422
1223                          : CFACTOR_Y444;
1224     ctx->profile_info  = prores_profile_info + ctx->profile;
1225     ctx->num_planes    = 3 + !!ctx->alpha_bits;
1226
1227     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
1228
1229     if (interlaced)
1230         ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1231     else
1232         ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1233
1234     ctx->slices_width  = ctx->mb_width / mps;
1235     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1236     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1237     ctx->pictures_per_frame = 1 + interlaced;
1238
1239     if (ctx->quant_sel == -1) {
1240         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1241         ctx->quant_chroma_mat = prores_quant_matrices[ctx->profile_info->quant_chroma];
1242     } else {
1243         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1244         ctx->quant_chroma_mat = prores_quant_matrices[ctx->quant_sel];
1245     }
1246
1247     if (strlen(ctx->vendor) != 4) {
1248         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1249         return AVERROR_INVALIDDATA;
1250     }
1251
1252     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1253     if (!ctx->force_quant) {
1254         if (!ctx->bits_per_mb) {
1255             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1256                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1257                                            ctx->pictures_per_frame)
1258                     break;
1259             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
1260             if (ctx->alpha_bits)
1261                 ctx->bits_per_mb *= 20;
1262         } else if (ctx->bits_per_mb < 128) {
1263             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1264             return AVERROR_INVALIDDATA;
1265         }
1266
1267         min_quant = ctx->profile_info->min_quant;
1268         max_quant = ctx->profile_info->max_quant;
1269         for (i = min_quant; i < MAX_STORED_Q; i++) {
1270             for (j = 0; j < 64; j++) {
1271                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1272                 ctx->quants_chroma[i][j] = ctx->quant_chroma_mat[j] * i;
1273             }
1274         }
1275
1276         ctx->slice_q = av_malloc_array(ctx->slices_per_picture, sizeof(*ctx->slice_q));
1277         if (!ctx->slice_q)
1278             return AVERROR(ENOMEM);
1279
1280         ctx->tdata = av_calloc(avctx->thread_count, sizeof(*ctx->tdata));
1281         if (!ctx->tdata)
1282             return AVERROR(ENOMEM);
1283
1284         for (j = 0; j < avctx->thread_count; j++) {
1285             ctx->tdata[j].nodes = av_malloc_array(ctx->slices_width + 1,
1286                                                   TRELLIS_WIDTH
1287                                                   * sizeof(*ctx->tdata->nodes));
1288             if (!ctx->tdata[j].nodes)
1289                 return AVERROR(ENOMEM);
1290             for (i = min_quant; i < max_quant + 2; i++) {
1291                 ctx->tdata[j].nodes[i].prev_node = -1;
1292                 ctx->tdata[j].nodes[i].bits      = 0;
1293                 ctx->tdata[j].nodes[i].score     = 0;
1294             }
1295         }
1296     } else {
1297         int ls = 0;
1298         int ls_chroma = 0;
1299
1300         if (ctx->force_quant > 64) {
1301             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1302             return AVERROR_INVALIDDATA;
1303         }
1304
1305         for (j = 0; j < 64; j++) {
1306             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1307             ctx->quants_chroma[0][j] = ctx->quant_chroma_mat[j] * ctx->force_quant;
1308             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
1309             ls_chroma += av_log2((1 << 11)  / ctx->quants_chroma[0][j]) * 2 + 1;
1310         }
1311
1312         ctx->bits_per_mb = ls * 4 + ls_chroma * 4;
1313         if (ctx->chroma_factor == CFACTOR_Y444)
1314             ctx->bits_per_mb += ls_chroma * 4;
1315     }
1316
1317     ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1318                                    ctx->slices_per_picture + 1) *
1319                                   (2 + 2 * ctx->num_planes +
1320                                    (mps * ctx->bits_per_mb) / 8)
1321                                   + 200;
1322
1323     if (ctx->alpha_bits) {
1324          // The alpha plane is run-coded and might exceed the bit budget.
1325          ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1326                                          ctx->slices_per_picture + 1) *
1327          /* num pixels per slice */     (ctx->mbs_per_slice * 256 *
1328          /* bits per pixel */            (1 + ctx->alpha_bits + 1) + 7 >> 3);
1329     }
1330
1331     avctx->codec_tag   = ctx->profile_info->tag;
1332
1333     av_log(avctx, AV_LOG_DEBUG,
1334            "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1335            ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1336            interlaced ? "yes" : "no", ctx->bits_per_mb);
1337     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1338            ctx->frame_size_upper_bound);
1339
1340     return 0;
1341 }
1342
1343 #define OFFSET(x) offsetof(ProresContext, x)
1344 #define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1345
1346 static const AVOption options[] = {
1347     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1348         AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1349     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1350         { .i64 = PRORES_PROFILE_AUTO },
1351         PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
1352     { "auto",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1353         0, 0, VE, "profile" },
1354     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1355         0, 0, VE, "profile" },
1356     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1357         0, 0, VE, "profile" },
1358     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1359         0, 0, VE, "profile" },
1360     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1361         0, 0, VE, "profile" },
1362     { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1363         0, 0, VE, "profile" },
1364     { "4444xq",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
1365         0, 0, VE, "profile" },
1366     { "vendor", "vendor ID", OFFSET(vendor),
1367         AV_OPT_TYPE_STRING, { .str = "Lavc" }, 0, 0, VE },
1368     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1369         AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1370     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1371         { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1372     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1373         0, 0, VE, "quant_mat" },
1374     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1375         0, 0, VE, "quant_mat" },
1376     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1377         0, 0, VE, "quant_mat" },
1378     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1379         0, 0, VE, "quant_mat" },
1380     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1381         0, 0, VE, "quant_mat" },
1382     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1383         0, 0, VE, "quant_mat" },
1384     { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1385         { .i64 = 16 }, 0, 16, VE },
1386     { NULL }
1387 };
1388
1389 static const AVClass proresenc_class = {
1390     .class_name = "ProRes encoder",
1391     .item_name  = av_default_item_name,
1392     .option     = options,
1393     .version    = LIBAVUTIL_VERSION_INT,
1394 };
1395
1396 const FFCodec ff_prores_ks_encoder = {
1397     .p.name         = "prores_ks",
1398     .p.long_name    = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1399     .p.type         = AVMEDIA_TYPE_VIDEO,
1400     .p.id           = AV_CODEC_ID_PRORES,
1401     .priv_data_size = sizeof(ProresContext),
1402     .init           = encode_init,
1403     .close          = encode_close,
1404     FF_CODEC_ENCODE_CB(encode_frame),
1405     .p.capabilities = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
1406     .p.pix_fmts     = (const enum AVPixelFormat[]) {
1407                           AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1408                           AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1409                       },
1410     .p.priv_class   = &proresenc_class,
1411     .p.profiles     = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
1412     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1413 };