libavcodec/proresenc_kostya.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
 * This encoder appears to be based on Anatoliy Wasserman's encoder, considering
 * the similarities in the bugs.
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/mem_internal.h"
  27 #include "libavutil/opt.h"
  28 #include "libavutil/pixdesc.h"
  29 #include "avcodec.h"
  30 #include "codec_internal.h"
  31 #include "encode.h"
  32 #include "fdctdsp.h"
  33 #include "put_bits.h"
  34 #include "profiles.h"
  35 #include "bytestream.h"
  36 #include "proresdata.h"
  37
/* Values stored in ProresContext.chroma_factor. With CFACTOR_Y444 the chroma
 * planes are handled at full width with four blocks per macroblock; otherwise
 * they are handled at half width with two blocks per macroblock. */
#define CFACTOR_Y422 2
#define CFACTOR_Y444 3

/* Largest number of macroblocks in one slice; sizes the coefficient buffers. */
#define MAX_MBS_PER_SLICE 8

/* Largest number of planes; sizes the per-plane arrays. */
#define MAX_PLANES 4
  44
  45 enum {
  46     PRORES_PROFILE_AUTO  = -1,
  47     PRORES_PROFILE_PROXY = 0,
  48     PRORES_PROFILE_LT,
  49     PRORES_PROFILE_STANDARD,
  50     PRORES_PROFILE_HQ,
  51     PRORES_PROFILE_4444,
  52     PRORES_PROFILE_4444XQ,
  53 };
  54
  55 enum {
  56     QUANT_MAT_PROXY = 0,
  57     QUANT_MAT_PROXY_CHROMA,
  58     QUANT_MAT_LT,
  59     QUANT_MAT_STANDARD,
  60     QUANT_MAT_HQ,
  61     QUANT_MAT_XQ_LUMA,
  62     QUANT_MAT_DEFAULT,
  63 };
  64
  65 static const uint8_t prores_quant_matrices[][64] = {
  66     { // proxy
  67          4,  7,  9, 11, 13, 14, 15, 63,
  68          7,  7, 11, 12, 14, 15, 63, 63,
  69          9, 11, 13, 14, 15, 63, 63, 63,
  70         11, 11, 13, 14, 63, 63, 63, 63,
  71         11, 13, 14, 63, 63, 63, 63, 63,
  72         13, 14, 63, 63, 63, 63, 63, 63,
  73         13, 63, 63, 63, 63, 63, 63, 63,
  74         63, 63, 63, 63, 63, 63, 63, 63,
  75     },
  76     { // proxy chromas
  77         4,  7,  9, 11, 13, 14, 63, 63,
  78         7,  7, 11, 12, 14, 63, 63, 63,
  79         9, 11, 13, 14, 63, 63, 63, 63,
  80         11, 11, 13, 14, 63, 63, 63, 63,
  81         11, 13, 14, 63, 63, 63, 63, 63,
  82         13, 14, 63, 63, 63, 63, 63, 63,
  83         13, 63, 63, 63, 63, 63, 63, 63,
  84         63, 63, 63, 63, 63, 63, 63, 63
  85     },
  86     { // LT
  87          4,  5,  6,  7,  9, 11, 13, 15,
  88          5,  5,  7,  8, 11, 13, 15, 17,
  89          6,  7,  9, 11, 13, 15, 15, 17,
  90          7,  7,  9, 11, 13, 15, 17, 19,
  91          7,  9, 11, 13, 14, 16, 19, 23,
  92          9, 11, 13, 14, 16, 19, 23, 29,
  93          9, 11, 13, 15, 17, 21, 28, 35,
  94         11, 13, 16, 17, 21, 28, 35, 41,
  95     },
  96     { // standard
  97          4,  4,  5,  5,  6,  7,  7,  9,
  98          4,  4,  5,  6,  7,  7,  9,  9,
  99          5,  5,  6,  7,  7,  9,  9, 10,
 100          5,  5,  6,  7,  7,  9,  9, 10,
 101          5,  6,  7,  7,  8,  9, 10, 12,
 102          6,  7,  7,  8,  9, 10, 12, 15,
 103          6,  7,  7,  9, 10, 11, 14, 17,
 104          7,  7,  9, 10, 11, 14, 17, 21,
 105     },
 106     { // high quality
 107          4,  4,  4,  4,  4,  4,  4,  4,
 108          4,  4,  4,  4,  4,  4,  4,  4,
 109          4,  4,  4,  4,  4,  4,  4,  4,
 110          4,  4,  4,  4,  4,  4,  4,  5,
 111          4,  4,  4,  4,  4,  4,  5,  5,
 112          4,  4,  4,  4,  4,  5,  5,  6,
 113          4,  4,  4,  4,  5,  5,  6,  7,
 114          4,  4,  4,  4,  5,  6,  7,  7,
 115     },
 116     { // XQ luma
 117         2,  2,  2,  2,  2,  2,  2,  2,
 118         2,  2,  2,  2,  2,  2,  2,  2,
 119         2,  2,  2,  2,  2,  2,  2,  2,
 120         2,  2,  2,  2,  2,  2,  2,  3,
 121         2,  2,  2,  2,  2,  2,  3,  3,
 122         2,  2,  2,  2,  2,  3,  3,  3,
 123         2,  2,  2,  2,  3,  3,  3,  4,
 124         2,  2,  2,  2,  3,  3,  4,  4,
 125     },
 126     { // codec default
 127          4,  4,  4,  4,  4,  4,  4,  4,
 128          4,  4,  4,  4,  4,  4,  4,  4,
 129          4,  4,  4,  4,  4,  4,  4,  4,
 130          4,  4,  4,  4,  4,  4,  4,  4,
 131          4,  4,  4,  4,  4,  4,  4,  4,
 132          4,  4,  4,  4,  4,  4,  4,  4,
 133          4,  4,  4,  4,  4,  4,  4,  4,
 134          4,  4,  4,  4,  4,  4,  4,  4,
 135     },
 136 };
 137
 138 static const uint8_t prores_dc_codebook[4] = {
 139     0x04, // rice_order = 0, exp_golomb_order = 1, switch_bits = 0
 140     0x28, // rice_order = 1, exp_golomb_order = 2, switch_bits = 0
 141     0x4D, // rice_order = 2, exp_golomb_order = 3, switch_bits = 1
 142     0x70  // rice_order = 3, exp_golomb_order = 4, switch_bits = 0
 143 };
 144
 145 static const uint8_t prores_ac_codebook[7] = {
 146     0x04, // rice_order = 0, exp_golomb_order = 1, switch_bits = 0
 147     0x28, // rice_order = 1, exp_golomb_order = 2, switch_bits = 0
 148     0x4C, // rice_order = 2, exp_golomb_order = 3, switch_bits = 0
 149     0x05, // rice_order = 0, exp_golomb_order = 1, switch_bits = 1
 150     0x29, // rice_order = 1, exp_golomb_order = 2, switch_bits = 1
 151     0x06, // rice_order = 0, exp_golomb_order = 1, switch_bits = 2
 152     0x0A, // rice_order = 0, exp_golomb_order = 2, switch_bits = 2
 153 };
 154
 155 /**
 156  * Lookup tables for adaptive switching between codebooks
 157  * according with previous run/level value.
 158  */
 159 static const uint8_t prores_run_to_cb_index[16] =
 160     { 5, 5, 3, 3, 0, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 2 };
 161
 162 static const uint8_t prores_lev_to_cb_index[10] = { 0, 6, 3, 5, 0, 1, 1, 1, 1, 2 };
 163
 164 #define NUM_MB_LIMITS 4
 165 static const int prores_mb_limits[NUM_MB_LIMITS] = {
 166     1620, // up to 720x576
 167     2700, // up to 960x720
 168     6075, // up to 1440x1080
 169     9216, // up to 2048x1152
 170 };
 171
 172 static const struct prores_profile {
 173     const char *full_name;
 174     uint32_t    tag;
 175     int         min_quant;
 176     int         max_quant;
 177     int         br_tab[NUM_MB_LIMITS];
 178     int         quant;
 179     int         quant_chroma;
 180 } prores_profile_info[6] = {
 181     {
 182         .full_name = "proxy",
 183         .tag       = MKTAG('a', 'p', 'c', 'o'),
 184         .min_quant = 4,
 185         .max_quant = 8,
 186         .br_tab    = { 300, 242, 220, 194 },
 187         .quant     = QUANT_MAT_PROXY,
 188         .quant_chroma = QUANT_MAT_PROXY_CHROMA,
 189     },
 190     {
 191         .full_name = "LT",
 192         .tag       = MKTAG('a', 'p', 'c', 's'),
 193         .min_quant = 1,
 194         .max_quant = 9,
 195         .br_tab    = { 720, 560, 490, 440 },
 196         .quant     = QUANT_MAT_LT,
 197         .quant_chroma = QUANT_MAT_LT,
 198     },
 199     {
 200         .full_name = "standard",
 201         .tag       = MKTAG('a', 'p', 'c', 'n'),
 202         .min_quant = 1,
 203         .max_quant = 6,
 204         .br_tab    = { 1050, 808, 710, 632 },
 205         .quant     = QUANT_MAT_STANDARD,
 206         .quant_chroma = QUANT_MAT_STANDARD,
 207     },
 208     {
 209         .full_name = "high quality",
 210         .tag       = MKTAG('a', 'p', 'c', 'h'),
 211         .min_quant = 1,
 212         .max_quant = 6,
 213         .br_tab    = { 1566, 1216, 1070, 950 },
 214         .quant     = QUANT_MAT_HQ,
 215         .quant_chroma = QUANT_MAT_HQ,
 216     },
 217     {
 218         .full_name = "4444",
 219         .tag       = MKTAG('a', 'p', '4', 'h'),
 220         .min_quant = 1,
 221         .max_quant = 6,
 222         .br_tab    = { 2350, 1828, 1600, 1425 },
 223         .quant     = QUANT_MAT_HQ,
 224         .quant_chroma = QUANT_MAT_HQ,
 225     },
 226     {
 227         .full_name = "4444XQ",
 228         .tag       = MKTAG('a', 'p', '4', 'x'),
 229         .min_quant = 1,
 230         .max_quant = 6,
 231         .br_tab    = { 3525, 2742, 2400, 2137 },
 232         .quant     = QUANT_MAT_HQ, /* Fix me : use QUANT_MAT_XQ_LUMA */
 233         .quant_chroma = QUANT_MAT_HQ,
 234     }
 235 };
 236
 237 #define TRELLIS_WIDTH 16
 238 #define SCORE_LIMIT   INT_MAX / 2
 239
/* One node of the quantiser-selection trellis built by find_slice_quant(). */
struct TrellisNode {
    int prev_node; /* initialised to -1 by find_slice_quant() */
    int quant;     /* quantiser value this node stands for */
    int bits;      /* NOTE(review): presumably accumulated bit cost - confirm at the use site */
    int score;     /* NOTE(review): presumably accumulated error score - confirm at the use site */
};
 246
/* Number of quantiser values with precomputed matrices in
 * ProresContext.quants[] / quants_chroma[]; encode_slice() builds a custom
 * matrix for any quantiser at or above this value. */
#define MAX_STORED_Q 16
 248
/* Scratch data handed to find_slice_quant() through its td argument. */
typedef struct ProresThreadData {
    /* Per-plane slice data filled by get_slice_data() / get_alpha_data(). */
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
    /* 16x16 staging area used to pad macroblocks that cross the picture edge. */
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
    /* NOTE(review): presumably per-thread custom matrices for quantisers
     * >= MAX_STORED_Q - the use site is not visible here, confirm. */
    int16_t custom_q[64];
    int16_t custom_chroma_q[64];
    /* Trellis nodes, indexed as nodes[trellis_node + q] by find_slice_quant(). */
    struct TrellisNode *nodes;
} ProresThreadData;
 256
 257 typedef struct ProresContext {
 258     AVClass *class;
 259     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
 260     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
 261     int16_t quants[MAX_STORED_Q][64];
 262     int16_t quants_chroma[MAX_STORED_Q][64];
 263     int16_t custom_q[64];
 264     int16_t custom_chroma_q[64];
 265     const uint8_t *quant_mat;
 266     const uint8_t *quant_chroma_mat;
 267     const uint8_t *scantable;
 268
 269     void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
 270                  ptrdiff_t linesize, int16_t *block);
 271     FDCTDSPContext fdsp;
 272
 273     const AVFrame *pic;
 274     int mb_width, mb_height;
 275     int mbs_per_slice;
 276     int num_chroma_blocks, chroma_factor;
 277     int slices_width;
 278     int slices_per_picture;
 279     int pictures_per_frame; // 1 for progressive, 2 for interlaced
 280     int cur_picture_idx;
 281     int num_planes;
 282     int bits_per_mb;
 283     int force_quant;
 284     int alpha_bits;
 285     int warn;
 286
 287     char *vendor;
 288     int quant_sel;
 289
 290     int frame_size_upper_bound;
 291
 292     int profile;
 293     const struct prores_profile *profile_info;
 294
 295     int *slice_q;
 296
 297     ProresThreadData *tdata;
 298 } ProresContext;
 299
 300 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 301                            ptrdiff_t linesize, int x, int y, int w, int h,
 302                            int16_t *blocks, uint16_t *emu_buf,
 303                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 304 {
 305     const uint16_t *esrc;
 306     const int mb_width = 4 * blocks_per_mb;
 307     ptrdiff_t elinesize;
 308     int i, j, k;
 309
 310     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 311         if (x >= w) {
 312             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 313                               * sizeof(*blocks));
 314             return;
 315         }
 316         if (x + mb_width <= w && y + 16 <= h) {
 317             esrc      = src;
 318             elinesize = linesize;
 319         } else {
 320             int bw, bh, pix;
 321
 322             esrc      = emu_buf;
 323             elinesize = 16 * sizeof(*emu_buf);
 324
 325             bw = FFMIN(w - x, mb_width);
 326             bh = FFMIN(h - y, 16);
 327
 328             for (j = 0; j < bh; j++) {
 329                 memcpy(emu_buf + j * 16,
 330                        (const uint8_t*)src + j * linesize,
 331                        bw * sizeof(*src));
 332                 pix = emu_buf[j * 16 + bw - 1];
 333                 for (k = bw; k < mb_width; k++)
 334                     emu_buf[j * 16 + k] = pix;
 335             }
 336             for (; j < 16; j++)
 337                 memcpy(emu_buf + j * 16,
 338                        emu_buf + (bh - 1) * 16,
 339                        mb_width * sizeof(*emu_buf));
 340         }
 341         if (!is_chroma) {
 342             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 343             blocks += 64;
 344             if (blocks_per_mb > 2) {
 345                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 346                 blocks += 64;
 347             }
 348             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 349             blocks += 64;
 350             if (blocks_per_mb > 2) {
 351                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 352                 blocks += 64;
 353             }
 354         } else {
 355             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 356             blocks += 64;
 357             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 358             blocks += 64;
 359             if (blocks_per_mb > 2) {
 360                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 361                 blocks += 64;
 362                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 363                 blocks += 64;
 364             }
 365         }
 366
 367         x += mb_width;
 368     }
 369 }
 370
 371 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
 372                            ptrdiff_t linesize, int x, int y, int w, int h,
 373                            int16_t *blocks, int mbs_per_slice, int abits)
 374 {
 375     const int slice_width = 16 * mbs_per_slice;
 376     int i, j, copy_w, copy_h;
 377
 378     copy_w = FFMIN(w - x, slice_width);
 379     copy_h = FFMIN(h - y, 16);
 380     for (i = 0; i < copy_h; i++) {
 381         memcpy(blocks, src, copy_w * sizeof(*src));
 382         if (abits == 8)
 383             for (j = 0; j < copy_w; j++)
 384                 blocks[j] >>= 2;
 385         else
 386             for (j = 0; j < copy_w; j++)
 387                 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
 388         for (j = copy_w; j < slice_width; j++)
 389             blocks[j] = blocks[copy_w - 1];
 390         blocks += slice_width;
 391         src    += linesize >> 1;
 392     }
 393     for (; i < 16; i++) {
 394         memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
 395         blocks += slice_width;
 396     }
 397 }
 398
 399 /**
 400  * Write an unsigned rice/exp golomb codeword.
 401  */
 402 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 403 {
 404     unsigned int rice_order, exp_order, switch_bits, switch_val;
 405     int exponent;
 406
 407     /* number of prefix bits to switch between Rice and expGolomb */
 408     switch_bits = (codebook & 3) + 1;
 409     rice_order  =  codebook >> 5;       /* rice code order */
 410     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 411
 412     switch_val  = switch_bits << rice_order;
 413
 414     if (val >= switch_val) {
 415         val -= switch_val - (1 << exp_order);
 416         exponent = av_log2(val);
 417
 418         put_bits(pb, exponent - exp_order + switch_bits, 0);
 419         put_bits(pb, exponent + 1, val);
 420     } else {
 421         exponent = val >> rice_order;
 422
 423         if (exponent)
 424             put_bits(pb, exponent, 0);
 425         put_bits(pb, 1, 1);
 426         if (rice_order)
 427             put_sbits(pb, rice_order, val);
 428     }
 429 }
 430
 431 #define GET_SIGN(x)  ((x) >> 31)
 432 #define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x))
 433
 434 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
 435                        int blocks_per_slice, int scale)
 436 {
 437     int i;
 438     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 439
 440     prev_dc = (blocks[0] - 0x4000) / scale;
 441     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 442     sign     = 0;
 443     codebook = 3;
 444     blocks  += 64;
 445
 446     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 447         dc       = (blocks[0] - 0x4000) / scale;
 448         delta    = dc - prev_dc;
 449         new_sign = GET_SIGN(delta);
 450         delta    = (delta ^ sign) - sign;
 451         code     = MAKE_CODE(delta);
 452         encode_vlc_codeword(pb, prores_dc_codebook[codebook], code);
 453         codebook = (code + (code & 1)) >> 1;
 454         codebook = FFMIN(codebook, 3);
 455         sign     = new_sign;
 456         prev_dc  = dc;
 457     }
 458 }
 459
 460 static void encode_acs(PutBitContext *pb, int16_t *blocks,
 461                        int blocks_per_slice,
 462                        int plane_size_factor,
 463                        const uint8_t *scan, const int16_t *qmat)
 464 {
 465     int idx, i;
 466     int run, level, run_cb, lev_cb;
 467     int max_coeffs, abs_level;
 468
 469     max_coeffs = blocks_per_slice << 6;
 470     run_cb     = prores_run_to_cb_index[4];
 471     lev_cb     = prores_lev_to_cb_index[2];
 472     run        = 0;
 473
 474     for (i = 1; i < 64; i++) {
 475         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 476             level = blocks[idx] / qmat[scan[i]];
 477             if (level) {
 478                 abs_level = FFABS(level);
 479                 encode_vlc_codeword(pb, prores_ac_codebook[run_cb], run);
 480                 encode_vlc_codeword(pb, prores_ac_codebook[lev_cb],
 481                                     abs_level - 1);
 482                 put_sbits(pb, 1, GET_SIGN(level));
 483
 484                 run_cb = prores_run_to_cb_index[FFMIN(run, 15)];
 485                 lev_cb = prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 486                 run    = 0;
 487             } else {
 488                 run++;
 489             }
 490         }
 491     }
 492 }
 493
 494 static void encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 495                               const uint16_t *src, ptrdiff_t linesize,
 496                               int mbs_per_slice, int16_t *blocks,
 497                               int blocks_per_mb, int plane_size_factor,
 498                               const int16_t *qmat)
 499 {
 500     int blocks_per_slice = mbs_per_slice * blocks_per_mb;
 501
 502     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 503     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 504                ctx->scantable, qmat);
 505 }
 506
 507 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
 508 {
 509     const int dbits = (abits == 8) ? 4 : 7;
 510     const int dsize = 1 << dbits - 1;
 511     int diff = cur - prev;
 512
 513     diff = av_mod_uintp2(diff, abits);
 514     if (diff >= (1 << abits) - dsize)
 515         diff -= 1 << abits;
 516     if (diff < -dsize || diff > dsize || !diff) {
 517         put_bits(pb, 1, 1);
 518         put_bits(pb, abits, diff);
 519     } else {
 520         put_bits(pb, 1, 0);
 521         put_bits(pb, dbits - 1, FFABS(diff) - 1);
 522         put_bits(pb, 1, diff < 0);
 523     }
 524 }
 525
 526 static void put_alpha_run(PutBitContext *pb, int run)
 527 {
 528     if (run) {
 529         put_bits(pb, 1, 0);
 530         if (run < 0x10)
 531             put_bits(pb, 4, run);
 532         else
 533             put_bits(pb, 15, run);
 534     } else {
 535         put_bits(pb, 1, 1);
 536     }
 537 }
 538
 539 // todo alpha quantisation for high quants
 540 static void encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
 541                               int mbs_per_slice, uint16_t *blocks,
 542                               int quant)
 543 {
 544     const int abits = ctx->alpha_bits;
 545     const int mask  = (1 << abits) - 1;
 546     const int num_coeffs = mbs_per_slice * 256;
 547     int prev = mask, cur;
 548     int idx = 0;
 549     int run = 0;
 550
 551     cur = blocks[idx++];
 552     put_alpha_diff(pb, cur, prev, abits);
 553     prev = cur;
 554     do {
 555         cur = blocks[idx++];
 556         if (cur != prev) {
 557             put_alpha_run (pb, run);
 558             put_alpha_diff(pb, cur, prev, abits);
 559             prev = cur;
 560             run  = 0;
 561         } else {
 562             run++;
 563         }
 564     } while (idx < num_coeffs);
 565     if (run)
 566         put_alpha_run(pb, run);
 567 }
 568
 569 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 570                         PutBitContext *pb,
 571                         int sizes[4], int x, int y, int quant,
 572                         int mbs_per_slice)
 573 {
 574     ProresContext *ctx = avctx->priv_data;
 575     int i, xp, yp;
 576     int total_size = 0;
 577     const uint16_t *src;
 578     int slice_width_factor = av_log2(mbs_per_slice);
 579     int num_cblocks, pwidth, line_add;
 580     ptrdiff_t linesize;
 581     int plane_factor, is_chroma;
 582     uint16_t *qmat;
 583     uint16_t *qmat_chroma;
 584
 585     if (ctx->pictures_per_frame == 1)
 586         line_add = 0;
 587     else
 588         line_add = ctx->cur_picture_idx ^ !(pic->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST);
 589
 590     if (ctx->force_quant) {
 591         qmat = ctx->quants[0];
 592         qmat_chroma = ctx->quants_chroma[0];
 593     } else if (quant < MAX_STORED_Q) {
 594         qmat = ctx->quants[quant];
 595         qmat_chroma = ctx->quants_chroma[quant];
 596     } else {
 597         qmat = ctx->custom_q;
 598         qmat_chroma = ctx->custom_chroma_q;
 599         for (i = 0; i < 64; i++) {
 600             qmat[i] = ctx->quant_mat[i] * quant;
 601             qmat_chroma[i] = ctx->quant_chroma_mat[i] * quant;
 602         }
 603     }
 604
 605     for (i = 0; i < ctx->num_planes; i++) {
 606         is_chroma    = (i == 1 || i == 2);
 607         plane_factor = slice_width_factor + 2;
 608         if (is_chroma)
 609             plane_factor += ctx->chroma_factor - 3;
 610         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 611             xp          = x << 4;
 612             yp          = y << 4;
 613             num_cblocks = 4;
 614             pwidth      = avctx->width;
 615         } else {
 616             xp          = x << 3;
 617             yp          = y << 4;
 618             num_cblocks = 2;
 619             pwidth      = avctx->width >> 1;
 620         }
 621
 622         linesize = pic->linesize[i] * ctx->pictures_per_frame;
 623         src = (const uint16_t*)(pic->data[i] + yp * linesize +
 624                                 line_add * pic->linesize[i]) + xp;
 625
 626         if (i < 3) {
 627             get_slice_data(ctx, src, linesize, xp, yp,
 628                            pwidth, avctx->height / ctx->pictures_per_frame,
 629                            ctx->blocks[0], ctx->emu_buf,
 630                            mbs_per_slice, num_cblocks, is_chroma);
 631             if (!is_chroma) {/* luma quant */
 632                 encode_slice_plane(ctx, pb, src, linesize,
 633                                    mbs_per_slice, ctx->blocks[0],
 634                                    num_cblocks, plane_factor, qmat);
 635             } else { /* chroma plane */
 636                 encode_slice_plane(ctx, pb, src, linesize,
 637                                    mbs_per_slice, ctx->blocks[0],
 638                                    num_cblocks, plane_factor, qmat_chroma);
 639             }
 640         } else {
 641             get_alpha_data(ctx, src, linesize, xp, yp,
 642                            pwidth, avctx->height / ctx->pictures_per_frame,
 643                            ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
 644             encode_alpha_plane(ctx, pb, mbs_per_slice, ctx->blocks[0], quant);
 645         }
 646         flush_put_bits(pb);
 647         sizes[i]   = put_bytes_output(pb) - total_size;
 648         total_size = put_bytes_output(pb);
 649     }
 650     return total_size;
 651 }
 652
 653 static inline int estimate_vlc(unsigned codebook, int val)
 654 {
 655     unsigned int rice_order, exp_order, switch_bits, switch_val;
 656     int exponent;
 657
 658     /* number of prefix bits to switch between Rice and expGolomb */
 659     switch_bits = (codebook & 3) + 1;
 660     rice_order  =  codebook >> 5;       /* rice code order */
 661     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 662
 663     switch_val  = switch_bits << rice_order;
 664
 665     if (val >= switch_val) {
 666         val -= switch_val - (1 << exp_order);
 667         exponent = av_log2(val);
 668
 669         return exponent * 2 - exp_order + switch_bits + 1;
 670     } else {
 671         return (val >> rice_order) + rice_order + 1;
 672     }
 673 }
 674
 675 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
 676                         int scale)
 677 {
 678     int i;
 679     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 680     int bits;
 681
 682     prev_dc  = (blocks[0] - 0x4000) / scale;
 683     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 684     sign     = 0;
 685     codebook = 3;
 686     blocks  += 64;
 687     *error  += FFABS(blocks[0] - 0x4000) % scale;
 688
 689     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 690         dc       = (blocks[0] - 0x4000) / scale;
 691         *error  += FFABS(blocks[0] - 0x4000) % scale;
 692         delta    = dc - prev_dc;
 693         new_sign = GET_SIGN(delta);
 694         delta    = (delta ^ sign) - sign;
 695         code     = MAKE_CODE(delta);
 696         bits    += estimate_vlc(prores_dc_codebook[codebook], code);
 697         codebook = (code + (code & 1)) >> 1;
 698         codebook = FFMIN(codebook, 3);
 699         sign     = new_sign;
 700         prev_dc  = dc;
 701     }
 702
 703     return bits;
 704 }
 705
 706 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
 707                         int plane_size_factor,
 708                         const uint8_t *scan, const int16_t *qmat)
 709 {
 710     int idx, i;
 711     int run, level, run_cb, lev_cb;
 712     int max_coeffs, abs_level;
 713     int bits = 0;
 714
 715     max_coeffs = blocks_per_slice << 6;
 716     run_cb     = prores_run_to_cb_index[4];
 717     lev_cb     = prores_lev_to_cb_index[2];
 718     run        = 0;
 719
 720     for (i = 1; i < 64; i++) {
 721         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 722             level   = blocks[idx] / qmat[scan[i]];
 723             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 724             if (level) {
 725                 abs_level = FFABS(level);
 726                 bits += estimate_vlc(prores_ac_codebook[run_cb], run);
 727                 bits += estimate_vlc(prores_ac_codebook[lev_cb],
 728                                      abs_level - 1) + 1;
 729
 730                 run_cb = prores_run_to_cb_index[FFMIN(run, 15)];
 731                 lev_cb = prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 732                 run    = 0;
 733             } else {
 734                 run++;
 735             }
 736         }
 737     }
 738
 739     return bits;
 740 }
 741
 742 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 743                                 const uint16_t *src, ptrdiff_t linesize,
 744                                 int mbs_per_slice,
 745                                 int blocks_per_mb, int plane_size_factor,
 746                                 const int16_t *qmat, ProresThreadData *td)
 747 {
 748     int blocks_per_slice;
 749     int bits;
 750
 751     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 752
 753     bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
 754     bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
 755                          plane_size_factor, ctx->scantable, qmat);
 756
 757     return FFALIGN(bits, 8);
 758 }
 759
 760 static int est_alpha_diff(int cur, int prev, int abits)
 761 {
 762     const int dbits = (abits == 8) ? 4 : 7;
 763     const int dsize = 1 << dbits - 1;
 764     int diff = cur - prev;
 765
 766     diff = av_mod_uintp2(diff, abits);
 767     if (diff >= (1 << abits) - dsize)
 768         diff -= 1 << abits;
 769     if (diff < -dsize || diff > dsize || !diff)
 770         return abits + 1;
 771     else
 772         return dbits + 1;
 773 }
 774
 775 static int estimate_alpha_plane(ProresContext *ctx,
 776                                 const uint16_t *src, ptrdiff_t linesize,
 777                                 int mbs_per_slice, int16_t *blocks)
 778 {
 779     const int abits = ctx->alpha_bits;
 780     const int mask  = (1 << abits) - 1;
 781     const int num_coeffs = mbs_per_slice * 256;
 782     int prev = mask, cur;
 783     int idx = 0;
 784     int run = 0;
 785     int bits;
 786
 787     cur = blocks[idx++];
 788     bits = est_alpha_diff(cur, prev, abits);
 789     prev = cur;
 790     do {
 791         cur = blocks[idx++];
 792         if (cur != prev) {
 793             if (!run)
 794                 bits++;
 795             else if (run < 0x10)
 796                 bits += 4;
 797             else
 798                 bits += 15;
 799             bits += est_alpha_diff(cur, prev, abits);
 800             prev = cur;
 801             run  = 0;
 802         } else {
 803             run++;
 804         }
 805     } while (idx < num_coeffs);
 806
 807     if (run) {
 808         if (run < 0x10)
 809             bits += 4;
 810         else
 811             bits += 15;
 812     }
 813
 814     return bits;
 815 }
 816
 817 static int find_slice_quant(AVCodecContext *avctx,
 818                             int trellis_node, int x, int y, int mbs_per_slice,
 819                             ProresThreadData *td)
 820 {
 821     ProresContext *ctx = avctx->priv_data;
 822     int i, q, pq, xp, yp;
 823     const uint16_t *src;
 824     int slice_width_factor = av_log2(mbs_per_slice);
 825     int num_cblocks[MAX_PLANES], pwidth;
 826     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 827     const int min_quant = ctx->profile_info->min_quant;
 828     const int max_quant = ctx->profile_info->max_quant;
 829     int error, bits, bits_limit;
 830     int mbs, prev, cur, new_score;
 831     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 832     int overquant;
 833     uint16_t *qmat;
 834     uint16_t *qmat_chroma;
 835     int linesize[4], line_add;
 836     int alpha_bits = 0;
 837
 838     if (ctx->pictures_per_frame == 1)
 839         line_add = 0;
 840     else
 841         line_add = ctx->cur_picture_idx ^ !(ctx->pic->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST);
 842     mbs = x + mbs_per_slice;
 843
 844     for (i = 0; i < ctx->num_planes; i++) {
 845         is_chroma[i]    = (i == 1 || i == 2);
 846         plane_factor[i] = slice_width_factor + 2;
 847         if (is_chroma[i])
 848             plane_factor[i] += ctx->chroma_factor - 3;
 849         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 850             xp             = x << 4;
 851             yp             = y << 4;
 852             num_cblocks[i] = 4;
 853             pwidth         = avctx->width;
 854         } else {
 855             xp             = x << 3;
 856             yp             = y << 4;
 857             num_cblocks[i] = 2;
 858             pwidth         = avctx->width >> 1;
 859         }
 860
 861         linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
 862         src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
 863                                  line_add * ctx->pic->linesize[i]) + xp;
 864
 865         if (i < 3) {
 866             get_slice_data(ctx, src, linesize[i], xp, yp,
 867                            pwidth, avctx->height / ctx->pictures_per_frame,
 868                            td->blocks[i], td->emu_buf,
 869                            mbs_per_slice, num_cblocks[i], is_chroma[i]);
 870         } else {
 871             get_alpha_data(ctx, src, linesize[i], xp, yp,
 872                            pwidth, avctx->height / ctx->pictures_per_frame,
 873                            td->blocks[i], mbs_per_slice, ctx->alpha_bits);
 874         }
 875     }
 876
 877     for (q = min_quant; q < max_quant + 2; q++) {
 878         td->nodes[trellis_node + q].prev_node = -1;
 879         td->nodes[trellis_node + q].quant     = q;
 880     }
 881
 882     if (ctx->alpha_bits)
 883         alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
 884                                           mbs_per_slice, td->blocks[3]);
 885     // todo: maybe perform coarser quantising to fit into frame size when needed
 886     for (q = min_quant; q <= max_quant; q++) {
 887         bits  = alpha_bits;
 888         error = 0;
 889         bits += estimate_slice_plane(ctx, &error, 0,
 890                                      src, linesize[0],
 891                                      mbs_per_slice,
 892                                      num_cblocks[0], plane_factor[0],
 893                                      ctx->quants[q], td); /* estimate luma plane */
 894         for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
 895             bits += estimate_slice_plane(ctx, &error, i,
 896                                          src, linesize[i],
 897                                          mbs_per_slice,
 898                                          num_cblocks[i], plane_factor[i],
 899                                          ctx->quants_chroma[q], td);
 900         }
 901         if (bits > 65000 * 8)
 902             error = SCORE_LIMIT;
 903
 904         slice_bits[q]  = bits;
 905         slice_score[q] = error;
 906     }
 907     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 908         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 909         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 910         overquant = max_quant;
 911     } else {
 912         for (q = max_quant + 1; q < 128; q++) {
 913             bits  = alpha_bits;
 914             error = 0;
 915             if (q < MAX_STORED_Q) {
 916                 qmat = ctx->quants[q];
 917                 qmat_chroma = ctx->quants_chroma[q];
 918             } else {
 919                 qmat = td->custom_q;
 920                 qmat_chroma = td->custom_chroma_q;
 921                 for (i = 0; i < 64; i++) {
 922                     qmat[i] = ctx->quant_mat[i] * q;
 923                     qmat_chroma[i] = ctx->quant_chroma_mat[i] * q;
 924                 }
 925             }
 926             bits += estimate_slice_plane(ctx, &error, 0,
 927                                          src, linesize[0],
 928                                          mbs_per_slice,
 929                                          num_cblocks[0], plane_factor[0],
 930                                          qmat, td);/* estimate luma plane */
 931             for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
 932                 bits += estimate_slice_plane(ctx, &error, i,
 933                                              src, linesize[i],
 934                                              mbs_per_slice,
 935                                              num_cblocks[i], plane_factor[i],
 936                                              qmat_chroma, td);
 937             }
 938             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 939                 break;
 940         }
 941
 942         slice_bits[max_quant + 1]  = bits;
 943         slice_score[max_quant + 1] = error;
 944         overquant = q;
 945     }
 946     td->nodes[trellis_node + max_quant + 1].quant = overquant;
 947
 948     bits_limit = mbs * ctx->bits_per_mb;
 949     for (pq = min_quant; pq < max_quant + 2; pq++) {
 950         prev = trellis_node - TRELLIS_WIDTH + pq;
 951
 952         for (q = min_quant; q < max_quant + 2; q++) {
 953             cur = trellis_node + q;
 954
 955             bits  = td->nodes[prev].bits + slice_bits[q];
 956             error = slice_score[q];
 957             if (bits > bits_limit)
 958                 error = SCORE_LIMIT;
 959
 960             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 961                 new_score = td->nodes[prev].score + error;
 962             else
 963                 new_score = SCORE_LIMIT;
 964             if (td->nodes[cur].prev_node == -1 ||
 965                 td->nodes[cur].score >= new_score) {
 966
 967                 td->nodes[cur].bits      = bits;
 968                 td->nodes[cur].score     = new_score;
 969                 td->nodes[cur].prev_node = prev;
 970             }
 971         }
 972     }
 973
 974     error = td->nodes[trellis_node + min_quant].score;
 975     pq    = trellis_node + min_quant;
 976     for (q = min_quant + 1; q < max_quant + 2; q++) {
 977         if (td->nodes[trellis_node + q].score <= error) {
 978             error = td->nodes[trellis_node + q].score;
 979             pq    = trellis_node + q;
 980         }
 981     }
 982
 983     return pq;
 984 }
 985
 986 static int find_quant_thread(AVCodecContext *avctx, void *arg,
 987                              int jobnr, int threadnr)
 988 {
 989     ProresContext *ctx = avctx->priv_data;
 990     ProresThreadData *td = ctx->tdata + threadnr;
 991     int mbs_per_slice = ctx->mbs_per_slice;
 992     int x, y = jobnr, mb, q = 0;
 993
 994     for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 995         while (ctx->mb_width - x < mbs_per_slice)
 996             mbs_per_slice >>= 1;
 997         q = find_slice_quant(avctx,
 998                              (mb + 1) * TRELLIS_WIDTH, x, y,
 999                              mbs_per_slice, td);
1000     }
1001
1002     for (x = ctx->slices_width - 1; x >= 0; x--) {
1003         ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
1004         q = td->nodes[q].prev_node;
1005     }
1006
1007     return 0;
1008 }
1009
1010 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1011                         const AVFrame *pic, int *got_packet)
1012 {
1013     ProresContext *ctx = avctx->priv_data;
1014     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
1015     uint8_t *picture_size_pos;
1016     PutBitContext pb;
1017     int x, y, i, mb, q = 0;
1018     int sizes[4] = { 0 };
1019     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
1020     int frame_size, picture_size, slice_size;
1021     int pkt_size, ret;
1022     int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
1023     uint8_t frame_flags;
1024
1025     ctx->pic = pic;
1026     pkt_size = ctx->frame_size_upper_bound;
1027
1028     if ((ret = ff_alloc_packet(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE)) < 0)
1029         return ret;
1030
1031     orig_buf = pkt->data;
1032
1033     // frame atom
1034     orig_buf += 4;                              // frame size
1035     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
1036     buf = orig_buf;
1037
1038     // frame header
1039     tmp = buf;
1040     buf += 2;                                   // frame header size will be stored here
1041     bytestream_put_be16  (&buf, 0);             // version 1
1042     bytestream_put_buffer(&buf, ctx->vendor, 4);
1043     bytestream_put_be16  (&buf, avctx->width);
1044     bytestream_put_be16  (&buf, avctx->height);
1045
1046     frame_flags = ctx->chroma_factor << 6;
1047     if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
1048         frame_flags |= (pic->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) ? 0x04 : 0x08;
1049     bytestream_put_byte  (&buf, frame_flags);
1050
1051     bytestream_put_byte  (&buf, 0);             // reserved
1052     bytestream_put_byte  (&buf, pic->color_primaries);
1053     bytestream_put_byte  (&buf, pic->color_trc);
1054     bytestream_put_byte  (&buf, pic->colorspace);
1055     bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
1056     bytestream_put_byte  (&buf, 0);             // reserved
1057     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
1058         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
1059         // luma quantisation matrix
1060         for (i = 0; i < 64; i++)
1061             bytestream_put_byte(&buf, ctx->quant_mat[i]);
1062         // chroma quantisation matrix
1063         for (i = 0; i < 64; i++)
1064             bytestream_put_byte(&buf, ctx->quant_mat[i]);
1065     } else {
1066         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
1067     }
1068     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
1069
1070     for (ctx->cur_picture_idx = 0;
1071          ctx->cur_picture_idx < ctx->pictures_per_frame;
1072          ctx->cur_picture_idx++) {
1073         // picture header
1074         picture_size_pos = buf + 1;
1075         bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
1076         buf += 4;                                   // picture data size will be stored here
1077         bytestream_put_be16  (&buf, ctx->slices_per_picture);
1078         bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1079
1080         // seek table - will be filled during slice encoding
1081         slice_sizes = buf;
1082         buf += ctx->slices_per_picture * 2;
1083
1084         // slices
1085         if (!ctx->force_quant) {
1086             ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1087                                   ctx->mb_height);
1088             if (ret)
1089                 return ret;
1090         }
1091
1092         for (y = 0; y < ctx->mb_height; y++) {
1093             int mbs_per_slice = ctx->mbs_per_slice;
1094             for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1095                 q = ctx->force_quant ? ctx->force_quant
1096                                      : ctx->slice_q[mb + y * ctx->slices_width];
1097
1098                 while (ctx->mb_width - x < mbs_per_slice)
1099                     mbs_per_slice >>= 1;
1100
1101                 bytestream_put_byte(&buf, slice_hdr_size << 3);
1102                 slice_hdr = buf;
1103                 buf += slice_hdr_size - 1;
1104                 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1105                     uint8_t *start = pkt->data;
1106                     // Recompute new size according to max_slice_size
1107                     // and deduce delta
1108                     int delta = 200 + (ctx->pictures_per_frame *
1109                                 ctx->slices_per_picture + 1) *
1110                                 max_slice_size - pkt_size;
1111
1112                     delta = FFMAX(delta, 2 * max_slice_size);
1113                     ctx->frame_size_upper_bound += delta;
1114
1115                     if (!ctx->warn) {
1116                         avpriv_request_sample(avctx,
1117                                               "Packet too small: is %i,"
1118                                               " needs %i (slice: %i). "
1119                                               "Correct allocation",
1120                                               pkt_size, delta, max_slice_size);
1121                         ctx->warn = 1;
1122                     }
1123
1124                     ret = av_grow_packet(pkt, delta);
1125                     if (ret < 0)
1126                         return ret;
1127
1128                     pkt_size += delta;
1129                     // restore pointers
1130                     orig_buf         = pkt->data + (orig_buf         - start);
1131                     buf              = pkt->data + (buf              - start);
1132                     picture_size_pos = pkt->data + (picture_size_pos - start);
1133                     slice_sizes      = pkt->data + (slice_sizes      - start);
1134                     slice_hdr        = pkt->data + (slice_hdr        - start);
1135                     tmp              = pkt->data + (tmp              - start);
1136                 }
1137                 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1138                 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1139                                    mbs_per_slice);
1140                 if (ret < 0)
1141                     return ret;
1142
1143                 bytestream_put_byte(&slice_hdr, q);
1144                 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1145                 for (i = 0; i < ctx->num_planes - 1; i++) {
1146                     bytestream_put_be16(&slice_hdr, sizes[i]);
1147                     slice_size += sizes[i];
1148                 }
1149                 bytestream_put_be16(&slice_sizes, slice_size);
1150                 buf += slice_size - slice_hdr_size;
1151                 if (max_slice_size < slice_size)
1152                     max_slice_size = slice_size;
1153             }
1154         }
1155
1156         picture_size = buf - (picture_size_pos - 1);
1157         bytestream_put_be32(&picture_size_pos, picture_size);
1158     }
1159
1160     orig_buf -= 8;
1161     frame_size = buf - orig_buf;
1162     bytestream_put_be32(&orig_buf, frame_size);
1163
1164     pkt->size   = frame_size;
1165     *got_packet = 1;
1166
1167     return 0;
1168 }
1169
1170 static av_cold int encode_close(AVCodecContext *avctx)
1171 {
1172     ProresContext *ctx = avctx->priv_data;
1173     int i;
1174
1175     if (ctx->tdata) {
1176         for (i = 0; i < avctx->thread_count; i++)
1177             av_freep(&ctx->tdata[i].nodes);
1178     }
1179     av_freep(&ctx->tdata);
1180     av_freep(&ctx->slice_q);
1181
1182     return 0;
1183 }
1184
1185 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1186                         ptrdiff_t linesize, int16_t *block)
1187 {
1188     int x, y;
1189     const uint16_t *tsrc = src;
1190
1191     for (y = 0; y < 8; y++) {
1192         for (x = 0; x < 8; x++)
1193             block[y * 8 + x] = tsrc[x];
1194         tsrc += linesize >> 1;
1195     }
1196     fdsp->fdct(block);
1197 }
1198
1199 static av_cold int encode_init(AVCodecContext *avctx)
1200 {
1201     ProresContext *ctx = avctx->priv_data;
1202     int mps;
1203     int i, j;
1204     int min_quant, max_quant;
1205     int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
1206
1207     avctx->bits_per_raw_sample = 10;
1208
1209     ctx->fdct      = prores_fdct;
1210     ctx->scantable = interlaced ? ff_prores_interlaced_scan
1211                                 : ff_prores_progressive_scan;
1212     ff_fdctdsp_init(&ctx->fdsp, avctx);
1213
1214     mps = ctx->mbs_per_slice;
1215     if (mps & (mps - 1)) {
1216         av_log(avctx, AV_LOG_ERROR,
1217                "there should be an integer power of two MBs per slice\n");
1218         return AVERROR(EINVAL);
1219     }
1220     if (ctx->profile == PRORES_PROFILE_AUTO) {
1221         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1222         ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1223                         !(desc->log2_chroma_w + desc->log2_chroma_h))
1224                      ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1225         av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1226                "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1227                ? "4:4:4:4 profile because of the used input colorspace"
1228                : "HQ profile to keep best quality");
1229     }
1230     if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1231         if (ctx->profile != PRORES_PROFILE_4444 &&
1232             ctx->profile != PRORES_PROFILE_4444XQ) {
1233             // force alpha and warn
1234             av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1235                    "encode alpha. Override with -profile if needed.\n");
1236             ctx->alpha_bits = 0;
1237         }
1238         if (ctx->alpha_bits & 7) {
1239             av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1240             return AVERROR(EINVAL);
1241         }
1242         avctx->bits_per_coded_sample = 32;
1243     } else {
1244         ctx->alpha_bits = 0;
1245     }
1246
1247     ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1248                          ? CFACTOR_Y422
1249                          : CFACTOR_Y444;
1250     ctx->profile_info  = prores_profile_info + ctx->profile;
1251     ctx->num_planes    = 3 + !!ctx->alpha_bits;
1252
1253     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
1254
1255     if (interlaced)
1256         ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1257     else
1258         ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1259
1260     ctx->slices_width  = ctx->mb_width / mps;
1261     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1262     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1263     ctx->pictures_per_frame = 1 + interlaced;
1264
1265     if (ctx->quant_sel == -1) {
1266         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1267         ctx->quant_chroma_mat = prores_quant_matrices[ctx->profile_info->quant_chroma];
1268     } else {
1269         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1270         ctx->quant_chroma_mat = prores_quant_matrices[ctx->quant_sel];
1271     }
1272
1273     if (strlen(ctx->vendor) != 4) {
1274         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1275         return AVERROR_INVALIDDATA;
1276     }
1277
1278     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1279     if (!ctx->force_quant) {
1280         if (!ctx->bits_per_mb) {
1281             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1282                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1283                                            ctx->pictures_per_frame)
1284                     break;
1285             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
1286             if (ctx->alpha_bits)
1287                 ctx->bits_per_mb *= 20;
1288         } else if (ctx->bits_per_mb < 128) {
1289             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1290             return AVERROR_INVALIDDATA;
1291         }
1292
1293         min_quant = ctx->profile_info->min_quant;
1294         max_quant = ctx->profile_info->max_quant;
1295         for (i = min_quant; i < MAX_STORED_Q; i++) {
1296             for (j = 0; j < 64; j++) {
1297                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1298                 ctx->quants_chroma[i][j] = ctx->quant_chroma_mat[j] * i;
1299             }
1300         }
1301
1302         ctx->slice_q = av_malloc_array(ctx->slices_per_picture, sizeof(*ctx->slice_q));
1303         if (!ctx->slice_q)
1304             return AVERROR(ENOMEM);
1305
1306         ctx->tdata = av_calloc(avctx->thread_count, sizeof(*ctx->tdata));
1307         if (!ctx->tdata)
1308             return AVERROR(ENOMEM);
1309
1310         for (j = 0; j < avctx->thread_count; j++) {
1311             ctx->tdata[j].nodes = av_malloc_array(ctx->slices_width + 1,
1312                                                   TRELLIS_WIDTH
1313                                                   * sizeof(*ctx->tdata->nodes));
1314             if (!ctx->tdata[j].nodes)
1315                 return AVERROR(ENOMEM);
1316             for (i = min_quant; i < max_quant + 2; i++) {
1317                 ctx->tdata[j].nodes[i].prev_node = -1;
1318                 ctx->tdata[j].nodes[i].bits      = 0;
1319                 ctx->tdata[j].nodes[i].score     = 0;
1320             }
1321         }
1322     } else {
1323         int ls = 0;
1324         int ls_chroma = 0;
1325
1326         if (ctx->force_quant > 64) {
1327             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1328             return AVERROR_INVALIDDATA;
1329         }
1330
1331         for (j = 0; j < 64; j++) {
1332             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1333             ctx->quants_chroma[0][j] = ctx->quant_chroma_mat[j] * ctx->force_quant;
1334             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
1335             ls_chroma += av_log2((1 << 11)  / ctx->quants_chroma[0][j]) * 2 + 1;
1336         }
1337
1338         ctx->bits_per_mb = ls * 4 + ls_chroma * 4;
1339         if (ctx->chroma_factor == CFACTOR_Y444)
1340             ctx->bits_per_mb += ls_chroma * 4;
1341     }
1342
1343     ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1344                                    ctx->slices_per_picture + 1) *
1345                                   (2 + 2 * ctx->num_planes +
1346                                    (mps * ctx->bits_per_mb) / 8)
1347                                   + 200;
1348
1349     if (ctx->alpha_bits) {
1350          // The alpha plane is run-coded and might exceed the bit budget.
1351          ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1352                                          ctx->slices_per_picture + 1) *
1353          /* num pixels per slice */     (ctx->mbs_per_slice * 256 *
1354          /* bits per pixel */            (1 + ctx->alpha_bits + 1) + 7 >> 3);
1355     }
1356
1357     avctx->codec_tag   = ctx->profile_info->tag;
1358     avctx->profile = ctx->profile;
1359
1360     av_log(avctx, AV_LOG_DEBUG,
1361            "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1362            ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1363            interlaced ? "yes" : "no", ctx->bits_per_mb);
1364     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1365            ctx->frame_size_upper_bound);
1366
1367     return 0;
1368 }
1369
/* Byte offset of a ProresContext member, for the option table. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Option flags shared by every entry; parenthesised so the expansion stays
 * a single operand when used inside a larger expression. */
#define VE     (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1372
1373 static const AVOption options[] = {
1374     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1375         AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1376     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1377         { .i64 = PRORES_PROFILE_AUTO },
1378         PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
1379     { "auto",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1380         0, 0, VE, "profile" },
1381     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1382         0, 0, VE, "profile" },
1383     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1384         0, 0, VE, "profile" },
1385     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1386         0, 0, VE, "profile" },
1387     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1388         0, 0, VE, "profile" },
1389     { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1390         0, 0, VE, "profile" },
1391     { "4444xq",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
1392         0, 0, VE, "profile" },
1393     { "vendor", "vendor ID", OFFSET(vendor),
1394         AV_OPT_TYPE_STRING, { .str = "Lavc" }, 0, 0, VE },
1395     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1396         AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1397     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1398         { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1399     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1400         0, 0, VE, "quant_mat" },
1401     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1402         0, 0, VE, "quant_mat" },
1403     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1404         0, 0, VE, "quant_mat" },
1405     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1406         0, 0, VE, "quant_mat" },
1407     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1408         0, 0, VE, "quant_mat" },
1409     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1410         0, 0, VE, "quant_mat" },
1411     { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1412         { .i64 = 16 }, 0, 16, VE },
1413     { NULL }
1414 };
1415
1416 static const AVClass proresenc_class = {
1417     .class_name = "ProRes encoder",
1418     .item_name  = av_default_item_name,
1419     .option     = options,
1420     .version    = LIBAVUTIL_VERSION_INT,
1421 };
1422
1423 const FFCodec ff_prores_ks_encoder = {
1424     .p.name         = "prores_ks",
1425     CODEC_LONG_NAME("Apple ProRes (iCodec Pro)"),
1426     .p.type         = AVMEDIA_TYPE_VIDEO,
1427     .p.id           = AV_CODEC_ID_PRORES,
1428     .priv_data_size = sizeof(ProresContext),
1429     .init           = encode_init,
1430     .close          = encode_close,
1431     FF_CODEC_ENCODE_CB(encode_frame),
1432     .p.capabilities = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS |
1433                       AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
1434     .p.pix_fmts     = (const enum AVPixelFormat[]) {
1435                           AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1436                           AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1437                       },
1438     .p.priv_class   = &proresenc_class,
1439     .p.profiles     = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
1440     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
1441 };