src/third_party/ffmpeg/libavcodec/diracdec.c

   1 /*
   2  * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
   3  * Copyright (C) 2009 David Conrad
   4  * Copyright (C) 2011 Jordi Ortiz
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * Dirac Decoder
  26  * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
  27  */
  28
  29 #include "avcodec.h"
  30 #include "get_bits.h"
  31 #include "bytestream.h"
  32 #include "internal.h"
  33 #include "golomb.h"
  34 #include "dirac_arith.h"
  35 #include "mpeg12data.h"
  36 #include "libavcodec/mpegvideo.h"
  37 #include "mpegvideoencdsp.h"
  38 #include "dirac_dwt.h"
  39 #include "dirac.h"
  40 #include "diracdsp.h"
  41 #include "videodsp.h"
  42
/**
 * The spec limits the number of wavelet decompositions to 4 for both
 * level 1 (VC-2) and 128 (long-gop default).
 * 5 decompositions is the maximum before >16-bit buffers are needed.
 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
 * the others to 4 decompositions (or 3 for the fidelity filter).
 *
 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
 * It sizes the per-plane subband array and the per-level codeblock and
 * low-delay quantiser tables in this file.
 */
#define MAX_DWT_LEVELS 5

/**
 * Upper bound on simultaneously held reference pictures.
 * The spec limits this to 3 for frame coding, but in practice it can be
 * as high as 6.
 */
#define MAX_REFERENCE_FRAMES 8
#define MAX_DELAY 5         /* limit for main profile for frame coding (TODO: field coding) */
/* size of the DiracContext.all_frames[] pool */
#define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
/* max quant for VC-2; also the last valid index of the quantiser tables */
#define MAX_QUANT 68
#define MAX_BLOCKSIZE 32    /* maximum xblen/yblen we support */
  62
/**
 * DiracBlock->ref flags; if a flag is set then the block does MC from the
 * given reference (or uses global motion for DIRAC_REF_MASK_GLOBAL).
 * The values are distinct bits so they can be OR-ed together.
 */
#define DIRAC_REF_MASK_REF1   1
#define DIRAC_REF_MASK_REF2   2
#define DIRAC_REF_MASK_GLOBAL 4

/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4
  75
  76 #define CALC_PADDING(size, depth)                       \
  77     (((size + (1 << depth) - 1) >> depth) << depth)
  78
  79 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
  80
/**
 * A picture held by the decoder together with its per-plane
 * half-pel interpolation planes.
 */
typedef struct {
    AVFrame *avframe;
    int interpolated[3];    /* 1 if hpel[] is valid */
    uint8_t *hpel[3][4];    /* [plane][phase] planes used for MC */
    /* [plane][phase] base pointers; entries 1..3 are released with
     * av_freep() in free_sequence_buffers(), entry 0 is not */
    uint8_t *hpel_base[3][4];
} DiracFrame;
  87
/**
 * Per-block prediction data: either motion vectors or DC values,
 * sharing storage, plus the DIRAC_REF_MASK_* flags for the block.
 */
typedef struct {
    union {
        int16_t mv[2][2];
        int16_t dc[3];
    } u; /* anonymous unions aren't in C99 :( */
    uint8_t ref;    /* combination of DIRAC_REF_MASK_* flags */
} DiracBlock;
  95
/**
 * One wavelet subband of a plane. The coefficients live inside the
 * plane's shared IDWT buffer; init_planes() sets ibuf/stride so that
 * the band can be addressed as a 2D array.
 */
typedef struct SubBand {
    int level;                  /* decomposition level, 0 is the coarsest */
    int orientation;            /* one of enum dirac_subband */
    int stride;                 /* distance between rows, in IDWTELEM units */
    int width;
    int height;
    int quant;                  /* current quantiser index for this band */
    IDWTELEM *ibuf;             /* first coefficient of the band */
    struct SubBand *parent;     /* same orientation, one level coarser (set for level > 0) */

    /* for low delay */
    unsigned length;            /* coded size of the band in bytes, 0 if skipped */
    const uint8_t *coeff_data;  /* points into the input packet */
} SubBand;
 110
/**
 * Per-component (Y, U, V) geometry, IDWT buffers, motion-compensation
 * block parameters and subband descriptors.
 */
typedef struct Plane {
    int width;
    int height;
    ptrdiff_t stride;

    /* dimensions padded to a multiple of 1 << wavelet_depth, see init_planes() */
    int idwt_width;
    int idwt_height;
    int idwt_stride;
    IDWTELEM *idwt_buf;         /* idwt_buf_base advanced past the top padding rows */
    IDWTELEM *idwt_buf_base;    /* allocation owned by the context */
    IDWTELEM *idwt_tmp;         /* scratch buffer of padded width + 16 elements */

    /* block length */
    uint8_t xblen;
    uint8_t yblen;
    /* block separation (block n+1 starts after this many pixels in block n) */
    uint8_t xbsep;
    uint8_t ybsep;
    /* amount of overspill on each edge (half of the overlap between blocks) */
    uint8_t xoffset;
    uint8_t yoffset;

    /* [level][orientation]; at level 0 all four orientations are used,
     * at higher levels only HL, LH and HH */
    SubBand band[MAX_DWT_LEVELS][4];
} Plane;
 135
/**
 * Decoder state (AVCodecContext.priv_data): DSP contexts, sequence and
 * picture parameters, working buffers and the pool of decoded frames.
 */
typedef struct DiracContext {
    AVCodecContext *avctx;
    MpegvideoEncDSPContext mpvencdsp;
    VideoDSPContext vdsp;
    DiracDSPContext diracdsp;
    GetBitContext gb;
    dirac_source_params source;
    int seen_sequence_header;
    int frame_number;           /* number of the next frame to display       */
    Plane plane[3];
    int chroma_x_shift;
    int chroma_y_shift;

    int zero_res;               /* zero residue flag                         */
    int is_arith;               /* whether coeffs use arith or golomb coding */
    int low_delay;              /* use the low delay syntax                  */
    int globalmc_flag;          /* use global motion compensation            */
    int num_refs;               /* number of reference pictures              */

    /* wavelet decoding */
    unsigned wavelet_depth;     /* depth of the IDWT                         */
    unsigned wavelet_idx;

    /**
     * schroedinger older than 1.0.8 doesn't store
     * quant delta if only one codebook exists in a band
     */
    unsigned old_delta_quant;
    unsigned codeblock_mode;

    /* number of codeblocks per band; indexed by
     * level + (orientation != subband_ll), see decode_subband_internal() */
    struct {
        unsigned width;
        unsigned height;
    } codeblock[MAX_DWT_LEVELS+1];

    struct {
        unsigned num_x;         /* number of horizontal slices               */
        unsigned num_y;         /* number of vertical slices                 */
        AVRational bytes;       /* average bytes per slice                   */
        uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
    } lowdelay;

    struct {
        int pan_tilt[2];        /* pan/tilt vector                           */
        int zrs[2][2];          /* zoom/rotate/shear matrix                  */
        int perspective[2];     /* perspective vector                        */
        unsigned zrs_exp;
        unsigned perspective_exp;
    } globalmc[2];

    /* motion compensation */
    /* NOTE(review): the original comment here cited REFS_WT_PRECISION, which
     * looks copy-pasted from weight_log2denom below; presumably this is the
     * motion vector precision - confirm against the spec */
    uint8_t mv_precision;
    int16_t weight[2];          /* [DIRAC_STD] REF1_WT and REF2_WT           */
    unsigned weight_log2denom;  /* [DIRAC_STD] REFS_WT_PRECISION             */

    int blwidth;                /* number of blocks (horizontally)           */
    int blheight;               /* number of blocks (vertically)             */
    int sbwidth;                /* number of superblocks (horizontally)      */
    int sbheight;               /* number of superblocks (vertically)        */

    uint8_t *sbsplit;           /* one byte per superblock                   */
    DiracBlock *blmotion;       /* 16 blocks per superblock                  */

    uint8_t *edge_emu_buffer[4];
    uint8_t *edge_emu_buffer_base;

    uint16_t *mctmp;            /* buffer holding the MC data multiplied by OBMC weights */
    uint8_t *mcscratch;
    int buffer_stride;          /* stride the MC buffers were sized for, 0 if unallocated */

    DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];

    void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
    void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
    void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
    dirac_weight_func weight_func;
    dirac_biweight_func biweight_func;

    DiracFrame *current_picture;
    DiracFrame *ref_pics[2];

    /* NULL-terminated lists of pointers into all_frames[] */
    DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
    DiracFrame *delay_frames[MAX_DELAY+1];
    DiracFrame all_frames[MAX_FRAMES];
} DiracContext;
 221
/**
 * Dirac Specification ->
 * Parse code values. 9.6.1 Table 9.1
 *
 * Only the non-picture parse codes are listed here.
 */
enum dirac_parse_code {
    pc_seq_header         = 0x00,   /* sequence header */
    pc_eos                = 0x10,   /* presumably end of sequence */
    pc_aux_data           = 0x20,   /* auxiliary data */
    pc_padding            = 0x30,   /* padding data */
};
 232
/**
 * Subband orientations, used as the second index of Plane.band[][].
 * Bit 0 selects the right half and bit 1 the lower half of the
 * coefficient layout (see the ibuf offsets in init_planes()).
 */
enum dirac_subband {
    subband_ll = 0,
    subband_hl = 1,
    subband_lh = 2,
    subband_hh = 3,
    subband_nb,     /* number of orientations */
};
 240
/**
 * Default quantisation matrices, 4 levels x 4 orientations per entry.
 * NOTE(review): presumably the outer index is the wavelet filter index
 * (there are 7 rows) and the inner indices are [level][orientation];
 * the code using this table is outside this part of the file - confirm.
 */
static const uint8_t default_qmat[][4][4] = {
    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
    { { 4,  2,  2,  0}, { 0,  4,  4,  2}, { 0,  5,  5,  3}, { 0,  7,  7,  5} },
    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
    { { 0,  4,  4,  8}, { 0,  8,  8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
    { { 3,  1,  1,  0}, { 0,  4,  4,  2}, { 0,  6,  6,  5}, { 0,  9,  9,  7} },
};
 250
 251 static const int qscale_tab[MAX_QUANT+1] = {
 252     4,     5,     6,     7,     8,    10,    11,    13,
 253     16,    19,    23,    27,    32,    38,    45,    54,
 254     64,    76,    91,   108,   128,   152,   181,   215,
 255     256,   304,   362,   431,   512,   609,   724,   861,
 256     1024,  1218,  1448,  1722,  2048,  2435,  2896,  3444,
 257     4096,  4871,  5793,  6889,  8192,  9742, 11585, 13777,
 258     16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
 259     65536, 77936
 260 };
 261
 262 static const int qoffset_intra_tab[MAX_QUANT+1] = {
 263     1,     2,     3,     4,     4,     5,     6,     7,
 264     8,    10,    12,    14,    16,    19,    23,    27,
 265     32,    38,    46,    54,    64,    76,    91,   108,
 266     128,   152,   181,   216,   256,   305,   362,   431,
 267     512,   609,   724,   861,  1024,  1218,  1448,  1722,
 268     2048,  2436,  2897,  3445,  4096,  4871,  5793,  6889,
 269     8192,  9742, 11585, 13777, 16384, 19484, 23171, 27555,
 270     32768, 38968
 271 };
 272
 273 static const int qoffset_inter_tab[MAX_QUANT+1] = {
 274     1,     2,     2,     3,     3,     4,     4,     5,
 275     6,     7,     9,    10,    12,    14,    17,    20,
 276     24,    29,    34,    41,    48,    57,    68,    81,
 277     96,   114,   136,   162,   192,   228,   272,   323,
 278     384,   457,   543,   646,   768,   913,  1086,  1292,
 279     1536,  1827,  2172,  2583,  3072,  3653,  4344,  5166,
 280     6144,  7307,  8689, 10333, 12288, 14613, 17378, 20666,
 281     24576, 29226
 282 };
 283
 284 /* magic number division by 3 from schroedinger */
 285 static inline int divide3(int x)
 286 {
 287     return ((x+1)*21845 + 10922) >> 16;
 288 }
 289
 290 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
 291 {
 292     DiracFrame *remove_pic = NULL;
 293     int i, remove_idx = -1;
 294
 295     for (i = 0; framelist[i]; i++)
 296         if (framelist[i]->avframe->display_picture_number == picnum) {
 297             remove_pic = framelist[i];
 298             remove_idx = i;
 299         }
 300
 301     if (remove_pic)
 302         for (i = remove_idx; framelist[i]; i++)
 303             framelist[i] = framelist[i+1];
 304
 305     return remove_pic;
 306 }
 307
 308 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
 309 {
 310     int i;
 311     for (i = 0; i < maxframes; i++)
 312         if (!framelist[i]) {
 313             framelist[i] = frame;
 314             return 0;
 315         }
 316     return -1;
 317 }
 318
 319 static int alloc_sequence_buffers(DiracContext *s)
 320 {
 321     int sbwidth  = DIVRNDUP(s->source.width,  4);
 322     int sbheight = DIVRNDUP(s->source.height, 4);
 323     int i, w, h, top_padding;
 324
 325     /* todo: think more about this / use or set Plane here */
 326     for (i = 0; i < 3; i++) {
 327         int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
 328         int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
 329         w = s->source.width  >> (i ? s->chroma_x_shift : 0);
 330         h = s->source.height >> (i ? s->chroma_y_shift : 0);
 331
 332         /* we allocate the max we support here since num decompositions can
 333          * change from frame to frame. Stride is aligned to 16 for SIMD, and
 334          * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
 335          * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
 336          * on each side */
 337         top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
 338         w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
 339         h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
 340
 341         s->plane[i].idwt_buf_base = av_mallocz_array((w+max_xblen), h * sizeof(IDWTELEM));
 342         s->plane[i].idwt_tmp      = av_malloc_array((w+16), sizeof(IDWTELEM));
 343         s->plane[i].idwt_buf      = s->plane[i].idwt_buf_base + top_padding*w;
 344         if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
 345             return AVERROR(ENOMEM);
 346     }
 347
 348     /* fixme: allocate using real stride here */
 349     s->sbsplit  = av_malloc_array(sbwidth, sbheight);
 350     s->blmotion = av_malloc_array(sbwidth, sbheight * 16 * sizeof(*s->blmotion));
 351
 352     if (!s->sbsplit || !s->blmotion)
 353         return AVERROR(ENOMEM);
 354     return 0;
 355 }
 356
 357 static int alloc_buffers(DiracContext *s, int stride)
 358 {
 359     int w = s->source.width;
 360     int h = s->source.height;
 361
 362     av_assert0(stride >= w);
 363     stride += 64;
 364
 365     if (s->buffer_stride >= stride)
 366         return 0;
 367     s->buffer_stride = 0;
 368
 369     av_freep(&s->edge_emu_buffer_base);
 370     memset(s->edge_emu_buffer, 0, sizeof(s->edge_emu_buffer));
 371     av_freep(&s->mctmp);
 372     av_freep(&s->mcscratch);
 373
 374     s->edge_emu_buffer_base = av_malloc_array(stride, MAX_BLOCKSIZE);
 375
 376     s->mctmp     = av_malloc_array((stride+MAX_BLOCKSIZE), (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
 377     s->mcscratch = av_malloc_array(stride, MAX_BLOCKSIZE);
 378
 379     if (!s->edge_emu_buffer_base || !s->mctmp || !s->mcscratch)
 380         return AVERROR(ENOMEM);
 381
 382     s->buffer_stride = stride;
 383     return 0;
 384 }
 385
 386 static void free_sequence_buffers(DiracContext *s)
 387 {
 388     int i, j, k;
 389
 390     for (i = 0; i < MAX_FRAMES; i++) {
 391         if (s->all_frames[i].avframe->data[0]) {
 392             av_frame_unref(s->all_frames[i].avframe);
 393             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
 394         }
 395
 396         for (j = 0; j < 3; j++)
 397             for (k = 1; k < 4; k++)
 398                 av_freep(&s->all_frames[i].hpel_base[j][k]);
 399     }
 400
 401     memset(s->ref_frames, 0, sizeof(s->ref_frames));
 402     memset(s->delay_frames, 0, sizeof(s->delay_frames));
 403
 404     for (i = 0; i < 3; i++) {
 405         av_freep(&s->plane[i].idwt_buf_base);
 406         av_freep(&s->plane[i].idwt_tmp);
 407     }
 408
 409     s->buffer_stride = 0;
 410     av_freep(&s->sbsplit);
 411     av_freep(&s->blmotion);
 412     av_freep(&s->edge_emu_buffer_base);
 413
 414     av_freep(&s->mctmp);
 415     av_freep(&s->mcscratch);
 416 }
 417
 418 static av_cold int dirac_decode_init(AVCodecContext *avctx)
 419 {
 420     DiracContext *s = avctx->priv_data;
 421     int i;
 422
 423     s->avctx = avctx;
 424     s->frame_number = -1;
 425
 426     ff_diracdsp_init(&s->diracdsp);
 427     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 428     ff_videodsp_init(&s->vdsp, 8);
 429
 430     for (i = 0; i < MAX_FRAMES; i++) {
 431         s->all_frames[i].avframe = av_frame_alloc();
 432         if (!s->all_frames[i].avframe) {
 433             while (i > 0)
 434                 av_frame_free(&s->all_frames[--i].avframe);
 435             return AVERROR(ENOMEM);
 436         }
 437     }
 438
 439     return 0;
 440 }
 441
 442 static void dirac_decode_flush(AVCodecContext *avctx)
 443 {
 444     DiracContext *s = avctx->priv_data;
 445     free_sequence_buffers(s);
 446     s->seen_sequence_header = 0;
 447     s->frame_number = -1;
 448 }
 449
 450 static av_cold int dirac_decode_end(AVCodecContext *avctx)
 451 {
 452     DiracContext *s = avctx->priv_data;
 453     int i;
 454
 455     dirac_decode_flush(avctx);
 456     for (i = 0; i < MAX_FRAMES; i++)
 457         av_frame_free(&s->all_frames[i].avframe);
 458
 459     return 0;
 460 }
 461
 462 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
 463
 464 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
 465                                       SubBand *b, IDWTELEM *buf, int x, int y)
 466 {
 467     int coeff, sign;
 468     int sign_pred = 0;
 469     int pred_ctx = CTX_ZPZN_F1;
 470
 471     /* Check if the parent subband has a 0 in the corresponding position */
 472     if (b->parent)
 473         pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
 474
 475     if (b->orientation == subband_hl)
 476         sign_pred = buf[-b->stride];
 477
 478     /* Determine if the pixel has only zeros in its neighbourhood */
 479     if (x) {
 480         pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
 481         if (b->orientation == subband_lh)
 482             sign_pred = buf[-1];
 483     } else {
 484         pred_ctx += !buf[-b->stride];
 485     }
 486
 487     coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
 488     if (coeff) {
 489         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 490         sign  = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
 491         coeff = (coeff ^ -sign) + sign;
 492     }
 493     *buf = coeff;
 494 }
 495
 496 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
 497 {
 498     int sign, coeff;
 499
 500     coeff = svq3_get_ue_golomb(gb);
 501     if (coeff) {
 502         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 503         sign  = get_bits1(gb);
 504         coeff = (coeff ^ -sign) + sign;
 505     }
 506     return coeff;
 507 }
 508
 509 /**
 510  * Decode the coeffs in the rectangle defined by left, right, top, bottom
 511  * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
 512  */
 513 static inline void codeblock(DiracContext *s, SubBand *b,
 514                              GetBitContext *gb, DiracArith *c,
 515                              int left, int right, int top, int bottom,
 516                              int blockcnt_one, int is_arith)
 517 {
 518     int x, y, zero_block;
 519     int qoffset, qfactor;
 520     IDWTELEM *buf;
 521
 522     /* check for any coded coefficients in this codeblock */
 523     if (!blockcnt_one) {
 524         if (is_arith)
 525             zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
 526         else
 527             zero_block = get_bits1(gb);
 528
 529         if (zero_block)
 530             return;
 531     }
 532
 533     if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
 534         int quant = b->quant;
 535         if (is_arith)
 536             quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
 537         else
 538             quant += dirac_get_se_golomb(gb);
 539         if (quant < 0) {
 540             av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
 541             return;
 542         }
 543         b->quant = quant;
 544     }
 545
 546     b->quant = FFMIN(b->quant, MAX_QUANT);
 547
 548     qfactor = qscale_tab[b->quant];
 549     /* TODO: context pointer? */
 550     if (!s->num_refs)
 551         qoffset = qoffset_intra_tab[b->quant];
 552     else
 553         qoffset = qoffset_inter_tab[b->quant];
 554
 555     buf = b->ibuf + top * b->stride;
 556     for (y = top; y < bottom; y++) {
 557         for (x = left; x < right; x++) {
 558             /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
 559             if (is_arith)
 560                 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
 561             else
 562                 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 563         }
 564         buf += b->stride;
 565     }
 566 }
 567
 568 /**
 569  * Dirac Specification ->
 570  * 13.3 intra_dc_prediction(band)
 571  */
 572 static inline void intra_dc_prediction(SubBand *b)
 573 {
 574     IDWTELEM *buf = b->ibuf;
 575     int x, y;
 576
 577     for (x = 1; x < b->width; x++)
 578         buf[x] += buf[x-1];
 579     buf += b->stride;
 580
 581     for (y = 1; y < b->height; y++) {
 582         buf[0] += buf[-b->stride];
 583
 584         for (x = 1; x < b->width; x++) {
 585             int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
 586             buf[x]  += divide3(pred);
 587         }
 588         buf += b->stride;
 589     }
 590 }
 591
 592 /**
 593  * Dirac Specification ->
 594  * 13.4.2 Non-skipped subbands.  subband_coeffs()
 595  */
 596 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
 597 {
 598     int cb_x, cb_y, left, right, top, bottom;
 599     DiracArith c;
 600     GetBitContext gb;
 601     int cb_width  = s->codeblock[b->level + (b->orientation != subband_ll)].width;
 602     int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
 603     int blockcnt_one = (cb_width + cb_height) == 2;
 604
 605     if (!b->length)
 606         return;
 607
 608     init_get_bits8(&gb, b->coeff_data, b->length);
 609
 610     if (is_arith)
 611         ff_dirac_init_arith_decoder(&c, &gb, b->length);
 612
 613     top = 0;
 614     for (cb_y = 0; cb_y < cb_height; cb_y++) {
 615         bottom = (b->height * (cb_y+1)) / cb_height;
 616         left = 0;
 617         for (cb_x = 0; cb_x < cb_width; cb_x++) {
 618             right = (b->width * (cb_x+1)) / cb_width;
 619             codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
 620             left = right;
 621         }
 622         top = bottom;
 623     }
 624
 625     if (b->orientation == subband_ll && s->num_refs == 0)
 626         intra_dc_prediction(b);
 627 }
 628
 629 static int decode_subband_arith(AVCodecContext *avctx, void *b)
 630 {
 631     DiracContext *s = avctx->priv_data;
 632     decode_subband_internal(s, b, 1);
 633     return 0;
 634 }
 635
 636 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
 637 {
 638     DiracContext *s = avctx->priv_data;
 639     SubBand **b     = arg;
 640     decode_subband_internal(s, *b, 0);
 641     return 0;
 642 }
 643
 644 /**
 645  * Dirac Specification ->
 646  * [DIRAC_STD] 13.4.1 core_transform_data()
 647  */
 648 static void decode_component(DiracContext *s, int comp)
 649 {
 650     AVCodecContext *avctx = s->avctx;
 651     SubBand *bands[3*MAX_DWT_LEVELS+1];
 652     enum dirac_subband orientation;
 653     int level, num_bands = 0;
 654
 655     /* Unpack all subbands at all levels. */
 656     for (level = 0; level < s->wavelet_depth; level++) {
 657         for (orientation = !!level; orientation < 4; orientation++) {
 658             SubBand *b = &s->plane[comp].band[level][orientation];
 659             bands[num_bands++] = b;
 660
 661             align_get_bits(&s->gb);
 662             /* [DIRAC_STD] 13.4.2 subband() */
 663             b->length = svq3_get_ue_golomb(&s->gb);
 664             if (b->length) {
 665                 b->quant = svq3_get_ue_golomb(&s->gb);
 666                 align_get_bits(&s->gb);
 667                 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
 668                 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
 669                 skip_bits_long(&s->gb, b->length*8);
 670             }
 671         }
 672         /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
 673         if (s->is_arith)
 674             avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
 675                            NULL, 4-!!level, sizeof(SubBand));
 676     }
 677     /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
 678     if (!s->is_arith)
 679         avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
 680 }
 681
 682 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
 683 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
 684 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
 685                              int slice_x, int slice_y, int bits_end,
 686                              SubBand *b1, SubBand *b2)
 687 {
 688     int left   = b1->width  * slice_x    / s->lowdelay.num_x;
 689     int right  = b1->width  *(slice_x+1) / s->lowdelay.num_x;
 690     int top    = b1->height * slice_y    / s->lowdelay.num_y;
 691     int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
 692
 693     int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
 694     int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
 695
 696     IDWTELEM *buf1 =      b1->ibuf + top * b1->stride;
 697     IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
 698     int x, y;
 699     /* we have to constantly check for overread since the spec explicitly
 700        requires this, with the meaning that all remaining coeffs are set to 0 */
 701     if (get_bits_count(gb) >= bits_end)
 702         return;
 703
 704     for (y = top; y < bottom; y++) {
 705         for (x = left; x < right; x++) {
 706             buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 707             if (get_bits_count(gb) >= bits_end)
 708                 return;
 709             if (buf2) {
 710                 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 711                 if (get_bits_count(gb) >= bits_end)
 712                     return;
 713             }
 714         }
 715         buf1 += b1->stride;
 716         if (buf2)
 717             buf2 += b2->stride;
 718     }
 719 }
 720
/**
 * Work item for decode_lowdelay_slice(): one slice of a low-delay picture.
 */
struct lowdelay_slice {
    GetBitContext gb;   /* bit reader starting at the first byte of the slice */
    int slice_x;        /* horizontal slice index */
    int slice_y;        /* vertical slice index */
    int bytes;          /* size of the slice in bytes */
};
 727
 728
 729 /**
 730  * Dirac Specification ->
 731  * 13.5.2 Slices. slice(sx,sy)
 732  */
 733 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
 734 {
 735     DiracContext *s = avctx->priv_data;
 736     struct lowdelay_slice *slice = arg;
 737     GetBitContext *gb = &slice->gb;
 738     enum dirac_subband orientation;
 739     int level, quant, chroma_bits, chroma_end;
 740
 741     int quant_base  = get_bits(gb, 7); /*[DIRAC_STD] qindex */
 742     int length_bits = av_log2(8 * slice->bytes)+1;
 743     int luma_bits   = get_bits_long(gb, length_bits);
 744     int luma_end    = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
 745
 746     /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
 747     for (level = 0; level < s->wavelet_depth; level++)
 748         for (orientation = !!level; orientation < 4; orientation++) {
 749             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 750             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
 751                              &s->plane[0].band[level][orientation], NULL);
 752         }
 753
 754     /* consume any unused bits from luma */
 755     skip_bits_long(gb, get_bits_count(gb) - luma_end);
 756
 757     chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
 758     chroma_end  = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
 759     /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
 760     for (level = 0; level < s->wavelet_depth; level++)
 761         for (orientation = !!level; orientation < 4; orientation++) {
 762             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 763             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
 764                              &s->plane[1].band[level][orientation],
 765                              &s->plane[2].band[level][orientation]);
 766         }
 767
 768     return 0;
 769 }
 770
 771 /**
 772  * Dirac Specification ->
 773  * 13.5.1 low_delay_transform_data()
 774  */
 775 static void decode_lowdelay(DiracContext *s)
 776 {
 777     AVCodecContext *avctx = s->avctx;
 778     int slice_x, slice_y, bytes, bufsize;
 779     const uint8_t *buf;
 780     struct lowdelay_slice *slices;
 781     int slice_num = 0;
 782
 783     slices = av_mallocz_array(s->lowdelay.num_x, s->lowdelay.num_y * sizeof(struct lowdelay_slice));
 784
 785     align_get_bits(&s->gb);
 786     /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
 787     buf = s->gb.buffer + get_bits_count(&s->gb)/8;
 788     bufsize = get_bits_left(&s->gb);
 789
 790     for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
 791         for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
 792             bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
 793                 - slice_num    * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
 794
 795             slices[slice_num].bytes   = bytes;
 796             slices[slice_num].slice_x = slice_x;
 797             slices[slice_num].slice_y = slice_y;
 798             init_get_bits(&slices[slice_num].gb, buf, bufsize);
 799             slice_num++;
 800
 801             buf     += bytes;
 802             bufsize -= bytes*8;
 803         }
 804
 805     avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
 806                    sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
 807     intra_dc_prediction(&s->plane[0].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 808     intra_dc_prediction(&s->plane[1].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 809     intra_dc_prediction(&s->plane[2].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 810     av_free(slices);
 811 }
 812
 813 static void init_planes(DiracContext *s)
 814 {
 815     int i, w, h, level, orientation;
 816
 817     for (i = 0; i < 3; i++) {
 818         Plane *p = &s->plane[i];
 819
 820         p->width       = s->source.width  >> (i ? s->chroma_x_shift : 0);
 821         p->height      = s->source.height >> (i ? s->chroma_y_shift : 0);
 822         p->idwt_width  = w = CALC_PADDING(p->width , s->wavelet_depth);
 823         p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
 824         p->idwt_stride = FFALIGN(p->idwt_width, 8);
 825
 826         for (level = s->wavelet_depth-1; level >= 0; level--) {
 827             w = w>>1;
 828             h = h>>1;
 829             for (orientation = !!level; orientation < 4; orientation++) {
 830                 SubBand *b = &p->band[level][orientation];
 831
 832                 b->ibuf   = p->idwt_buf;
 833                 b->level  = level;
 834                 b->stride = p->idwt_stride << (s->wavelet_depth - level);
 835                 b->width  = w;
 836                 b->height = h;
 837                 b->orientation = orientation;
 838
 839                 if (orientation & 1)
 840                     b->ibuf += w;
 841                 if (orientation > 1)
 842                     b->ibuf += b->stride>>1;
 843
 844                 if (level)
 845                     b->parent = &p->band[level-1][orientation];
 846             }
 847         }
 848
 849         if (i > 0) {
 850             p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
 851             p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
 852             p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
 853             p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
 854         }
 855
 856         p->xoffset = (p->xblen - p->xbsep)/2;
 857         p->yoffset = (p->yblen - p->ybsep)/2;
 858     }
 859 }
 860
 861 /**
 862  * Unpack the motion compensation parameters
 863  * Dirac Specification ->
 864  * 11.2 Picture prediction data. picture_prediction()
 865  */
 866 static int dirac_unpack_prediction_parameters(DiracContext *s)
 867 {
 868     static const uint8_t default_blen[] = { 4, 12, 16, 24 };
 869     static const uint8_t default_bsep[] = { 4,  8, 12, 16 };
 870
 871     GetBitContext *gb = &s->gb;
 872     unsigned idx, ref;
 873
 874     align_get_bits(gb);
 875     /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
 876     /* Luma and Chroma are equal. 11.2.3 */
 877     idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
 878
 879     if (idx > 4) {
 880         av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
 881         return -1;
 882     }
 883
 884     if (idx == 0) {
 885         s->plane[0].xblen = svq3_get_ue_golomb(gb);
 886         s->plane[0].yblen = svq3_get_ue_golomb(gb);
 887         s->plane[0].xbsep = svq3_get_ue_golomb(gb);
 888         s->plane[0].ybsep = svq3_get_ue_golomb(gb);
 889     } else {
 890         /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
 891         s->plane[0].xblen = default_blen[idx-1];
 892         s->plane[0].yblen = default_blen[idx-1];
 893         s->plane[0].xbsep = default_bsep[idx-1];
 894         s->plane[0].ybsep = default_bsep[idx-1];
 895     }
 896     /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
 897       Calculated in function dirac_unpack_block_motion_data */
 898
 899     if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
 900         av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
 901         return -1;
 902     }
 903     if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
 904         av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
 905         return -1;
 906     }
 907     if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
 908         av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
 909         return -1;
 910     }
 911
 912     /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
 913       Read motion vector precision */
 914     s->mv_precision = svq3_get_ue_golomb(gb);
 915     if (s->mv_precision > 3) {
 916         av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
 917         return -1;
 918     }
 919
 920     /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
 921       Read the global motion compensation parameters */
 922     s->globalmc_flag = get_bits1(gb);
 923     if (s->globalmc_flag) {
 924         memset(s->globalmc, 0, sizeof(s->globalmc));
 925         /* [DIRAC_STD] pan_tilt(gparams) */
 926         for (ref = 0; ref < s->num_refs; ref++) {
 927             if (get_bits1(gb)) {
 928                 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
 929                 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
 930             }
 931             /* [DIRAC_STD] zoom_rotate_shear(gparams)
 932                zoom/rotation/shear parameters */
 933             if (get_bits1(gb)) {
 934                 s->globalmc[ref].zrs_exp   = svq3_get_ue_golomb(gb);
 935                 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
 936                 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
 937                 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
 938                 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
 939             } else {
 940                 s->globalmc[ref].zrs[0][0] = 1;
 941                 s->globalmc[ref].zrs[1][1] = 1;
 942             }
 943             /* [DIRAC_STD] perspective(gparams) */
 944             if (get_bits1(gb)) {
 945                 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
 946                 s->globalmc[ref].perspective[0]  = dirac_get_se_golomb(gb);
 947                 s->globalmc[ref].perspective[1]  = dirac_get_se_golomb(gb);
 948             }
 949         }
 950     }
 951
 952     /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
 953       Picture prediction mode, not currently used. */
 954     if (svq3_get_ue_golomb(gb)) {
 955         av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
 956         return -1;
 957     }
 958
 959     /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
 960        just data read, weight calculation will be done later on. */
 961     s->weight_log2denom = 1;
 962     s->weight[0]        = 1;
 963     s->weight[1]        = 1;
 964
 965     if (get_bits1(gb)) {
 966         s->weight_log2denom = svq3_get_ue_golomb(gb);
 967         s->weight[0] = dirac_get_se_golomb(gb);
 968         if (s->num_refs == 2)
 969             s->weight[1] = dirac_get_se_golomb(gb);
 970     }
 971     return 0;
 972 }
 973
 974 /**
 975  * Dirac Specification ->
 976  * 11.3 Wavelet transform data. wavelet_transform()
 977  */
 978 static int dirac_unpack_idwt_params(DiracContext *s)
 979 {
 980     GetBitContext *gb = &s->gb;
 981     int i, level;
 982     unsigned tmp;
 983
 984 #define CHECKEDREAD(dst, cond, errmsg) \
 985     tmp = svq3_get_ue_golomb(gb); \
 986     if (cond) { \
 987         av_log(s->avctx, AV_LOG_ERROR, errmsg); \
 988         return -1; \
 989     }\
 990     dst = tmp;
 991
 992     align_get_bits(gb);
 993
 994     s->zero_res = s->num_refs ? get_bits1(gb) : 0;
 995     if (s->zero_res)
 996         return 0;
 997
 998     /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
 999     CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
1000
1001     CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
1002
1003     if (!s->low_delay) {
1004         /* Codeblock parameters (core syntax only) */
1005         if (get_bits1(gb)) {
1006             for (i = 0; i <= s->wavelet_depth; i++) {
1007                 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
1008                 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
1009             }
1010
1011             CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
1012         } else
1013             for (i = 0; i <= s->wavelet_depth; i++)
1014                 s->codeblock[i].width = s->codeblock[i].height = 1;
1015     } else {
1016         /* Slice parameters + quantization matrix*/
1017         /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
1018         s->lowdelay.num_x     = svq3_get_ue_golomb(gb);
1019         s->lowdelay.num_y     = svq3_get_ue_golomb(gb);
1020         s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
1021         s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
1022
1023         if (s->lowdelay.bytes.den <= 0) {
1024             av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
1025             return AVERROR_INVALIDDATA;
1026         }
1027
1028         /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
1029         if (get_bits1(gb)) {
1030             av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
1031             /* custom quantization matrix */
1032             s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
1033             for (level = 0; level < s->wavelet_depth; level++) {
1034                 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
1035                 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
1036                 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
1037             }
1038         } else {
1039             if (s->wavelet_depth > 4) {
1040                 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1041                 return AVERROR_INVALIDDATA;
1042             }
1043             /* default quantization matrix */
1044             for (level = 0; level < s->wavelet_depth; level++)
1045                 for (i = 0; i < 4; i++) {
1046                     s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1047                     /* haar with no shift differs for different depths */
1048                     if (s->wavelet_idx == 3)
1049                         s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
1050                 }
1051         }
1052     }
1053     return 0;
1054 }
1055
/**
 * Predict the split mode of the superblock at (x, y).
 *
 * @param sbsplit pointer to the entry of the current superblock; the already
 *                decoded left, top and top-left neighbours are read relative
 *                to it
 * @param stride  number of entries per superblock row
 * @return 0 for the first superblock, the only available neighbour on the
 *         first row/column, otherwise a table-mapped value of the sum of the
 *         three neighbours
 */
static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
{
    /* indexed by the sum of the three neighbouring split modes */
    static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
    int neighbour_sum;

    if (x == 0 && y == 0)
        return 0;
    if (y == 0)
        return sbsplit[-1];
    if (x == 0)
        return sbsplit[-stride];

    neighbour_sum = sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1];
    return avgsplit[neighbour_sum];
}
1069
1070 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1071 {
1072     int pred;
1073
1074     if (!(x|y))
1075         return 0;
1076     else if (!y)
1077         return block[-1].ref & refmask;
1078     else if (!x)
1079         return block[-stride].ref & refmask;
1080
1081     /* return the majority */
1082     pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1083     return (pred >> 1) & refmask;
1084 }
1085
1086 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1087 {
1088     int i, n = 0;
1089
1090     memset(block->u.dc, 0, sizeof(block->u.dc));
1091
1092     if (x && !(block[-1].ref & 3)) {
1093         for (i = 0; i < 3; i++)
1094             block->u.dc[i] += block[-1].u.dc[i];
1095         n++;
1096     }
1097
1098     if (y && !(block[-stride].ref & 3)) {
1099         for (i = 0; i < 3; i++)
1100             block->u.dc[i] += block[-stride].u.dc[i];
1101         n++;
1102     }
1103
1104     if (x && y && !(block[-1-stride].ref & 3)) {
1105         for (i = 0; i < 3; i++)
1106             block->u.dc[i] += block[-1-stride].u.dc[i];
1107         n++;
1108     }
1109
1110     if (n == 2) {
1111         for (i = 0; i < 3; i++)
1112             block->u.dc[i] = (block->u.dc[i]+1)>>1;
1113     } else if (n == 3) {
1114         for (i = 0; i < 3; i++)
1115             block->u.dc[i] = divide3(block->u.dc[i]);
1116     }
1117 }
1118
1119 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1120 {
1121     int16_t *pred[3];
1122     int refmask = ref+1;
1123     int mask = refmask | DIRAC_REF_MASK_GLOBAL; /*  exclude gmc blocks */
1124     int n = 0;
1125
1126     if (x && (block[-1].ref & mask) == refmask)
1127         pred[n++] = block[-1].u.mv[ref];
1128
1129     if (y && (block[-stride].ref & mask) == refmask)
1130         pred[n++] = block[-stride].u.mv[ref];
1131
1132     if (x && y && (block[-stride-1].ref & mask) == refmask)
1133         pred[n++] = block[-stride-1].u.mv[ref];
1134
1135     switch (n) {
1136     case 0:
1137         block->u.mv[ref][0] = 0;
1138         block->u.mv[ref][1] = 0;
1139         break;
1140     case 1:
1141         block->u.mv[ref][0] = pred[0][0];
1142         block->u.mv[ref][1] = pred[0][1];
1143         break;
1144     case 2:
1145         block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1146         block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1147         break;
1148     case 3:
1149         block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1150         block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1151         break;
1152     }
1153 }
1154
1155 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1156 {
1157     int ez      = s->globalmc[ref].zrs_exp;
1158     int ep      = s->globalmc[ref].perspective_exp;
1159     int (*A)[2] = s->globalmc[ref].zrs;
1160     int *b      = s->globalmc[ref].pan_tilt;
1161     int *c      = s->globalmc[ref].perspective;
1162
1163     int m       = (1<<ep) - (c[0]*x + c[1]*y);
1164     int mx      = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1165     int my      = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1166
1167     block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1168     block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
1169 }
1170
1171 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1172                                 int stride, int x, int y)
1173 {
1174     int i;
1175
1176     block->ref  = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1177     block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1178
1179     if (s->num_refs == 2) {
1180         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1181         block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1182     }
1183
1184     if (!block->ref) {
1185         pred_block_dc(block, stride, x, y);
1186         for (i = 0; i < 3; i++)
1187             block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1188         return;
1189     }
1190
1191     if (s->globalmc_flag) {
1192         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1193         block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1194     }
1195
1196     for (i = 0; i < s->num_refs; i++)
1197         if (block->ref & (i+1)) {
1198             if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1199                 global_mv(s, block, x, y, i);
1200             } else {
1201                 pred_mv(block, stride, x, y, i);
1202                 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1203                 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1204             }
1205         }
1206 }
1207
1208 /**
1209  * Copies the current block to the other blocks covered by the current superblock split mode
1210  */
1211 static void propagate_block_data(DiracBlock *block, int stride, int size)
1212 {
1213     int x, y;
1214     DiracBlock *dst = block;
1215
1216     for (x = 1; x < size; x++)
1217         dst[x] = *block;
1218
1219     for (y = 1; y < size; y++) {
1220         dst += stride;
1221         for (x = 0; x < size; x++)
1222             dst[x] = *block;
1223     }
1224 }
1225
1226 /**
1227  * Dirac Specification ->
1228  * 12. Block motion data syntax
1229  */
1230 static int dirac_unpack_block_motion_data(DiracContext *s)
1231 {
1232     GetBitContext *gb = &s->gb;
1233     uint8_t *sbsplit = s->sbsplit;
1234     int i, x, y, q, p;
1235     DiracArith arith[8];
1236
1237     align_get_bits(gb);
1238
1239     /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
1240     s->sbwidth  = DIVRNDUP(s->source.width,  4*s->plane[0].xbsep);
1241     s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1242     s->blwidth  = 4 * s->sbwidth;
1243     s->blheight = 4 * s->sbheight;
1244
1245     /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1246        decode superblock split modes */
1247     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));     /* svq3_get_ue_golomb(gb) is the length */
1248     for (y = 0; y < s->sbheight; y++) {
1249         for (x = 0; x < s->sbwidth; x++) {
1250             unsigned int split  = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
1251             if (split > 2)
1252                 return -1;
1253             sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
1254         }
1255         sbsplit += s->sbwidth;
1256     }
1257
1258     /* setup arith decoding */
1259     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1260     for (i = 0; i < s->num_refs; i++) {
1261         ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1262         ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1263     }
1264     for (i = 0; i < 3; i++)
1265         ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1266
1267     for (y = 0; y < s->sbheight; y++)
1268         for (x = 0; x < s->sbwidth; x++) {
1269             int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1270             int step   = 4 >> s->sbsplit[y * s->sbwidth + x];
1271
1272             for (q = 0; q < blkcnt; q++)
1273                 for (p = 0; p < blkcnt; p++) {
1274                     int bx = 4 * x + p*step;
1275                     int by = 4 * y + q*step;
1276                     DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1277                     decode_block_params(s, arith, block, s->blwidth, bx, by);
1278                     propagate_block_data(block, s->blwidth, step);
1279                 }
1280         }
1281
1282     return 0;
1283 }
1284
/**
 * One-dimensional OBMC weight for position i of a block of length blen.
 *
 * Positions within 2*offset of either end get a roll-off value that depends
 * on their distance from that end; all other positions get 8.
 */
static int weight(int i, int blen, int offset)
{
#define ROLLOFF(i) offset == 1 ? ((i) ? 5 : 3) :        \
    (1 + (6*(i) + offset - 1) / (2*offset - 1))

    int dist;

    if (i < 2*offset)
        dist = i;               /* distance from the leading edge */
    else if (i > blen-1 - 2*offset)
        dist = blen-1 - i;      /* distance from the trailing edge */
    else
        return 8;

    return ROLLOFF(dist);
}
1296
1297 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1298                                  int left, int right, int wy)
1299 {
1300     int x;
1301     for (x = 0; left && x < p->xblen >> 1; x++)
1302         obmc_weight[x] = wy*8;
1303     for (; x < p->xblen >> right; x++)
1304         obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
1305     for (; x < p->xblen; x++)
1306         obmc_weight[x] = wy*8;
1307     for (; x < stride; x++)
1308         obmc_weight[x] = 0;
1309 }
1310
1311 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1312                              int left, int right, int top, int bottom)
1313 {
1314     int y;
1315     for (y = 0; top && y < p->yblen >> 1; y++) {
1316         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1317         obmc_weight += stride;
1318     }
1319     for (; y < p->yblen >> bottom; y++) {
1320         int wy = weight(y, p->yblen, p->yoffset);
1321         init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1322         obmc_weight += stride;
1323     }
1324     for (; y < p->yblen; y++) {
1325         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1326         obmc_weight += stride;
1327     }
1328 }
1329
1330 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1331 {
1332     int top = !by;
1333     int bottom = by == s->blheight-1;
1334
1335     /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1336     if (top || bottom || by == 1) {
1337         init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1338         init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1339         init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
1340     }
1341 }
1342
/**
 * Weights for eighth-pel interpolation between four half-pel planes.
 * mc_subpel() indexes the table as epel_weights[my&3][mx&3]; each entry
 * holds four weights that sum to 16.
 */
static const uint8_t epel_weights[4][4][4] = {
    {
        { 16,  0,  0,  0 },
        { 12,  4,  0,  0 },
        {  8,  8,  0,  0 },
        {  4, 12,  0,  0 },
    },
    {
        { 12,  0,  4,  0 },
        {  9,  3,  3,  1 },
        {  6,  6,  2,  2 },
        {  3,  9,  1,  3 },
    },
    {
        {  8,  0,  8,  0 },
        {  6,  2,  6,  2 },
        {  4,  4,  4,  4 },
        {  2,  6,  2,  6 },
    },
    {
        {  4,  0, 12,  0 },
        {  3,  1,  9,  3 },
        {  2,  2,  6,  6 },
        {  1,  3,  3,  9 },
    },
};
1361
1362 /**
1363  * For block x,y, determine which of the hpel planes to do bilinear
1364  * interpolation from and set src[] to the location in each hpel plane
1365  * to MC from.
1366  *
1367  * @return the index of the put_dirac_pixels_tab function to use
1368  *  0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
1369  */
1370 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1371                      int x, int y, int ref, int plane)
1372 {
1373     Plane *p = &s->plane[plane];
1374     uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1375     int motion_x = block->u.mv[ref][0];
1376     int motion_y = block->u.mv[ref][1];
1377     int mx, my, i, epel, nplanes = 0;
1378
1379     if (plane) {
1380         motion_x >>= s->chroma_x_shift;
1381         motion_y >>= s->chroma_y_shift;
1382     }
1383
1384     mx         = motion_x & ~(-1U << s->mv_precision);
1385     my         = motion_y & ~(-1U << s->mv_precision);
1386     motion_x >>= s->mv_precision;
1387     motion_y >>= s->mv_precision;
1388     /* normalize subpel coordinates to epel */
1389     /* TODO: template this function? */
1390     mx      <<= 3 - s->mv_precision;
1391     my      <<= 3 - s->mv_precision;
1392
1393     x += motion_x;
1394     y += motion_y;
1395     epel = (mx|my)&1;
1396
1397     /* hpel position */
1398     if (!((mx|my)&3)) {
1399         nplanes = 1;
1400         src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
1401     } else {
1402         /* qpel or epel */
1403         nplanes = 4;
1404         for (i = 0; i < 4; i++)
1405             src[i] = ref_hpel[i] + y*p->stride + x;
1406
1407         /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1408            we increment x/y because the edge changes for half of the pixels */
1409         if (mx > 4) {
1410             src[0] += 1;
1411             src[2] += 1;
1412             x++;
1413         }
1414         if (my > 4) {
1415             src[0] += p->stride;
1416             src[1] += p->stride;
1417             y++;
1418         }
1419
1420         /* hpel planes are:
1421            [0]: F  [1]: H
1422            [2]: V  [3]: C */
1423         if (!epel) {
1424             /* check if we really only need 2 planes since either mx or my is
1425                a hpel position. (epel weights of 0 handle this there) */
1426             if (!(mx&3)) {
1427                 /* mx == 0: average [0] and [2]
1428                    mx == 4: average [1] and [3] */
1429                 src[!mx] = src[2 + !!mx];
1430                 nplanes = 2;
1431             } else if (!(my&3)) {
1432                 src[0] = src[(my>>1)  ];
1433                 src[1] = src[(my>>1)+1];
1434                 nplanes = 2;
1435             }
1436         } else {
1437             /* adjust the ordering if needed so the weights work */
1438             if (mx > 4) {
1439                 FFSWAP(const uint8_t *, src[0], src[1]);
1440                 FFSWAP(const uint8_t *, src[2], src[3]);
1441             }
1442             if (my > 4) {
1443                 FFSWAP(const uint8_t *, src[0], src[2]);
1444                 FFSWAP(const uint8_t *, src[1], src[3]);
1445             }
1446             src[4] = epel_weights[my&3][mx&3];
1447         }
1448     }
1449
1450     /* fixme: v/h _edge_pos */
1451     if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
1452         y + p->yblen > p->height+EDGE_WIDTH/2 ||
1453         x < 0 || y < 0) {
1454         for (i = 0; i < nplanes; i++) {
1455             s->vdsp.emulated_edge_mc(s->edge_emu_buffer[i], src[i],
1456                                      p->stride, p->stride,
1457                                      p->xblen, p->yblen, x, y,
1458                                      p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1459             src[i] = s->edge_emu_buffer[i];
1460         }
1461     }
1462     return (nplanes>>1) + epel;
1463 }
1464
1465 static void add_dc(uint16_t *dst, int dc, int stride,
1466                    uint8_t *obmc_weight, int xblen, int yblen)
1467 {
1468     int x, y;
1469     dc += 128;
1470
1471     for (y = 0; y < yblen; y++) {
1472         for (x = 0; x < xblen; x += 2) {
1473             dst[x  ] += dc * obmc_weight[x  ];
1474             dst[x+1] += dc * obmc_weight[x+1];
1475         }
1476         dst          += stride;
1477         obmc_weight  += MAX_BLOCKSIZE;
1478     }
1479 }
1480
1481 static void block_mc(DiracContext *s, DiracBlock *block,
1482                      uint16_t *mctmp, uint8_t *obmc_weight,
1483                      int plane, int dstx, int dsty)
1484 {
1485     Plane *p = &s->plane[plane];
1486     const uint8_t *src[5];
1487     int idx;
1488
1489     switch (block->ref&3) {
1490     case 0: /* DC */
1491         add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
1492         return;
1493     case 1:
1494     case 2:
1495         idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1496         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1497         if (s->weight_func)
1498             s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1499                            s->weight[0] + s->weight[1], p->yblen);
1500         break;
1501     case 3:
1502         idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1503         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1504         idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1505         if (s->biweight_func) {
1506             /* fixme: +32 is a quick hack */
1507             s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1508             s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1509                              s->weight[0], s->weight[1], p->yblen);
1510         } else
1511             s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1512         break;
1513     }
1514     s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
1515 }
1516
1517 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1518 {
1519     Plane *p = &s->plane[plane];
1520     int x, dstx = p->xbsep - p->xoffset;
1521
1522     block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1523     mctmp += p->xbsep;
1524
1525     for (x = 1; x < s->blwidth-1; x++) {
1526         block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
1527         dstx  += p->xbsep;
1528         mctmp += p->xbsep;
1529     }
1530     block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
1531 }
1532
1533 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1534 {
1535     int idx = 0;
1536     if (xblen > 8)
1537         idx = 1;
1538     if (xblen > 16)
1539         idx = 2;
1540
1541     memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1542     memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1543     s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
1544     if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1545         s->weight_func   = s->diracdsp.weight_dirac_pixels_tab[idx];
1546         s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
1547     } else {
1548         s->weight_func   = NULL;
1549         s->biweight_func = NULL;
1550     }
1551 }
1552
1553 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1554 {
1555     /* chroma allocates an edge of 8 when subsampled
1556        which for 4:2:2 means an h edge of 16 and v edge of 8
1557        just use 8 for everything for the moment */
1558     int i, edge = EDGE_WIDTH/2;
1559
1560     ref->hpel[plane][0] = ref->avframe->data[plane];
1561     s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1562
1563     /* no need for hpel if we only have fpel vectors */
1564     if (!s->mv_precision)
1565         return;
1566
1567     for (i = 1; i < 4; i++) {
1568         if (!ref->hpel_base[plane][i])
1569             ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe->linesize[plane] + 32);
1570         /* we need to be 16-byte aligned even for chroma */
1571         ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
1572     }
1573
1574     if (!ref->interpolated[plane]) {
1575         s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1576                                       ref->hpel[plane][3], ref->hpel[plane][0],
1577                                       ref->avframe->linesize[plane], width, height);
1578         s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1579         s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1580         s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1581     }
1582     ref->interpolated[plane] = 1;
1583 }
1584
1585 /**
1586  * Dirac Specification ->
1587  * 13.0 Transform data syntax. transform_data()
1588  */
1589 static int dirac_decode_frame_internal(DiracContext *s)
1590 {
1591     DWTContext d;
1592     int y, i, comp, dsty;
1593
1594     if (s->low_delay) {
1595         /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
1596         for (comp = 0; comp < 3; comp++) {
1597             Plane *p = &s->plane[comp];
1598             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1599         }
1600         if (!s->zero_res)
1601             decode_lowdelay(s);
1602     }
1603
1604     for (comp = 0; comp < 3; comp++) {
1605         Plane *p       = &s->plane[comp];
1606         uint8_t *frame = s->current_picture->avframe->data[comp];
1607
1608         /* FIXME: small resolutions */
1609         for (i = 0; i < 4; i++)
1610             s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
1611
1612         if (!s->zero_res && !s->low_delay)
1613         {
1614             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1615             decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1616         }
1617         if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1618                                   s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1619             return -1;
1620
1621         if (!s->num_refs) { /* intra */
1622             for (y = 0; y < p->height; y += 16) {
1623                 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1624                 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1625                                                     p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1626             }
1627         } else { /* inter */
1628             int rowheight = p->ybsep*p->stride;
1629
1630             select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1631
1632             for (i = 0; i < s->num_refs; i++)
1633                 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
1634
1635             memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1636
1637             dsty = -p->yoffset;
1638             for (y = 0; y < s->blheight; y++) {
1639                 int h     = 0,
1640                     start = FFMAX(dsty, 0);
1641                 uint16_t *mctmp    = s->mctmp + y*rowheight;
1642                 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1643
1644                 init_obmc_weights(s, p, y);
1645
1646                 if (y == s->blheight-1 || start+p->ybsep > p->height)
1647                     h = p->height - start;
1648                 else
1649                     h = p->ybsep - (start - dsty);
1650                 if (h < 0)
1651                     break;
1652
1653                 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1654                 mc_row(s, blocks, mctmp, comp, dsty);
1655
1656                 mctmp += (start - dsty)*p->stride + p->xoffset;
1657                 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1658                 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1659                                              p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
1660
1661                 dsty += p->ybsep;
1662             }
1663         }
1664     }
1665
1666
1667     return 0;
1668 }
1669
1670 static int get_buffer_with_edge(AVCodecContext *avctx, AVFrame *f, int flags)
1671 {
1672     int ret, i;
1673     int chroma_x_shift, chroma_y_shift;
1674     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
1675
1676     f->width  = avctx->width  + 2 * EDGE_WIDTH;
1677     f->height = avctx->height + 2 * EDGE_WIDTH + 2;
1678     ret = ff_get_buffer(avctx, f, flags);
1679     if (ret < 0)
1680         return ret;
1681
1682     for (i = 0; f->data[i]; i++) {
1683         int offset = (EDGE_WIDTH >> (i && i<3 ? chroma_y_shift : 0)) *
1684                      f->linesize[i] + 32;
1685         f->data[i] += offset;
1686     }
1687     f->width  = avctx->width;
1688     f->height = avctx->height;
1689
1690     return 0;
1691 }
1692
1693 /**
1694  * Dirac Specification ->
1695  * 11.1.1 Picture Header. picture_header()
1696  */
1697 static int dirac_decode_picture_header(DiracContext *s)
1698 {
1699     int retire, picnum;
1700     int i, j, refnum, refdist;
1701     GetBitContext *gb = &s->gb;
1702
1703     /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1704     picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
1705
1706
1707     av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1708
1709     /* if this is the first keyframe after a sequence header, start our
1710        reordering from here */
1711     if (s->frame_number < 0)
1712         s->frame_number = picnum;
1713
1714     s->ref_pics[0] = s->ref_pics[1] = NULL;
1715     for (i = 0; i < s->num_refs; i++) {
1716         refnum = picnum + dirac_get_se_golomb(gb);
1717         refdist = INT_MAX;
1718
1719         /* find the closest reference to the one we want */
1720         /* Jordi: this is needed if the referenced picture hasn't yet arrived */
1721         for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1722             if (s->ref_frames[j]
1723                 && FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
1724                 s->ref_pics[i] = s->ref_frames[j];
1725                 refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
1726             }
1727
1728         if (!s->ref_pics[i] || refdist)
1729             av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1730
1731         /* if there were no references at all, allocate one */
1732         if (!s->ref_pics[i])
1733             for (j = 0; j < MAX_FRAMES; j++)
1734                 if (!s->all_frames[j].avframe->data[0]) {
1735                     s->ref_pics[i] = &s->all_frames[j];
1736                     get_buffer_with_edge(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
1737                     break;
1738                 }
1739     }
1740
1741     /* retire the reference frames that are not used anymore */
1742     if (s->current_picture->avframe->reference) {
1743         retire = picnum + dirac_get_se_golomb(gb);
1744         if (retire != picnum) {
1745             DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
1746
1747             if (retire_pic)
1748                 retire_pic->avframe->reference &= DELAYED_PIC_REF;
1749             else
1750                 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1751         }
1752
1753         /* if reference array is full, remove the oldest as per the spec */
1754         while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1755             av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1756             remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->avframe->reference &= DELAYED_PIC_REF;
1757         }
1758     }
1759
1760     if (s->num_refs) {
1761         if (dirac_unpack_prediction_parameters(s))  /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1762             return -1;
1763         if (dirac_unpack_block_motion_data(s))      /* [DIRAC_STD] 12. Block motion data syntax                       */
1764             return -1;
1765     }
1766     if (dirac_unpack_idwt_params(s))                /* [DIRAC_STD] 11.3 Wavelet transform data                        */
1767         return -1;
1768
1769     init_planes(s);
1770     return 0;
1771 }
1772
1773 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1774 {
1775     DiracFrame *out = s->delay_frames[0];
1776     int i, out_idx  = 0;
1777     int ret;
1778
1779     /* find frame with lowest picture number */
1780     for (i = 1; s->delay_frames[i]; i++)
1781         if (s->delay_frames[i]->avframe->display_picture_number < out->avframe->display_picture_number) {
1782             out     = s->delay_frames[i];
1783             out_idx = i;
1784         }
1785
1786     for (i = out_idx; s->delay_frames[i]; i++)
1787         s->delay_frames[i] = s->delay_frames[i+1];
1788
1789     if (out) {
1790         out->avframe->reference ^= DELAYED_PIC_REF;
1791         *got_frame = 1;
1792         if((ret = av_frame_ref(picture, out->avframe)) < 0)
1793             return ret;
1794     }
1795
1796     return 0;
1797 }
1798
/**
 * Dirac Specification ->
 * 9.6 Parse Info Header Syntax. parse_info()
 * Size in bytes of the parse info header, written out field by field:
 * 4 byte start code + 1 byte parse code + 4 byte size + 4 byte previous size
 */
#define DATA_UNIT_HEADER_SIZE (4 + 1 + 4 + 4)
1805
1806 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1807    inside the function parse_sequence() */
1808 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1809 {
1810     DiracContext *s   = avctx->priv_data;
1811     DiracFrame *pic   = NULL;
1812     int ret, i, parse_code = buf[4];
1813     unsigned tmp;
1814
1815     if (size < DATA_UNIT_HEADER_SIZE)
1816         return -1;
1817
1818     init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1819
1820     if (parse_code == pc_seq_header) {
1821         if (s->seen_sequence_header)
1822             return 0;
1823
1824         /* [DIRAC_STD] 10. Sequence header */
1825         if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1826             return -1;
1827
1828         avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1829
1830         if (alloc_sequence_buffers(s))
1831             return -1;
1832
1833         s->seen_sequence_header = 1;
1834     } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1835         free_sequence_buffers(s);
1836         s->seen_sequence_header = 0;
1837     } else if (parse_code == pc_aux_data) {
1838         if (buf[13] == 1) {     /* encoder implementation/version */
1839             int ver[3];
1840             /* versions older than 1.0.8 don't store quant delta for
1841                subbands with only one codeblock */
1842             if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1843                 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1844                     s->old_delta_quant = 1;
1845         }
1846     } else if (parse_code & 0x8) {  /* picture data unit */
1847         if (!s->seen_sequence_header) {
1848             av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1849             return -1;
1850         }
1851
1852         /* find an unused frame */
1853         for (i = 0; i < MAX_FRAMES; i++)
1854             if (s->all_frames[i].avframe->data[0] == NULL)
1855                 pic = &s->all_frames[i];
1856         if (!pic) {
1857             av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1858             return -1;
1859         }
1860
1861         av_frame_unref(pic->avframe);
1862
1863         /* [DIRAC_STD] Defined in 9.6.1 ... */
1864         tmp            =  parse_code & 0x03;                   /* [DIRAC_STD] num_refs()      */
1865         if (tmp > 2) {
1866             av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1867             return -1;
1868         }
1869         s->num_refs    = tmp;
1870         s->is_arith    = (parse_code & 0x48) == 0x08;          /* [DIRAC_STD] using_ac()      */
1871         s->low_delay   = (parse_code & 0x88) == 0x88;          /* [DIRAC_STD] is_low_delay()  */
1872         pic->avframe->reference = (parse_code & 0x0C) == 0x0C;  /* [DIRAC_STD]  is_reference() */
1873         pic->avframe->key_frame = s->num_refs == 0;             /* [DIRAC_STD] is_intra()      */
1874         pic->avframe->pict_type = s->num_refs + 1;              /* Definition of AVPictureType in avutil.h */
1875
1876         if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
1877             return ret;
1878         s->current_picture = pic;
1879         s->plane[0].stride = pic->avframe->linesize[0];
1880         s->plane[1].stride = pic->avframe->linesize[1];
1881         s->plane[2].stride = pic->avframe->linesize[2];
1882
1883         if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
1884             return AVERROR(ENOMEM);
1885
1886         /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1887         if (dirac_decode_picture_header(s))
1888             return -1;
1889
1890         /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1891         if (dirac_decode_frame_internal(s))
1892             return -1;
1893     }
1894     return 0;
1895 }
1896
1897 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1898 {
1899     DiracContext *s     = avctx->priv_data;
1900     AVFrame *picture    = data;
1901     uint8_t *buf        = pkt->data;
1902     int buf_size        = pkt->size;
1903     int i, data_unit_size, buf_idx = 0;
1904     int ret;
1905
1906     /* release unused frames */
1907     for (i = 0; i < MAX_FRAMES; i++)
1908         if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].avframe->reference) {
1909             av_frame_unref(s->all_frames[i].avframe);
1910             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1911         }
1912
1913     s->current_picture = NULL;
1914     *got_frame = 0;
1915
1916     /* end of stream, so flush delayed pics */
1917     if (buf_size == 0)
1918         return get_delayed_pic(s, (AVFrame *)data, got_frame);
1919
1920     for (;;) {
1921         /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1922           [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1923           BBCD start code search */
1924         for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1925             if (buf[buf_idx  ] == 'B' && buf[buf_idx+1] == 'B' &&
1926                 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1927                 break;
1928         }
1929         /* BBCD found or end of data */
1930         if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
1931             break;
1932
1933         data_unit_size = AV_RB32(buf+buf_idx+5);
1934         if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1935             if(buf_idx + data_unit_size > buf_size)
1936             av_log(s->avctx, AV_LOG_ERROR,
1937                    "Data unit with size %d is larger than input buffer, discarding\n",
1938                    data_unit_size);
1939             buf_idx += 4;
1940             continue;
1941         }
1942         /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1943         if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1944         {
1945             av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
1946             return -1;
1947         }
1948         buf_idx += data_unit_size;
1949     }
1950
1951     if (!s->current_picture)
1952         return buf_size;
1953
1954     if (s->current_picture->avframe->display_picture_number > s->frame_number) {
1955         DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1956
1957         s->current_picture->avframe->reference |= DELAYED_PIC_REF;
1958
1959         if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1960             int min_num = s->delay_frames[0]->avframe->display_picture_number;
1961             /* Too many delayed frames, so we display the frame with the lowest pts */
1962             av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1963
1964             for (i = 1; s->delay_frames[i]; i++)
1965                 if (s->delay_frames[i]->avframe->display_picture_number < min_num)
1966                     min_num = s->delay_frames[i]->avframe->display_picture_number;
1967
1968             delayed_frame = remove_frame(s->delay_frames, min_num);
1969             add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1970         }
1971
1972         if (delayed_frame) {
1973             delayed_frame->avframe->reference ^= DELAYED_PIC_REF;
1974             if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
1975                 return ret;
1976             *got_frame = 1;
1977         }
1978     } else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
1979         /* The right frame at the right time :-) */
1980         if((ret=av_frame_ref(data, s->current_picture->avframe)) < 0)
1981             return ret;
1982         *got_frame = 1;
1983     }
1984
1985     if (*got_frame)
1986         s->frame_number = picture->display_picture_number + 1;
1987
1988     return buf_idx;
1989 }
1990
1991 AVCodec ff_dirac_decoder = {
1992     .name           = "dirac",
1993     .long_name      = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
1994     .type           = AVMEDIA_TYPE_VIDEO,
1995     .id             = AV_CODEC_ID_DIRAC,
1996     .priv_data_size = sizeof(DiracContext),
1997     .init           = dirac_decode_init,
1998     .close          = dirac_decode_end,
1999     .decode         = dirac_decode_frame,
2000     .capabilities   = CODEC_CAP_DELAY,
2001     .flush          = dirac_decode_flush,
2002 };