libavcodec/snowenc.c

   1 /*
   2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include "libavutil/emms.h"
  22 #include "libavutil/intmath.h"
  23 #include "libavutil/libm.h"
  24 #include "libavutil/log.h"
  25 #include "libavutil/opt.h"
  26 #include "libavutil/pixdesc.h"
  27 #include "avcodec.h"
  28 #include "codec_internal.h"
  29 #include "encode.h"
  30 #include "internal.h" //For AVCodecInternal.recon_frame
  31 #include "me_cmp.h"
  32 #include "packet_internal.h"
  33 #include "qpeldsp.h"
  34 #include "snow_dwt.h"
  35 #include "snow.h"
  36
  37 #include "rangecoder.h"
  38 #include "mathops.h"
  39
  40 #include "mpegvideo.h"
  41 #include "h263enc.h"
  42
  43 #define FF_ME_ITER 3
  44
  45 typedef struct SnowEncContext {
  46     SnowContext com;
  47     QpelDSPContext qdsp;
  48     MpegvideoEncDSPContext mpvencdsp;
  49
  50     int lambda;
  51     int lambda2;
  52     int pass1_rc;
  53
  54     int pred;
  55     int memc_only;
  56     int no_bitstream;
  57     int intra_penalty;
  58     int motion_est;
  59     int iterative_dia_size;
  60     int scenechange_threshold;
  61
  62     MECmpContext mecc;
  63     MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
  64 #define ME_CACHE_SIZE 1024
  65     unsigned me_cache[ME_CACHE_SIZE];
  66     unsigned me_cache_generation;
  67
  68     uint64_t encoding_error[SNOW_MAX_PLANES];
  69 } SnowEncContext;
  70
  71 static void init_ref(MotionEstContext *c, const uint8_t *const src[3],
  72                      uint8_t *const ref[3], uint8_t *const ref2[3],
  73                      int x, int y, int ref_index)
  74 {
  75     SnowContext *s = c->avctx->priv_data;
  76     const int offset[3] = {
  77           y*c->  stride + x,
  78         ((y*c->uvstride + x) >> s->chroma_h_shift),
  79         ((y*c->uvstride + x) >> s->chroma_h_shift),
  80     };
  81     for (int i = 0; i < 3; i++) {
  82         c->src[0][i] = src [i];
  83         c->ref[0][i] = ref [i] + offset[i];
  84     }
  85     av_assert2(!ref_index);
  86 }
  87
  88 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed)
  89 {
  90     if (v) {
  91         const int a = FFABS(v);
  92         const int e = av_log2(a);
  93         const int el = FFMIN(e, 10);
  94         int i;
  95
  96         put_rac(c, state + 0, 0);
  97
  98         for (i = 0; i < el; i++)
  99             put_rac(c, state + 1 + i, 1);  //1..10
 100         for(; i < e; i++)
 101             put_rac(c, state + 1 + 9, 1);  //1..10
 102         put_rac(c, state + 1 + FFMIN(i, 9), 0);
 103
 104         for (i = e - 1; i >= el; i--)
 105             put_rac(c, state + 22 + 9, (a >> i) & 1); //22..31
 106         for(; i >= 0; i--)
 107             put_rac(c, state + 22 + i, (a >> i) & 1); //22..31
 108
 109         if (is_signed)
 110             put_rac(c, state + 11 + el, v < 0); //11..21
 111     } else {
 112         put_rac(c, state + 0, 1);
 113     }
 114 }
 115
 116 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2)
 117 {
 118     int r = log2 >= 0 ? 1<<log2 : 1;
 119
 120     av_assert2(v >= 0);
 121     av_assert2(log2 >= -4);
 122
 123     while (v >= r) {
 124         put_rac(c, state + 4 + log2, 1);
 125         v -= r;
 126         log2++;
 127         if (log2 > 0) r += r;
 128     }
 129     put_rac(c, state + 4 + log2, 0);
 130
 131     for (int i = log2 - 1; i >= 0; i--)
 132         put_rac(c, state + 31 - i, (v >> i) & 1);
 133 }
 134
 135 static int get_encode_buffer(SnowContext *s, AVFrame *frame)
 136 {
 137     int ret;
 138
 139     frame->width  = s->avctx->width  + 2 * EDGE_WIDTH;
 140     frame->height = s->avctx->height + 2 * EDGE_WIDTH;
 141
 142     ret = ff_encode_alloc_frame(s->avctx, frame);
 143     if (ret < 0)
 144         return ret;
 145     for (int i = 0; frame->data[i]; i++) {
 146         int offset = (EDGE_WIDTH >> (i ? s->chroma_v_shift : 0)) *
 147                         frame->linesize[i] +
 148                         (EDGE_WIDTH >> (i ? s->chroma_h_shift : 0));
 149         frame->data[i] += offset;
 150     }
 151     frame->width  = s->avctx->width;
 152     frame->height = s->avctx->height;
 153
 154     return 0;
 155 }
 156
 157 static av_cold int encode_init(AVCodecContext *avctx)
 158 {
 159     SnowEncContext *const enc = avctx->priv_data;
 160     SnowContext *const s = &enc->com;
 161     MpegEncContext *const mpv = &enc->m;
 162     int plane_index, ret;
 163     int i;
 164
 165     if (enc->pred == DWT_97
 166        && (avctx->flags & AV_CODEC_FLAG_QSCALE)
 167        && avctx->global_quality == 0){
 168         av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
 169         return AVERROR(EINVAL);
 170     }
 171
 172     s->spatial_decomposition_type = enc->pred; //FIXME add decorrelator type r transform_type
 173
 174     s->mv_scale       = (avctx->flags & AV_CODEC_FLAG_QPEL) ? 2 : 4;
 175     s->block_max_depth= (avctx->flags & AV_CODEC_FLAG_4MV ) ? 1 : 0;
 176
 177     for(plane_index=0; plane_index<3; plane_index++){
 178         s->plane[plane_index].diag_mc= 1;
 179         s->plane[plane_index].htaps= 6;
 180         s->plane[plane_index].hcoeff[0]=  40;
 181         s->plane[plane_index].hcoeff[1]= -10;
 182         s->plane[plane_index].hcoeff[2]=   2;
 183         s->plane[plane_index].fast_mc= 1;
 184     }
 185
 186     // Must be before ff_snow_common_init()
 187     ff_hpeldsp_init(&s->hdsp, avctx->flags);
 188     if ((ret = ff_snow_common_init(avctx)) < 0) {
 189         return ret;
 190     }
 191
 192 #define mcf(dx,dy)\
 193     enc->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
 194     enc->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
 195         s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
 196     enc->qdsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
 197     enc->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
 198         s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
 199
 200     mcf( 0, 0)
 201     mcf( 4, 0)
 202     mcf( 8, 0)
 203     mcf(12, 0)
 204     mcf( 0, 4)
 205     mcf( 4, 4)
 206     mcf( 8, 4)
 207     mcf(12, 4)
 208     mcf( 0, 8)
 209     mcf( 4, 8)
 210     mcf( 8, 8)
 211     mcf(12, 8)
 212     mcf( 0,12)
 213     mcf( 4,12)
 214     mcf( 8,12)
 215     mcf(12,12)
 216
 217     ff_me_cmp_init(&enc->mecc, avctx);
 218     ff_mpegvideoencdsp_init(&enc->mpvencdsp, avctx);
 219
 220     ff_snow_alloc_blocks(s);
 221
 222     s->version=0;
 223
 224     mpv->avctx   = avctx;
 225     mpv->bit_rate= avctx->bit_rate;
 226     mpv->lmin    = avctx->mb_lmin;
 227     mpv->lmax    = avctx->mb_lmax;
 228     mpv->mb_num  = (avctx->width * avctx->height + 255) / 256; // For ratecontrol
 229
 230     mpv->me.temp      =
 231     mpv->me.scratchpad = av_calloc(avctx->width + 64, 2*16*2*sizeof(uint8_t));
 232     mpv->sc.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
 233     mpv->me.map       = av_mallocz(2 * ME_MAP_SIZE * sizeof(*mpv->me.map));
 234     if (!mpv->me.scratchpad || !mpv->me.map || !mpv->sc.obmc_scratchpad)
 235         return AVERROR(ENOMEM);
 236     mpv->me.score_map = mpv->me.map + ME_MAP_SIZE;
 237
 238     ff_h263_encode_init(mpv); //mv_penalty
 239
 240     s->max_ref_frames = av_clip(avctx->refs, 1, MAX_REF_FRAMES);
 241
 242     if(avctx->flags&AV_CODEC_FLAG_PASS1){
 243         if(!avctx->stats_out)
 244             avctx->stats_out = av_mallocz(256);
 245
 246         if (!avctx->stats_out)
 247             return AVERROR(ENOMEM);
 248     }
 249     if((avctx->flags&AV_CODEC_FLAG_PASS2) || !(avctx->flags&AV_CODEC_FLAG_QSCALE)){
 250         ret = ff_rate_control_init(mpv);
 251         if(ret < 0)
 252             return ret;
 253     }
 254     enc->pass1_rc = !(avctx->flags & (AV_CODEC_FLAG_QSCALE|AV_CODEC_FLAG_PASS2));
 255
 256     switch(avctx->pix_fmt){
 257     case AV_PIX_FMT_YUV444P:
 258 //    case AV_PIX_FMT_YUV422P:
 259     case AV_PIX_FMT_YUV420P:
 260 //    case AV_PIX_FMT_YUV411P:
 261     case AV_PIX_FMT_YUV410P:
 262         s->nb_planes = 3;
 263         s->colorspace_type= 0;
 264         break;
 265     case AV_PIX_FMT_GRAY8:
 266         s->nb_planes = 1;
 267         s->colorspace_type = 1;
 268         break;
 269 /*    case AV_PIX_FMT_RGB32:
 270         s->colorspace= 1;
 271         break;*/
 272     }
 273
 274     ret = av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift,
 275                                            &s->chroma_v_shift);
 276     if (ret)
 277         return ret;
 278
 279     ret  = ff_set_cmp(&enc->mecc, enc->mecc.me_cmp, s->avctx->me_cmp);
 280     ret |= ff_set_cmp(&enc->mecc, enc->mecc.me_sub_cmp, s->avctx->me_sub_cmp);
 281     if (ret < 0)
 282         return AVERROR(EINVAL);
 283
 284     s->input_picture = av_frame_alloc();
 285     if (!s->input_picture)
 286         return AVERROR(ENOMEM);
 287
 288     if ((ret = get_encode_buffer(s, s->input_picture)) < 0)
 289         return ret;
 290
 291     if (enc->motion_est == FF_ME_ITER) {
 292         int size= s->b_width * s->b_height << 2*s->block_max_depth;
 293         for(i=0; i<s->max_ref_frames; i++){
 294             s->ref_mvs[i]    = av_calloc(size, sizeof(*s->ref_mvs[i]));
 295             s->ref_scores[i] = av_calloc(size, sizeof(*s->ref_scores[i]));
 296             if (!s->ref_mvs[i] || !s->ref_scores[i])
 297                 return AVERROR(ENOMEM);
 298         }
 299     }
 300
 301     return 0;
 302 }
 303
 304 //near copy & paste from dsputil, FIXME
 /**
  * Sum all samples of a w x h rectangle.
  *
  * @param pix       top-left sample of the rectangle
  * @param line_size distance between the starts of two consecutive rows
  * @param w         width of the rectangle
  * @param h         height of the rectangle
  * @return the sum of the w * h samples
  */
 static int pix_sum(const uint8_t * pix, int line_size, int w, int h)
 {
     int total = 0;

     for (int row = 0; row < h; row++) {
         const uint8_t *line = pix + row * line_size;

         for (int col = 0; col < w; col++)
             total += line[col];
     }
     return total;
 }
 319
 320 //near copy & paste from dsputil, FIXME
 321 static int pix_norm1(const uint8_t * pix, int line_size, int w)
 322 {
 323     int s, i, j;
 324     const uint32_t *sq = ff_square_tab + 256;
 325
 326     s = 0;
 327     for (i = 0; i < w; i++) {
 328         for (j = 0; j < w; j ++) {
 329             s += sq[pix[0]];
 330             pix ++;
 331         }
 332         pix += line_size - w;
 333     }
 334     return s;
 335 }
 336
 337 static inline int get_penalty_factor(int lambda, int lambda2, int type){
 338     switch(type&0xFF){
 339     default:
 340     case FF_CMP_SAD:
 341         return lambda>>FF_LAMBDA_SHIFT;
 342     case FF_CMP_DCT:
 343         return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
 344     case FF_CMP_W53:
 345         return (4*lambda)>>(FF_LAMBDA_SHIFT);
 346     case FF_CMP_W97:
 347         return (2*lambda)>>(FF_LAMBDA_SHIFT);
 348     case FF_CMP_SATD:
 349     case FF_CMP_DCT264:
 350         return (2*lambda)>>FF_LAMBDA_SHIFT;
 351     case FF_CMP_RD:
 352     case FF_CMP_PSNR:
 353     case FF_CMP_SSE:
 354     case FF_CMP_NSSE:
 355         return lambda2>>FF_LAMBDA_SHIFT;
 356     case FF_CMP_BIT:
 357         return 1;
 358     }
 359 }
 360
 361 //FIXME copy&paste
 362 #define P_LEFT P[1]
 363 #define P_TOP P[2]
 364 #define P_TOPRIGHT P[3]
 365 #define P_MEDIAN P[4]
 366 #define P_MV1 P[9]
 367 #define FLAG_QPEL   1 //must be 1
 368
 /**
  * Decide how to code the block at (x, y) of quadtree level `level` and
  * write that decision to the range coder s->c.
  *
  * On keyframes the block is only marked intra (with the colors of its left
  * neighbour) and 0 is returned.  Otherwise three alternatives are scored:
  *  - inter: the best motion vector over all s->ref_frames references,
  *    trial-encoded into p_buffer with a copy of the coder and of block_state
  *  - intra: the rounded mean of each plane, trial-encoded into i_buffer in
  *    the same way; only the luma plane contributes to the distortion
  *  - split: if level is not the maximum depth, the four child blocks are
  *    encoded recursively straight into s->c
  * If the split is not the cheapest, the bytestream is rewound to where it
  * was on entry and the winning trial encoding is copied in.
  *
  * At level 0 the function also updates c->scene_change_score.
  *
  * @param enc   encoder context
  * @param level quadtree level of the block, 0 being the largest block size
  * @param x     horizontal block index at this level
  * @param y     vertical block index at this level
  * @return the score of the chosen alternative, 0 on keyframes
  */
 369 static int encode_q_branch(SnowEncContext *enc, int level, int x, int y)
 370 {
 371     SnowContext      *const s = &enc->com;
 372     MotionEstContext *const c = &enc->m.me;
 373     uint8_t p_buffer[1024];
 374     uint8_t i_buffer[1024];
 375     uint8_t p_state[sizeof(s->block_state)];
 376     uint8_t i_state[sizeof(s->block_state)];
 377     RangeCoder pc, ic;
     /* coder position on entry, used to rewind when the split is rejected */
 378     uint8_t *pbbak= s->c.bytestream;
 379     uint8_t *pbbak_start= s->c.bytestream_start;
 380     int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
 381     const int w= s->b_width  << s->block_max_depth;
 382     const int h= s->b_height << s->block_max_depth;
 383     const int rem_depth= s->block_max_depth - level;
 384     const int index= (x + y*w) << rem_depth;
 385     const int block_w= 1<<(LOG2_MB_SIZE - level);
 386     int trx= (x+1)<<rem_depth;
 387     int try= (y+1)<<rem_depth;
     /* neighbours; null_block stands in for those outside the picture */
 388     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
 389     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
 390     const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
 391     const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
 392     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
 393     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
     /* the colors of the left neighbour are the intra color predictors */
 394     int pl = left->color[0];
 395     int pcb= left->color[1];
 396     int pcr= left->color[2];
 397     int pmx, pmy;
 398     int mx=0, my=0;
 399     int l,cr,cb;
 400     const int stride= s->current_picture->linesize[0];
 401     const int uvstride= s->current_picture->linesize[1];
 402     const uint8_t *const current_data[3] = { s->input_picture->data[0] + (x + y*  stride)*block_w,
 403                                 s->input_picture->data[1] + ((x*block_w)>>s->chroma_h_shift) + ((y*uvstride*block_w)>>s->chroma_v_shift),
 404                                 s->input_picture->data[2] + ((x*block_w)>>s->chroma_h_shift) + ((y*uvstride*block_w)>>s->chroma_v_shift)};
 405     int P[10][2];
 406     int16_t last_mv[3][2];
 407     int qpel= !!(s->avctx->flags & AV_CODEC_FLAG_QPEL); //only used to derive shift below
 408     const int shift= 1+qpel;
 409     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
 410     int mx_context= av_log2(2*FFABS(left->mx - top->mx));
 411     int my_context= av_log2(2*FFABS(left->my - top->my));
 412     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
 413     int ref, best_ref, ref_score, ref_mx, ref_my;
 414
 415     av_assert0(sizeof(s->block_state) >= 256);
 416     if(s->keyframe){
 417         set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
 418         return 0;
 419     }
 420
 421 //    clip predictors / edge ?
 422
     /* motion vector predictors taken from the neighbours */
 423     P_LEFT[0]= left->mx;
 424     P_LEFT[1]= left->my;
 425     P_TOP [0]= top->mx;
 426     P_TOP [1]= top->my;
 427     P_TOPRIGHT[0]= tr->mx;
 428     P_TOPRIGHT[1]= tr->my;
 429
 430     last_mv[0][0]= s->block[index].mx;
 431     last_mv[0][1]= s->block[index].my;
 432     last_mv[1][0]= right->mx;
 433     last_mv[1][1]= right->my;
 434     last_mv[2][0]= bottom->mx;
 435     last_mv[2][1]= bottom->my;
 436
 437     enc->m.mb_stride = 2;
 438     enc->m.mb_x =
 439     enc->m.mb_y = 0;
 440     c->skip= 0;
 441
 442     av_assert1(c->  stride ==   stride);
 443     av_assert1(c->uvstride == uvstride);
 444
 445     c->penalty_factor    = get_penalty_factor(enc->lambda, enc->lambda2, c->avctx->me_cmp);
 446     c->sub_penalty_factor= get_penalty_factor(enc->lambda, enc->lambda2, c->avctx->me_sub_cmp);
 447     c->mb_penalty_factor = get_penalty_factor(enc->lambda, enc->lambda2, c->avctx->mb_cmp);
 448     c->current_mv_penalty = c->mv_penalty[enc->m.f_code=1] + MAX_DMV;
 449
     /* search range relative to the block position */
 450     c->xmin = - x*block_w - 16+3;
 451     c->ymin = - y*block_w - 16+3;
 452     c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
 453     c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
 454
 455     if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
 456     if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
 457     if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
 458     if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
 459     if(P_TOPRIGHT[0] < (c->xmin * (1<<shift))) P_TOPRIGHT[0]= (c->xmin * (1<<shift));
 460     if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
 461     if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
 462
 463     P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
 464     P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
 465
 466     if (!y) {
 467         c->pred_x= P_LEFT[0];
 468         c->pred_y= P_LEFT[1];
 469     } else {
 470         c->pred_x = P_MEDIAN[0];
 471         c->pred_y = P_MEDIAN[1];
 472     }
 473
     /* inter candidate: keep the reference frame with the lowest score */
 474     score= INT_MAX;
 475     best_ref= 0;
 476     for(ref=0; ref<s->ref_frames; ref++){
 477         init_ref(c, current_data, s->last_picture[ref]->data, NULL, block_w*x, block_w*y, 0);
 478
 479         ref_score= ff_epzs_motion_search(&enc->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
 480                                          (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
 481
 482         av_assert2(ref_mx >= c->xmin);
 483         av_assert2(ref_mx <= c->xmax);
 484         av_assert2(ref_my >= c->ymin);
 485         av_assert2(ref_my <= c->ymax);
 486
         /* the value returned by sub_motion_search() is replaced right away
          * by ff_get_mb_score(); the call is kept for the ref_mx / ref_my
          * it is handed by pointer */
 487         ref_score= c->sub_motion_search(&enc->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
 488         ref_score= ff_get_mb_score(&enc->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
         /* penalty that grows with the reference index */
 489         ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
 490         if(s->ref_mvs[ref]){
 491             s->ref_mvs[ref][index][0]= ref_mx;
 492             s->ref_mvs[ref][index][1]= ref_my;
 493             s->ref_scores[ref][index]= ref_score;
 494         }
 495         if(score > ref_score){
 496             score= ref_score;
 497             best_ref= ref;
 498             mx= ref_mx;
 499             my= ref_my;
 500         }
 501     }
 502     //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
 503
 504   //  subpel search
     /* trial-encode the inter choice with a copy of the coder that writes
      * into p_buffer, and add its bit cost to the score */
 505     base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
 506     pc= s->c;
 507     pc.bytestream_start=
 508     pc.bytestream= p_buffer; //FIXME end/start? and at the other too
 509     memcpy(p_state, s->block_state, sizeof(s->block_state));
 510
 511     if(level!=s->block_max_depth)
 512         put_rac(&pc, &p_state[4 + s_context], 1);
 513     put_rac(&pc, &p_state[1 + left->type + top->type], 0);
 514     if(s->ref_frames > 1)
 515         put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
 516     pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
 517     put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
 518     put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
 519     p_len= pc.bytestream - pc.bytestream_start;
 520     score += (enc->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
 521
     /* intra candidate: l is the rounded mean of the luma block; iscore is
      * pix_norm1() - 2*l*sum + l*l*block_s (the squared error around l if
      * pix_norm1() returns the sum of squares -- confirm) */
 522     block_s= block_w*block_w;
 523     sum = pix_sum(current_data[0], stride, block_w, block_w);
 524     l= (sum + block_s/2)/block_s;
 525     iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
 526
 527     if (s->nb_planes > 2) {
 528         block_s= block_w*block_w>>(s->chroma_h_shift + s->chroma_v_shift);
 529         sum = pix_sum(current_data[1], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift);
 530         cb= (sum + block_s/2)/block_s;
 531     //    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
 532         sum = pix_sum(current_data[2], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift);
 533         cr= (sum + block_s/2)/block_s;
 534     //    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
 535     }else
 536         cb = cr = 0;
 537
     /* trial-encode the intra choice into i_buffer */
 538     ic= s->c;
 539     ic.bytestream_start=
 540     ic.bytestream= i_buffer; //FIXME end/start? and at the other too
 541     memcpy(i_state, s->block_state, sizeof(s->block_state));
 542     if(level!=s->block_max_depth)
 543         put_rac(&ic, &i_state[4 + s_context], 1);
 544     put_rac(&ic, &i_state[1 + left->type + top->type], 1);
 545     put_symbol(&ic, &i_state[32],  l-pl , 1);
 546     if (s->nb_planes > 2) {
 547         put_symbol(&ic, &i_state[64], cb-pcb, 1);
 548         put_symbol(&ic, &i_state[96], cr-pcr, 1);
 549     }
 550     i_len= ic.bytestream - ic.bytestream_start;
 551     iscore += (enc->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
 552
 553     av_assert1(iscore < 255*255*256 + enc->lambda2*10);
 554     av_assert1(iscore >= 0);
 555     av_assert1(l>=0 && l<=255);
 556     av_assert1(pl>=0 && pl<=255);
 557
 558     if(level==0){
 559         int varc= iscore >> 8;
 560         int vard= score >> 8;
 561         if (vard <= 64 || vard < varc)
 562             c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
 563         else
 564             c->scene_change_score += enc->m.qscale;
 565     }
 566
     /* split candidate: written directly to s->c; kept only if it beats both
      * the inter and the intra score */
 567     if(level!=s->block_max_depth){
 568         put_rac(&s->c, &s->block_state[4 + s_context], 0);
 569         score2 = encode_q_branch(enc, level+1, 2*x+0, 2*y+0);
 570         score2+= encode_q_branch(enc, level+1, 2*x+1, 2*y+0);
 571         score2+= encode_q_branch(enc, level+1, 2*x+0, 2*y+1);
 572         score2+= encode_q_branch(enc, level+1, 2*x+1, 2*y+1);
 573         score2+= enc->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
 574
 575         if(score2 < score && score2 < iscore)
 576             return score2;
 577     }
 578
     /* no split: rewind to the entry position and install the cheaper trial
      * encoding together with its coder and context state */
 579     if(iscore < score){
 580         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
 581         memcpy(pbbak, i_buffer, i_len);
 582         s->c= ic;
 583         s->c.bytestream_start= pbbak_start;
 584         s->c.bytestream= pbbak + i_len;
 585         set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
 586         memcpy(s->block_state, i_state, sizeof(s->block_state));
 587         return iscore;
 588     }else{
 589         memcpy(pbbak, p_buffer, p_len);
 590         s->c= pc;
 591         s->c.bytestream_start= pbbak_start;
 592         s->c.bytestream= pbbak + p_len;
 593         set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
 594         memcpy(s->block_state, p_state, sizeof(s->block_state));
 595         return score;
 596     }
 597 }
 598
 599 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
 600     const int w= s->b_width  << s->block_max_depth;
 601     const int rem_depth= s->block_max_depth - level;
 602     const int index= (x + y*w) << rem_depth;
 603     int trx= (x+1)<<rem_depth;
 604     BlockNode *b= &s->block[index];
 605     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
 606     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
 607     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
 608     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
 609     int pl = left->color[0];
 610     int pcb= left->color[1];
 611     int pcr= left->color[2];
 612     int pmx, pmy;
 613     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
 614     int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
 615     int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
 616     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
 617
 618     if(s->keyframe){
 619         set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
 620         return;
 621     }
 622
 623     if(level!=s->block_max_depth){
 624         if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
 625             put_rac(&s->c, &s->block_state[4 + s_context], 1);
 626         }else{
 627             put_rac(&s->c, &s->block_state[4 + s_context], 0);
 628             encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
 629             encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
 630             encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
 631             encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
 632             return;
 633         }
 634     }
 635     if(b->type & BLOCK_INTRA){
 636         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
 637         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
 638         put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
 639         if (s->nb_planes > 2) {
 640             put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
 641             put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
 642         }
 643         set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
 644     }else{
 645         pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
 646         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
 647         if(s->ref_frames > 1)
 648             put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
 649         put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
 650         put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
 651         set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
 652     }
 653 }
 654
 655 static int get_dc(SnowEncContext *enc, int mb_x, int mb_y, int plane_index)
 656 {
 657     SnowContext *const s = &enc->com;
 658     int i, x2, y2;
 659     Plane *p= &s->plane[plane_index];
 660     const int block_size = MB_SIZE >> s->block_max_depth;
 661     const int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
 662     const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
 663     const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
 664     const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
 665     const int ref_stride= s->current_picture->linesize[plane_index];
 666     const uint8_t *src = s->input_picture->data[plane_index];
 667     IDWTELEM *dst= (IDWTELEM*)enc->m.sc.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
 668     const int b_stride = s->b_width << s->block_max_depth;
 669     const int w= p->width;
 670     const int h= p->height;
 671     int index= mb_x + mb_y*b_stride;
 672     BlockNode *b= &s->block[index];
 673     BlockNode backup= *b;
 674     int ab=0;
 675     int aa=0;
 676
 677     av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc stuff above
 678
 679     b->type|= BLOCK_INTRA;
 680     b->color[plane_index]= 0;
 681     memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
 682
 683     for(i=0; i<4; i++){
 684         int mb_x2= mb_x + (i &1) - 1;
 685         int mb_y2= mb_y + (i>>1) - 1;
 686         int x= block_w*mb_x2 + block_w/2;
 687         int y= block_h*mb_y2 + block_h/2;
 688
 689         add_yblock(s, 0, NULL, dst + (i&1)*block_w + (i>>1)*obmc_stride*block_h, NULL, obmc,
 690                     x, y, block_w, block_h, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
 691
 692         for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_h); y2++){
 693             for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
 694                 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_h*mb_y - block_h/2))*obmc_stride;
 695                 int obmc_v= obmc[index];
 696                 int d;
 697                 if(y<0) obmc_v += obmc[index + block_h*obmc_stride];
 698                 if(x<0) obmc_v += obmc[index + block_w];
 699                 if(y+block_h>h) obmc_v += obmc[index - block_h*obmc_stride];
 700                 if(x+block_w>w) obmc_v += obmc[index - block_w];
 701                 //FIXME precalculate this or simplify it somehow else
 702
 703                 d = -dst[index] + (1<<(FRAC_BITS-1));
 704                 dst[index] = d;
 705                 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
 706                 aa += obmc_v * obmc_v; //FIXME precalculate this
 707             }
 708         }
 709     }
 710     *b= backup;
 711
 712     return av_clip_uint8( ROUNDED_DIV(ab<<LOG2_OBMC_MAX, aa) ); //FIXME we should not need clipping
 713 }
 714
 715 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
 716     const int b_stride = s->b_width << s->block_max_depth;
 717     const int b_height = s->b_height<< s->block_max_depth;
 718     int index= x + y*b_stride;
 719     const BlockNode *b     = &s->block[index];
 720     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
 721     const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
 722     const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
 723     const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
 724     int dmx, dmy;
 725 //  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
 726 //  int my_context= av_log2(2*FFABS(left->my - top->my));
 727
 728     if(x<0 || x>=b_stride || y>=b_height)
 729         return 0;
 730 /*
 731 1            0      0
 732 01X          1-2    1
 733 001XX        3-6    2-3
 734 0001XXX      7-14   4-7
 735 00001XXXX   15-30   8-15
 736 */
 737 //FIXME try accurate rate
 738 //FIXME intra and inter predictors if surrounding blocks are not the same type
 739     if(b->type & BLOCK_INTRA){
 740         return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
 741                    + av_log2(2*FFABS(left->color[1] - b->color[1]))
 742                    + av_log2(2*FFABS(left->color[2] - b->color[2])));
 743     }else{
 744         pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
 745         dmx-= b->mx;
 746         dmy-= b->my;
 747         return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
 748                     + av_log2(2*FFABS(dmy))
 749                     + av_log2(2*b->ref));
 750     }
 751 }
 752
 753 static int get_block_rd(SnowEncContext *enc, int mb_x, int mb_y,
 754                         int plane_index, uint8_t (*obmc_edged)[MB_SIZE * 2])
 755 {
 756     SnowContext *const s = &enc->com;
 757     Plane *p= &s->plane[plane_index];
 758     const int block_size = MB_SIZE >> s->block_max_depth;
 759     const int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
 760     const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
 761     const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
 762     const int ref_stride= s->current_picture->linesize[plane_index];
 763     uint8_t *dst= s->current_picture->data[plane_index];
 764     const uint8_t *src = s->input_picture->data[plane_index];
 765     IDWTELEM *pred= (IDWTELEM*)enc->m.sc.obmc_scratchpad + plane_index*block_size*block_size*4;
 766     uint8_t *cur = s->scratchbuf;
 767     uint8_t *tmp = s->emu_edge_buffer;
 768     const int b_stride = s->b_width << s->block_max_depth;
 769     const int b_height = s->b_height<< s->block_max_depth;
 770     const int w= p->width;
 771     const int h= p->height;
 772     int distortion;
 773     int rate= 0;
 774     const int penalty_factor = get_penalty_factor(enc->lambda, enc->lambda2, s->avctx->me_cmp);
 775     int sx= block_w*mb_x - block_w/2;
 776     int sy= block_h*mb_y - block_h/2;
 777     int x0= FFMAX(0,-sx);
 778     int y0= FFMAX(0,-sy);
 779     int x1= FFMIN(block_w*2, w-sx);
 780     int y1= FFMIN(block_h*2, h-sy);
 781     int i,x,y;
 782
 783     av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumtions below chckinhg only block_w
 784
 785     ff_snow_pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_h*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
 786
 787     for(y=y0; y<y1; y++){
 788         const uint8_t *obmc1= obmc_edged[y];
 789         const IDWTELEM *pred1 = pred + y*obmc_stride;
 790         uint8_t *cur1 = cur + y*ref_stride;
 791         uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
 792         for(x=x0; x<x1; x++){
 793 #if FRAC_BITS >= LOG2_OBMC_MAX
 794             int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
 795 #else
 796             int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
 797 #endif
 798             v = (v + pred1[x]) >> FRAC_BITS;
 799             if(v&(~255)) v= ~(v>>31);
 800             dst1[x] = v;
 801         }
 802     }
 803
 804     /* copy the regions where obmc[] = (uint8_t)256 */
 805     if(LOG2_OBMC_MAX == 8
 806         && (mb_x == 0 || mb_x == b_stride-1)
 807         && (mb_y == 0 || mb_y == b_height-1)){
 808         if(mb_x == 0)
 809             x1 = block_w;
 810         else
 811             x0 = block_w;
 812         if(mb_y == 0)
 813             y1 = block_h;
 814         else
 815             y0 = block_h;
 816         for(y=y0; y<y1; y++)
 817             memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
 818     }
 819
 820     if(block_w==16){
 821         /* FIXME rearrange dsputil to fit 32x32 cmp functions */
 822         /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
 823         /* FIXME cmps overlap but do not cover the wavelet's whole support.
 824          * So improving the score of one block is not strictly guaranteed
 825          * to improve the score of the whole frame, thus iterative motion
 826          * estimation does not always converge. */
 827         if(s->avctx->me_cmp == FF_CMP_W97)
 828             distortion = ff_w97_32_c(&enc->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
 829         else if(s->avctx->me_cmp == FF_CMP_W53)
 830             distortion = ff_w53_32_c(&enc->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
 831         else{
 832             distortion = 0;
 833             for(i=0; i<4; i++){
 834                 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
 835                 distortion += enc->mecc.me_cmp[0](&enc->m, src + off, dst + off, ref_stride, 16);
 836             }
 837         }
 838     }else{
 839         av_assert2(block_w==8);
 840         distortion = enc->mecc.me_cmp[0](&enc->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
 841     }
 842
 843     if(plane_index==0){
 844         for(i=0; i<4; i++){
 845 /* ..RRr
 846  * .RXx.
 847  * rxx..
 848  */
 849             rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
 850         }
 851         if(mb_x == b_stride-2)
 852             rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
 853     }
 854     return distortion + rate*penalty_factor;
 855 }
 856
 857 static int get_4block_rd(SnowEncContext *enc, int mb_x, int mb_y, int plane_index)
 858 {
 859     SnowContext *const s = &enc->com;
 860     int i, y2;
 861     Plane *p= &s->plane[plane_index];
 862     const int block_size = MB_SIZE >> s->block_max_depth;
 863     const int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
 864     const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
 865     const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
 866     const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
 867     const int ref_stride= s->current_picture->linesize[plane_index];
 868     uint8_t *dst= s->current_picture->data[plane_index];
 869     const uint8_t *src = s->input_picture->data[plane_index];
 870     //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
 871     // const has only been removed from zero_dst to suppress a warning
 872     static IDWTELEM zero_dst[4096]; //FIXME
 873     const int b_stride = s->b_width << s->block_max_depth;
 874     const int w= p->width;
 875     const int h= p->height;
 876     int distortion= 0;
 877     int rate= 0;
 878     const int penalty_factor= get_penalty_factor(enc->lambda, enc->lambda2, s->avctx->me_cmp);
 879
 880     av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumtions below
 881
 882     for(i=0; i<9; i++){
 883         int mb_x2= mb_x + (i%3) - 1;
 884         int mb_y2= mb_y + (i/3) - 1;
 885         int x= block_w*mb_x2 + block_w/2;
 886         int y= block_h*mb_y2 + block_h/2;
 887
 888         add_yblock(s, 0, NULL, zero_dst, dst, obmc,
 889                    x, y, block_w, block_h, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
 890
 891         //FIXME find a cleaner/simpler way to skip the outside stuff
 892         for(y2= y; y2<0; y2++)
 893             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
 894         for(y2= h; y2<y+block_h; y2++)
 895             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
 896         if(x<0){
 897             for(y2= y; y2<y+block_h; y2++)
 898                 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
 899         }
 900         if(x+block_w > w){
 901             for(y2= y; y2<y+block_h; y2++)
 902                 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
 903         }
 904
 905         av_assert1(block_w== 8 || block_w==16);
 906         distortion += enc->mecc.me_cmp[block_w==8](&enc->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_h);
 907     }
 908
 909     if(plane_index==0){
 910         BlockNode *b= &s->block[mb_x+mb_y*b_stride];
 911         int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
 912
 913 /* ..RRRr
 914  * .RXXx.
 915  * .RXXx.
 916  * rxxx.
 917  */
 918         if(merged)
 919             rate = get_block_bits(s, mb_x, mb_y, 2);
 920         for(i=merged?4:0; i<9; i++){
 921             static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
 922             rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
 923         }
 924     }
 925     return distortion + rate*penalty_factor;
 926 }
 927
 928 static int encode_subband_c0run(SnowContext *s, SubBand *b, const IDWTELEM *src, const IDWTELEM *parent, int stride, int orientation){
 929     const int w= b->width;
 930     const int h= b->height;
 931     int x, y;
 932
 933     if(1){
 934         int run=0;
 935         int *runs = s->run_buffer;
 936         int run_index=0;
 937         int max_index;
 938
 939         for(y=0; y<h; y++){
 940             for(x=0; x<w; x++){
 941                 int v, p=0;
 942                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
 943                 v= src[x + y*stride];
 944
 945                 if(y){
 946                     t= src[x + (y-1)*stride];
 947                     if(x){
 948                         lt= src[x - 1 + (y-1)*stride];
 949                     }
 950                     if(x + 1 < w){
 951                         rt= src[x + 1 + (y-1)*stride];
 952                     }
 953                 }
 954                 if(x){
 955                     l= src[x - 1 + y*stride];
 956                     /*if(x > 1){
 957                         if(orientation==1) ll= src[y + (x-2)*stride];
 958                         else               ll= src[x - 2 + y*stride];
 959                     }*/
 960                 }
 961                 if(parent){
 962                     int px= x>>1;
 963                     int py= y>>1;
 964                     if(px<b->parent->width && py<b->parent->height)
 965                         p= parent[px + py*2*stride];
 966                 }
 967                 if(!(/*ll|*/l|lt|t|rt|p)){
 968                     if(v){
 969                         runs[run_index++]= run;
 970                         run=0;
 971                     }else{
 972                         run++;
 973                     }
 974                 }
 975             }
 976         }
 977         max_index= run_index;
 978         runs[run_index++]= run;
 979         run_index=0;
 980         run= runs[run_index++];
 981
 982         put_symbol2(&s->c, b->state[30], max_index, 0);
 983         if(run_index <= max_index)
 984             put_symbol2(&s->c, b->state[1], run, 3);
 985
 986         for(y=0; y<h; y++){
 987             if(s->c.bytestream_end - s->c.bytestream < w*40){
 988                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
 989                 return AVERROR(ENOMEM);
 990             }
 991             for(x=0; x<w; x++){
 992                 int v, p=0;
 993                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
 994                 v= src[x + y*stride];
 995
 996                 if(y){
 997                     t= src[x + (y-1)*stride];
 998                     if(x){
 999                         lt= src[x - 1 + (y-1)*stride];
1000                     }
1001                     if(x + 1 < w){
1002                         rt= src[x + 1 + (y-1)*stride];
1003                     }
1004                 }
1005                 if(x){
1006                     l= src[x - 1 + y*stride];
1007                     /*if(x > 1){
1008                         if(orientation==1) ll= src[y + (x-2)*stride];
1009                         else               ll= src[x - 2 + y*stride];
1010                     }*/
1011                 }
1012                 if(parent){
1013                     int px= x>>1;
1014                     int py= y>>1;
1015                     if(px<b->parent->width && py<b->parent->height)
1016                         p= parent[px + py*2*stride];
1017                 }
1018                 if(/*ll|*/l|lt|t|rt|p){
1019                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1020
1021                     put_rac(&s->c, &b->state[0][context], !!v);
1022                 }else{
1023                     if(!run){
1024                         run= runs[run_index++];
1025
1026                         if(run_index <= max_index)
1027                             put_symbol2(&s->c, b->state[1], run, 3);
1028                         av_assert2(v);
1029                     }else{
1030                         run--;
1031                         av_assert2(!v);
1032                     }
1033                 }
1034                 if(v){
1035                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1036                     int l2= 2*FFABS(l) + (l<0);
1037                     int t2= 2*FFABS(t) + (t<0);
1038
1039                     put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1040                     put_rac(&s->c, &b->state[0][16 + 1 + 3 + ff_quant3bA[l2&0xFF] + 3*ff_quant3bA[t2&0xFF]], v<0);
1041                 }
1042             }
1043         }
1044     }
1045     return 0;
1046 }
1047
1048 static int encode_subband(SnowContext *s, SubBand *b, const IDWTELEM *src, const IDWTELEM *parent, int stride, int orientation){
1049 //    encode_subband_qtree(s, b, src, parent, stride, orientation);
1050 //    encode_subband_z0run(s, b, src, parent, stride, orientation);
1051     return encode_subband_c0run(s, b, src, parent, stride, orientation);
1052 //    encode_subband_dzr(s, b, src, parent, stride, orientation);
1053 }
1054
1055 static av_always_inline int check_block_intra(SnowEncContext *enc, int mb_x, int mb_y, int p[3],
1056                                               uint8_t (*obmc_edged)[MB_SIZE * 2], int *best_rd)
1057 {
1058     SnowContext *const s = &enc->com;
1059     const int b_stride= s->b_width << s->block_max_depth;
1060     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
1061     BlockNode backup= *block;
1062     int rd;
1063
1064     av_assert2(mb_x>=0 && mb_y>=0);
1065     av_assert2(mb_x<b_stride);
1066
1067     block->color[0] = p[0];
1068     block->color[1] = p[1];
1069     block->color[2] = p[2];
1070     block->type |= BLOCK_INTRA;
1071
1072     rd = get_block_rd(enc, mb_x, mb_y, 0, obmc_edged) + enc->intra_penalty;
1073
1074 //FIXME chroma
1075     if(rd < *best_rd){
1076         *best_rd= rd;
1077         return 1;
1078     }else{
1079         *block= backup;
1080         return 0;
1081     }
1082 }
1083
1084 /* special case for int[2] args we discard afterwards,
1085  * fixes compilation problem with gcc 2.95 */
1086 static av_always_inline int check_block_inter(SnowEncContext *enc,
1087                                               int mb_x, int mb_y, int p0, int p1,
1088                                               uint8_t (*obmc_edged)[MB_SIZE * 2], int *best_rd)
1089 {
1090     SnowContext *const s = &enc->com;
1091     const int b_stride = s->b_width << s->block_max_depth;
1092     BlockNode *block = &s->block[mb_x + mb_y * b_stride];
1093     BlockNode backup = *block;
1094     unsigned value;
1095     int rd, index;
1096
1097     av_assert2(mb_x >= 0 && mb_y >= 0);
1098     av_assert2(mb_x < b_stride);
1099
1100     index = (p0 + 31 * p1) & (ME_CACHE_SIZE-1);
1101     value = enc->me_cache_generation + (p0 >> 10) + p1 * (1 << 6) + (block->ref << 12);
1102     if (enc->me_cache[index] == value)
1103         return 0;
1104     enc->me_cache[index] = value;
1105
1106     block->mx = p0;
1107     block->my = p1;
1108     block->type &= ~BLOCK_INTRA;
1109
1110     rd = get_block_rd(enc, mb_x, mb_y, 0, obmc_edged);
1111
1112 //FIXME chroma
1113     if (rd < *best_rd) {
1114         *best_rd = rd;
1115         return 1;
1116     } else {
1117         *block   = backup;
1118         return 0;
1119     }
1120 }
1121
1122 static av_always_inline int check_4block_inter(SnowEncContext *enc, int mb_x, int mb_y,
1123                                                int p0, int p1, int ref, int *best_rd)
1124 {
1125     SnowContext *const s = &enc->com;
1126     const int b_stride= s->b_width << s->block_max_depth;
1127     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
1128     BlockNode backup[4];
1129     unsigned value;
1130     int rd, index;
1131
1132     /* We don't initialize backup[] during variable declaration, because
1133      * that fails to compile on MSVC: "cannot convert from 'BlockNode' to
1134      * 'int16_t'". */
1135     backup[0] = block[0];
1136     backup[1] = block[1];
1137     backup[2] = block[b_stride];
1138     backup[3] = block[b_stride + 1];
1139
1140     av_assert2(mb_x>=0 && mb_y>=0);
1141     av_assert2(mb_x<b_stride);
1142     av_assert2(((mb_x|mb_y)&1) == 0);
1143
1144     index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
1145     value = enc->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
1146     if (enc->me_cache[index] == value)
1147         return 0;
1148     enc->me_cache[index] = value;
1149
1150     block->mx= p0;
1151     block->my= p1;
1152     block->ref= ref;
1153     block->type &= ~BLOCK_INTRA;
1154     block[1]= block[b_stride]= block[b_stride+1]= *block;
1155
1156     rd = get_4block_rd(enc, mb_x, mb_y, 0);
1157
1158 //FIXME chroma
1159     if(rd < *best_rd){
1160         *best_rd= rd;
1161         return 1;
1162     }else{
1163         block[0]= backup[0];
1164         block[1]= backup[1];
1165         block[b_stride]= backup[2];
1166         block[b_stride+1]= backup[3];
1167         return 0;
1168     }
1169 }
1170
1171 static void iterative_me(SnowEncContext *enc)
1172 {
1173     SnowContext *const s = &enc->com;
1174     int pass, mb_x, mb_y;
1175     const int b_width = s->b_width  << s->block_max_depth;
1176     const int b_height= s->b_height << s->block_max_depth;
1177     const int b_stride= b_width;
1178     int color[3];
1179
1180     {
1181         RangeCoder r = s->c;
1182         uint8_t state[sizeof(s->block_state)];
1183         memcpy(state, s->block_state, sizeof(s->block_state));
1184         for(mb_y= 0; mb_y<s->b_height; mb_y++)
1185             for(mb_x= 0; mb_x<s->b_width; mb_x++)
1186                 encode_q_branch(enc, 0, mb_x, mb_y);
1187         s->c = r;
1188         memcpy(s->block_state, state, sizeof(s->block_state));
1189     }
1190
1191     for(pass=0; pass<25; pass++){
1192         int change= 0;
1193
1194         for(mb_y= 0; mb_y<b_height; mb_y++){
1195             for(mb_x= 0; mb_x<b_width; mb_x++){
1196                 int dia_change, i, j, ref;
1197                 int best_rd= INT_MAX, ref_rd;
1198                 BlockNode backup, ref_b;
1199                 const int index= mb_x + mb_y * b_stride;
1200                 BlockNode *block= &s->block[index];
1201                 BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
1202                 BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
1203                 BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
1204                 BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
1205                 BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
1206                 BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
1207                 BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
1208                 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
1209                 const int b_w= (MB_SIZE >> s->block_max_depth);
1210                 uint8_t obmc_edged[MB_SIZE * 2][MB_SIZE * 2];
1211
1212                 if(pass && (block->type & BLOCK_OPT))
1213                     continue;
1214                 block->type |= BLOCK_OPT;
1215
1216                 backup= *block;
1217
1218                 if (!enc->me_cache_generation)
1219                     memset(enc->me_cache, 0, sizeof(enc->me_cache));
1220                 enc->me_cache_generation += 1<<22;
1221
1222                 //FIXME precalculate
1223                 {
1224                     int x, y;
1225                     for (y = 0; y < b_w * 2; y++)
1226                         memcpy(obmc_edged[y], ff_obmc_tab[s->block_max_depth] + y * b_w * 2, b_w * 2);
1227                     if(mb_x==0)
1228                         for(y=0; y<b_w*2; y++)
1229                             memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
1230                     if(mb_x==b_stride-1)
1231                         for(y=0; y<b_w*2; y++)
1232                             memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
1233                     if(mb_y==0){
1234                         for(x=0; x<b_w*2; x++)
1235                             obmc_edged[0][x] += obmc_edged[b_w-1][x];
1236                         for(y=1; y<b_w; y++)
1237                             memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
1238                     }
1239                     if(mb_y==b_height-1){
1240                         for(x=0; x<b_w*2; x++)
1241                             obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
1242                         for(y=b_w; y<b_w*2-1; y++)
1243                             memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
1244                     }
1245                 }
1246
1247                 //skip stuff outside the picture
1248                 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
1249                     const uint8_t *src = s->input_picture->data[0];
1250                     uint8_t *dst= s->current_picture->data[0];
1251                     const int stride= s->current_picture->linesize[0];
1252                     const int block_w= MB_SIZE >> s->block_max_depth;
1253                     const int block_h= MB_SIZE >> s->block_max_depth;
1254                     const int sx= block_w*mb_x - block_w/2;
1255                     const int sy= block_h*mb_y - block_h/2;
1256                     const int w= s->plane[0].width;
1257                     const int h= s->plane[0].height;
1258                     int y;
1259
1260                     for(y=sy; y<0; y++)
1261                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
1262                     for(y=h; y<sy+block_h*2; y++)
1263                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
1264                     if(sx<0){
1265                         for(y=sy; y<sy+block_h*2; y++)
1266                             memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
1267                     }
1268                     if(sx+block_w*2 > w){
1269                         for(y=sy; y<sy+block_h*2; y++)
1270                             memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
1271                     }
1272                 }
1273
1274                 // intra(black) = neighbors' contribution to the current block
1275                 for(i=0; i < s->nb_planes; i++)
1276                     color[i]= get_dc(enc, mb_x, mb_y, i);
1277
1278                 // get previous score (cannot be cached due to OBMC)
1279                 if(pass > 0 && (block->type&BLOCK_INTRA)){
1280                     int color0[3]= {block->color[0], block->color[1], block->color[2]};
1281                     check_block_intra(enc, mb_x, mb_y, color0, obmc_edged, &best_rd);
1282                 }else
1283                     check_block_inter(enc, mb_x, mb_y, block->mx, block->my, obmc_edged, &best_rd);
1284
1285                 ref_b= *block;
1286                 ref_rd= best_rd;
1287                 for(ref=0; ref < s->ref_frames; ref++){
1288                     int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
1289                     if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
1290                         continue;
1291                     block->ref= ref;
1292                     best_rd= INT_MAX;
1293
1294                     check_block_inter(enc, mb_x, mb_y, mvr[0][0], mvr[0][1], obmc_edged, &best_rd);
1295                     check_block_inter(enc, mb_x, mb_y, 0, 0, obmc_edged, &best_rd);
1296                     if(tb)
1297                         check_block_inter(enc, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], obmc_edged, &best_rd);
1298                     if(lb)
1299                         check_block_inter(enc, mb_x, mb_y, mvr[-1][0], mvr[-1][1], obmc_edged, &best_rd);
1300                     if(rb)
1301                         check_block_inter(enc, mb_x, mb_y, mvr[1][0], mvr[1][1], obmc_edged, &best_rd);
1302                     if(bb)
1303                         check_block_inter(enc, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], obmc_edged, &best_rd);
1304
1305                     /* fullpel ME */
1306                     //FIXME avoid subpel interpolation / round to nearest integer
1307                     do{
1308                         int newx = block->mx;
1309                         int newy = block->my;
1310                         int dia_size = enc->iterative_dia_size ? enc->iterative_dia_size : FFMAX(s->avctx->dia_size, 1);
1311                         dia_change=0;
1312                         for(i=0; i < dia_size; i++){
1313                             for(j=0; j<i; j++){
1314                                 dia_change |= check_block_inter(enc, mb_x, mb_y, newx+4*(i-j), newy+(4*j), obmc_edged, &best_rd);
1315                                 dia_change |= check_block_inter(enc, mb_x, mb_y, newx-4*(i-j), newy-(4*j), obmc_edged, &best_rd);
1316                                 dia_change |= check_block_inter(enc, mb_x, mb_y, newx-(4*j), newy+4*(i-j), obmc_edged, &best_rd);
1317                                 dia_change |= check_block_inter(enc, mb_x, mb_y, newx+(4*j), newy-4*(i-j), obmc_edged, &best_rd);
1318                             }
1319                         }
1320                     }while(dia_change);
1321                     /* subpel ME */
1322                     do{
1323                         static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
1324                         dia_change=0;
1325                         for(i=0; i<8; i++)
1326                             dia_change |= check_block_inter(enc, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], obmc_edged, &best_rd);
1327                     }while(dia_change);
1328                     //FIXME or try the standard 2 pass qpel or similar
1329
1330                     mvr[0][0]= block->mx;
1331                     mvr[0][1]= block->my;
1332                     if(ref_rd > best_rd){
1333                         ref_rd= best_rd;
1334                         ref_b= *block;
1335                     }
1336                 }
1337                 best_rd= ref_rd;
1338                 *block= ref_b;
1339                 check_block_intra(enc, mb_x, mb_y, color, obmc_edged, &best_rd);
1340                 //FIXME RD style color selection
1341                 if(!same_block(block, &backup)){
1342                     if(tb ) tb ->type &= ~BLOCK_OPT;
1343                     if(lb ) lb ->type &= ~BLOCK_OPT;
1344                     if(rb ) rb ->type &= ~BLOCK_OPT;
1345                     if(bb ) bb ->type &= ~BLOCK_OPT;
1346                     if(tlb) tlb->type &= ~BLOCK_OPT;
1347                     if(trb) trb->type &= ~BLOCK_OPT;
1348                     if(blb) blb->type &= ~BLOCK_OPT;
1349                     if(brb) brb->type &= ~BLOCK_OPT;
1350                     change ++;
1351                 }
1352             }
1353         }
1354         av_log(s->avctx, AV_LOG_DEBUG, "pass:%d changed:%d\n", pass, change);
1355         if(!change)
1356             break;
1357     }
1358
1359     if(s->block_max_depth == 1){
1360         int change= 0;
1361         for(mb_y= 0; mb_y<b_height; mb_y+=2){
1362             for(mb_x= 0; mb_x<b_width; mb_x+=2){
1363                 int i;
1364                 int best_rd, init_rd;
1365                 const int index= mb_x + mb_y * b_stride;
1366                 BlockNode *b[4];
1367
1368                 b[0]= &s->block[index];
1369                 b[1]= b[0]+1;
1370                 b[2]= b[0]+b_stride;
1371                 b[3]= b[2]+1;
1372                 if(same_block(b[0], b[1]) &&
1373                    same_block(b[0], b[2]) &&
1374                    same_block(b[0], b[3]))
1375                     continue;
1376
1377                 if (!enc->me_cache_generation)
1378                     memset(enc->me_cache, 0, sizeof(enc->me_cache));
1379                 enc->me_cache_generation += 1<<22;
1380
1381                 init_rd = best_rd = get_4block_rd(enc, mb_x, mb_y, 0);
1382
1383                 //FIXME more multiref search?
1384                 check_4block_inter(enc, mb_x, mb_y,
1385                                    (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
1386                                    (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
1387
1388                 for(i=0; i<4; i++)
1389                     if(!(b[i]->type&BLOCK_INTRA))
1390                         check_4block_inter(enc, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
1391
1392                 if(init_rd != best_rd)
1393                     change++;
1394             }
1395         }
1396         av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
1397     }
1398 }
1399
1400 static void encode_blocks(SnowEncContext *enc, int search)
1401 {
1402     SnowContext *const s = &enc->com;
1403     int x, y;
1404     int w= s->b_width;
1405     int h= s->b_height;
1406
1407     if (enc->motion_est == FF_ME_ITER && !s->keyframe && search)
1408         iterative_me(enc);
1409
1410     for(y=0; y<h; y++){
1411         if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
1412             av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1413             return;
1414         }
1415         for(x=0; x<w; x++){
1416             if (enc->motion_est == FF_ME_ITER || !search)
1417                 encode_q_branch2(s, 0, x, y);
1418             else
1419                 encode_q_branch (enc, 0, x, y);
1420         }
1421     }
1422 }
1423
1424 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
1425     const int w= b->width;
1426     const int h= b->height;
1427     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1428     const int qmul= ff_qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
1429     int x,y, thres1, thres2;
1430
1431     if(s->qlog == LOSSLESS_QLOG){
1432         for(y=0; y<h; y++)
1433             for(x=0; x<w; x++)
1434                 dst[x + y*stride]= src[x + y*stride];
1435         return;
1436     }
1437
1438     bias= bias ? 0 : (3*qmul)>>3;
1439     thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
1440     thres2= 2*thres1;
1441
1442     if(!bias){
1443         for(y=0; y<h; y++){
1444             for(x=0; x<w; x++){
1445                 int i= src[x + y*stride];
1446
1447                 if((unsigned)(i+thres1) > thres2){
1448                     if(i>=0){
1449                         i<<= QEXPSHIFT;
1450                         i/= qmul; //FIXME optimize
1451                         dst[x + y*stride]=  i;
1452                     }else{
1453                         i= -i;
1454                         i<<= QEXPSHIFT;
1455                         i/= qmul; //FIXME optimize
1456                         dst[x + y*stride]= -i;
1457                     }
1458                 }else
1459                     dst[x + y*stride]= 0;
1460             }
1461         }
1462     }else{
1463         for(y=0; y<h; y++){
1464             for(x=0; x<w; x++){
1465                 int i= src[x + y*stride];
1466
1467                 if((unsigned)(i+thres1) > thres2){
1468                     if(i>=0){
1469                         i<<= QEXPSHIFT;
1470                         i= (i + bias) / qmul; //FIXME optimize
1471                         dst[x + y*stride]=  i;
1472                     }else{
1473                         i= -i;
1474                         i<<= QEXPSHIFT;
1475                         i= (i + bias) / qmul; //FIXME optimize
1476                         dst[x + y*stride]= -i;
1477                     }
1478                 }else
1479                     dst[x + y*stride]= 0;
1480             }
1481         }
1482     }
1483 }
1484
1485 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
1486     const int w= b->width;
1487     const int h= b->height;
1488     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1489     const int qmul= ff_qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1490     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1491     int x,y;
1492
1493     if(s->qlog == LOSSLESS_QLOG) return;
1494
1495     for(y=0; y<h; y++){
1496         for(x=0; x<w; x++){
1497             int i= src[x + y*stride];
1498             if(i<0){
1499                 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
1500             }else if(i>0){
1501                 src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
1502             }
1503         }
1504     }
1505 }
1506
1507 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
1508     const int w= b->width;
1509     const int h= b->height;
1510     int x,y;
1511
1512     for(y=h-1; y>=0; y--){
1513         for(x=w-1; x>=0; x--){
1514             int i= x + y*stride;
1515
1516             if(x){
1517                 if(use_median){
1518                     if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
1519                     else  src[i] -= src[i - 1];
1520                 }else{
1521                     if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
1522                     else  src[i] -= src[i - 1];
1523                 }
1524             }else{
1525                 if(y) src[i] -= src[i - stride];
1526             }
1527         }
1528     }
1529 }
1530
1531 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
1532     const int w= b->width;
1533     const int h= b->height;
1534     int x,y;
1535
1536     for(y=0; y<h; y++){
1537         for(x=0; x<w; x++){
1538             int i= x + y*stride;
1539
1540             if(x){
1541                 if(use_median){
1542                     if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
1543                     else  src[i] += src[i - 1];
1544                 }else{
1545                     if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
1546                     else  src[i] += src[i - 1];
1547                 }
1548             }else{
1549                 if(y) src[i] += src[i - stride];
1550             }
1551         }
1552     }
1553 }
1554
1555 static void encode_qlogs(SnowContext *s){
1556     int plane_index, level, orientation;
1557
1558     for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){
1559         for(level=0; level<s->spatial_decomposition_count; level++){
1560             for(orientation=level ? 1:0; orientation<4; orientation++){
1561                 if(orientation==2) continue;
1562                 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
1563             }
1564         }
1565     }
1566 }
1567
1568 static void encode_header(SnowContext *s){
1569     int plane_index, i;
1570     uint8_t kstate[32];
1571
1572     memset(kstate, MID_STATE, sizeof(kstate));
1573
1574     put_rac(&s->c, kstate, s->keyframe);
1575     if(s->keyframe || s->always_reset){
1576         ff_snow_reset_contexts(s);
1577         s->last_spatial_decomposition_type=
1578         s->last_qlog=
1579         s->last_qbias=
1580         s->last_mv_scale=
1581         s->last_block_max_depth= 0;
1582         for(plane_index=0; plane_index<2; plane_index++){
1583             Plane *p= &s->plane[plane_index];
1584             p->last_htaps=0;
1585             p->last_diag_mc=0;
1586             memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
1587         }
1588     }
1589     if(s->keyframe){
1590         put_symbol(&s->c, s->header_state, s->version, 0);
1591         put_rac(&s->c, s->header_state, s->always_reset);
1592         put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
1593         put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
1594         put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
1595         put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
1596         if (s->nb_planes > 2) {
1597             put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
1598             put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
1599         }
1600         put_rac(&s->c, s->header_state, s->spatial_scalability);
1601 //        put_rac(&s->c, s->header_state, s->rate_scalability);
1602         put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
1603
1604         encode_qlogs(s);
1605     }
1606
1607     if(!s->keyframe){
1608         int update_mc=0;
1609         for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){
1610             Plane *p= &s->plane[plane_index];
1611             update_mc |= p->last_htaps   != p->htaps;
1612             update_mc |= p->last_diag_mc != p->diag_mc;
1613             update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
1614         }
1615         put_rac(&s->c, s->header_state, update_mc);
1616         if(update_mc){
1617             for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){
1618                 Plane *p= &s->plane[plane_index];
1619                 put_rac(&s->c, s->header_state, p->diag_mc);
1620                 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
1621                 for(i= p->htaps/2; i; i--)
1622                     put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
1623             }
1624         }
1625         if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
1626             put_rac(&s->c, s->header_state, 1);
1627             put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
1628             encode_qlogs(s);
1629         }else
1630             put_rac(&s->c, s->header_state, 0);
1631     }
1632
1633     put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
1634     put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
1635     put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
1636     put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
1637     put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
1638
1639 }
1640
1641 static void update_last_header_values(SnowContext *s){
1642     int plane_index;
1643
1644     if(!s->keyframe){
1645         for(plane_index=0; plane_index<2; plane_index++){
1646             Plane *p= &s->plane[plane_index];
1647             p->last_diag_mc= p->diag_mc;
1648             p->last_htaps  = p->htaps;
1649             memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
1650         }
1651     }
1652
1653     s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
1654     s->last_qlog                        = s->qlog;
1655     s->last_qbias                       = s->qbias;
1656     s->last_mv_scale                    = s->mv_scale;
1657     s->last_block_max_depth             = s->block_max_depth;
1658     s->last_spatial_decomposition_count = s->spatial_decomposition_count;
1659 }
1660
1661 static int qscale2qlog(int qscale){
1662     return lrint(QROOT*log2(qscale / (float)FF_QP2LAMBDA))
1663            + 61*QROOT/8; ///< 64 > 60
1664 }
1665
1666 static int ratecontrol_1pass(SnowEncContext *enc, AVFrame *pict)
1667 {
1668     SnowContext *const s = &enc->com;
1669     /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
1670      * FIXME we know exact mv bits at this point,
1671      * but ratecontrol isn't set up to include them. */
1672     uint32_t coef_sum= 0;
1673     int level, orientation, delta_qlog;
1674
1675     for(level=0; level<s->spatial_decomposition_count; level++){
1676         for(orientation=level ? 1 : 0; orientation<4; orientation++){
1677             SubBand *b= &s->plane[0].band[level][orientation];
1678             IDWTELEM *buf= b->ibuf;
1679             const int w= b->width;
1680             const int h= b->height;
1681             const int stride= b->stride;
1682             const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
1683             const int qmul= ff_qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1684             const int qdiv= (1<<16)/qmul;
1685             int x, y;
1686             //FIXME this is ugly
1687             for(y=0; y<h; y++)
1688                 for(x=0; x<w; x++)
1689                     buf[x+y*stride]= b->buf[x+y*stride];
1690             if(orientation==0)
1691                 decorrelate(s, b, buf, stride, 1, 0);
1692             for(y=0; y<h; y++)
1693                 for(x=0; x<w; x++)
1694                     coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
1695         }
1696     }
1697
1698     /* ugly, ratecontrol just takes a sqrt again */
1699     av_assert0(coef_sum < INT_MAX);
1700     coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
1701
1702     if(pict->pict_type == AV_PICTURE_TYPE_I){
1703         enc->m.mb_var_sum    = coef_sum;
1704         enc->m.mc_mb_var_sum = 0;
1705     }else{
1706         enc->m.mc_mb_var_sum = coef_sum;
1707         enc->m.mb_var_sum    = 0;
1708     }
1709
1710     pict->quality= ff_rate_estimate_qscale(&enc->m, 1);
1711     if (pict->quality < 0)
1712         return INT_MIN;
1713     enc->lambda= pict->quality * 3/2;
1714     delta_qlog= qscale2qlog(pict->quality) - s->qlog;
1715     s->qlog+= delta_qlog;
1716     return delta_qlog;
1717 }
1718
1719 static void calculate_visual_weight(SnowContext *s, Plane *p){
1720     int width = p->width;
1721     int height= p->height;
1722     int level, orientation, x, y;
1723
1724     for(level=0; level<s->spatial_decomposition_count; level++){
1725         int64_t error=0;
1726         for(orientation=level ? 1 : 0; orientation<4; orientation++){
1727             SubBand *b= &p->band[level][orientation];
1728             IDWTELEM *ibuf= b->ibuf;
1729
1730             memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
1731             ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
1732             ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
1733             for(y=0; y<height; y++){
1734                 for(x=0; x<width; x++){
1735                     int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
1736                     error += d*d;
1737                 }
1738             }
1739             if (orientation == 2)
1740                 error /= 2;
1741             b->qlog= (int)(QROOT * log2(352256.0/sqrt(error)) + 0.5);
1742             if (orientation != 1)
1743                 error = 0;
1744         }
1745         p->band[level][1].qlog = p->band[level][2].qlog;
1746     }
1747 }
1748
/**
 * Encode one input frame into one packet.
 *
 * Steps, in order: allocate the packet and start the range coder, copy the
 * input planes into s->input_picture, choose picture type and quantizer,
 * prepare the motion estimation context for P pictures, write header and
 * block data, then for every plane transform, quantize and code the
 * subbands and rebuild the reconstruction, and finally update the rate
 * control statistics and finish the packet.
 *
 * @param avctx      codec context; priv_data is the SnowEncContext
 * @param pkt        packet that receives the coded frame
 * @param pict       frame to encode
 * @param got_packet set to 1 on success
 * @return 0 on success; on failure the negative value returned by a helper,
 *         -1 when rate control fails, or AVERROR(EINVAL) when the picture is
 *         too small for a single decomposition level
 */
1749 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1750                         const AVFrame *pict, int *got_packet)
1751 {
1752     SnowEncContext *const enc = avctx->priv_data;
1753     SnowContext *const s = &enc->com;
1754     MpegEncContext *const mpv = &enc->m;
1755     RangeCoder * const c= &s->c;
1756     AVCodecInternal *avci = avctx->internal;
1757     AVFrame *pic;
1758     const int width= s->avctx->width;
1759     const int height= s->avctx->height;
1760     int level, orientation, plane_index, i, y, ret;
         /* copies of the coder states, restored if single-pass rate control
          * forces header and blocks to be written a second time */
1761     uint8_t rc_header_bak[sizeof(s->header_state)];
1762     uint8_t rc_block_bak[sizeof(s->block_state)];
1763
         /* packet size: three times the samples covered by the block grid,
          * plus AV_INPUT_BUFFER_MIN_SIZE */
1764     if ((ret = ff_alloc_packet(avctx, pkt, s->b_width*s->b_height*MB_SIZE*MB_SIZE*3 + AV_INPUT_BUFFER_MIN_SIZE)) < 0)
1765         return ret;
1766
1767     ff_init_range_encoder(c, pkt->data, pkt->size);
1768     ff_build_rac_states(c, (1LL<<32)/20, 256-8);
1769
         /* copy each input plane row by row into input_picture (chroma planes
          * use the shifted dimensions), then call draw_edges with
          * EDGE_TOP | EDGE_BOTTOM on the copy */
1770     for(i=0; i < s->nb_planes; i++){
1771         int hshift= i ? s->chroma_h_shift : 0;
1772         int vshift= i ? s->chroma_v_shift : 0;
1773         for(y=0; y<AV_CEIL_RSHIFT(height, vshift); y++)
1774             memcpy(&s->input_picture->data[i][y * s->input_picture->linesize[i]],
1775                    &pict->data[i][y * pict->linesize[i]],
1776                    AV_CEIL_RSHIFT(width, hshift));
1777         enc->mpvencdsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i],
1778                                 AV_CEIL_RSHIFT(width, hshift), AV_CEIL_RSHIFT(height, vshift),
1779                                 EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1780                                 EDGE_TOP | EDGE_BOTTOM);
1781
1782     }
1783     emms_c();
1784     pic = s->input_picture;
1785     pic->pict_type = pict->pict_type;
1786     pic->quality = pict->quality;
1787
1788     mpv->picture_number = avctx->frame_num;
         /* second pass: picture type comes from the rate control entry of
          * this frame, and so does the quality unless QSCALE is set.
          * otherwise: keyframe when gop_size is 0 or frame_num is a multiple
          * of gop_size, P picture in all other cases */
1789     if(avctx->flags&AV_CODEC_FLAG_PASS2){
1790         mpv->pict_type = pic->pict_type = mpv->rc_context.entry[avctx->frame_num].new_pict_type;
1791         s->keyframe = pic->pict_type == AV_PICTURE_TYPE_I;
1792         if(!(avctx->flags&AV_CODEC_FLAG_QSCALE)) {
1793             pic->quality = ff_rate_estimate_qscale(mpv, 0);
1794             if (pic->quality < 0)
1795                 return -1;
1796         }
1797     }else{
1798         s->keyframe= avctx->gop_size==0 || avctx->frame_num % avctx->gop_size == 0;
1799         mpv->pict_type = pic->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1800     }
1801
         /* single-pass rate control starts the first frame from a fixed quality */
1802     if (enc->pass1_rc && avctx->frame_num == 0)
1803         pic->quality = 2*FF_QP2LAMBDA;
1804     if (pic->quality) {
1805         s->qlog   = qscale2qlog(pic->quality);
1806         enc->lambda = pic->quality * 3/2;
1807     }
         /* a negative qlog, or quality 0 together with QSCALE, selects lossless mode */
1808     if (s->qlog < 0 || (!pic->quality && (avctx->flags & AV_CODEC_FLAG_QSCALE))) {
1809         s->qlog= LOSSLESS_QLOG;
1810         enc->lambda = 0;
1811     }//else keep previous frame's qlog until after motion estimation
1812
         /* if current_picture already holds data, run draw_edges on its
          * planes before ff_snow_frames_prepare() is called
          * NOTE(review): presumably because it becomes a reference frame
          * there - confirm in ff_snow_frames_prepare() */
1813     if (s->current_picture->data[0]) {
1814         int w = s->avctx->width;
1815         int h = s->avctx->height;
1816
1817         enc->mpvencdsp.draw_edges(s->current_picture->data[0],
1818                                   s->current_picture->linesize[0], w   , h   ,
1819                                   EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
1820         if (s->current_picture->data[2]) {
1821             enc->mpvencdsp.draw_edges(s->current_picture->data[1],
1822                                       s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
1823                                       EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
1824             enc->mpvencdsp.draw_edges(s->current_picture->data[2],
1825                                       s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
1826                                       EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
1827         }
1828         emms_c();
1829     }
1830
1831     ff_snow_frames_prepare(s);
1832     ret = get_encode_buffer(s, s->current_picture);
1833     if (ret < 0)
1834         return ret;
1835
1836     mpv->current_picture_ptr    = &mpv->current_picture;
1837     mpv->current_picture.f      = s->current_picture;
1838     mpv->current_picture.f->pts = pict->pts;
         /* P pictures: fill in the MpegEncContext fields read by the motion
          * estimation code, using a 16x16 block grid over the picture */
1839     if(pic->pict_type == AV_PICTURE_TYPE_P){
1840         int block_width = (width +15)>>4;
1841         int block_height= (height+15)>>4;
1842         int stride= s->current_picture->linesize[0];
1843
1844         av_assert0(s->current_picture->data[0]);
1845         av_assert0(s->last_picture[0]->data[0]);
1846
1847         mpv->avctx = s->avctx;
1848         mpv->last_picture.f   = s->last_picture[0];
1849         mpv-> new_picture     = s->input_picture;
1850         mpv->last_picture_ptr = &mpv->last_picture;
1851         mpv->linesize   = stride;
1852         mpv->uvlinesize = s->current_picture->linesize[1];
1853         mpv->width      = width;
1854         mpv->height     = height;
1855         mpv->mb_width   = block_width;
1856         mpv->mb_height  = block_height;
1857         mpv->mb_stride  =     mpv->mb_width + 1;
1858         mpv->b8_stride  = 2 * mpv->mb_width + 1;
1859         mpv->f_code     = 1;
1860         mpv->pict_type  = pic->pict_type;
1861         mpv->motion_est = enc->motion_est;
1862         mpv->me.scene_change_score = 0;
1863         mpv->me.dia_size = avctx->dia_size;
1864         mpv->quarter_sample  = (s->avctx->flags & AV_CODEC_FLAG_QPEL)!=0;
1865         mpv->out_format      = FMT_H263;
1866         mpv->unrestricted_mv = 1;
1867
1868         mpv->lambda = enc->lambda;
1869         mpv->qscale = (mpv->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
1870         enc->lambda2  = mpv->lambda2 = (mpv->lambda*mpv->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
1871
             /* hand the DSP contexts to mpv for ff_init_me(), then copy hdsp
              * and mecc back afterwards */
1872         mpv->mecc = enc->mecc; //move
1873         mpv->qdsp = enc->qdsp; //move
1874         mpv->hdsp = s->hdsp;
1875         ff_init_me(&enc->m);
1876         s->hdsp = mpv->hdsp;
1877         enc->mecc = mpv->mecc;
1878     }
1879
1880     if (enc->pass1_rc) {
1881         memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
1882         memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
1883     }
1884
     /* re-entered from the scene change check below, after the picture has
      * been switched to an I picture and the range coder restarted */
1885 redo_frame:
1886
         /* start from 5 decomposition levels and drop levels while the
          * chroma-shifted width or height would shrink to zero */
1887     s->spatial_decomposition_count= 5;
1888
1889     while(   !(width >>(s->chroma_h_shift + s->spatial_decomposition_count))
1890           || !(height>>(s->chroma_v_shift + s->spatial_decomposition_count)))
1891         s->spatial_decomposition_count--;
1892
1893     if (s->spatial_decomposition_count <= 0) {
1894         av_log(avctx, AV_LOG_ERROR, "Resolution too low\n");
1895         return AVERROR(EINVAL);
1896     }
1897
1898     mpv->pict_type = pic->pict_type;
1899     s->qbias = pic->pict_type == AV_PICTURE_TYPE_P ? 2 : 0;
1900
1901     ff_snow_common_init_after_header(avctx);
1902
         /* band qlogs only need recomputing when the number of levels changed */
1903     if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
1904         for(plane_index=0; plane_index < s->nb_planes; plane_index++){
1905             calculate_visual_weight(s, &s->plane[plane_index]);
1906         }
1907     }
1908
         /* bytes written by the header count as misc_bits, the bytes added by
          * encode_blocks() as mv_bits */
1909     encode_header(s);
1910     mpv->misc_bits = 8 * (s->c.bytestream - s->c.bytestream_start);
1911     encode_blocks(enc, 1);
1912     mpv->mv_bits   = 8 * (s->c.bytestream - s->c.bytestream_start) - mpv->misc_bits;
1913
1914     for(plane_index=0; plane_index < s->nb_planes; plane_index++){
1915         Plane *p= &s->plane[plane_index];
1916         int w= p->width;
1917         int h= p->height;
1918         int x, y;
1919 //        int bits= put_bits_count(&s->c.pb);
1920
1921         if (!enc->memc_only) {
                 /* load the source plane, scaled up by FRAC_BITS, then call
                  * predict_plane() with last argument 0
                  * NOTE(review): presumably this subtracts the prediction
                  * and leaves the residual - confirm in predict_plane() */
1922             //FIXME optimize
1923             if(pict->data[plane_index]) //FIXME gray hack
1924                 for(y=0; y<h; y++){
1925                     for(x=0; x<w; x++){
1926                         s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
1927                     }
1928                 }
1929             predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
1930
                 /* scene change on a P picture (first plane only, never in
                  * pass 2): restart the range coder and redo the frame as a
                  * keyframe */
1931             if(   plane_index==0
1932                && pic->pict_type == AV_PICTURE_TYPE_P
1933                && !(avctx->flags&AV_CODEC_FLAG_PASS2)
1934                && mpv->me.scene_change_score > enc->scenechange_threshold) {
1935                 ff_init_range_encoder(c, pkt->data, pkt->size);
1936                 ff_build_rac_states(c, (1LL<<32)/20, 256-8);
1937                 pic->pict_type= AV_PICTURE_TYPE_I;
1938                 s->keyframe=1;
1939                 s->current_picture->flags |= AV_FRAME_FLAG_KEY;
1940                 goto redo_frame;
1941             }
1942
                 /* lossless: round the FRAC_BITS fraction away;
                  * lossy: multiply by 1 << ENCODER_EXTRA_BITS */
1943             if(s->qlog == LOSSLESS_QLOG){
1944                 for(y=0; y<h; y++){
1945                     for(x=0; x<w; x++){
1946                         s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
1947                     }
1948                 }
1949             }else{
1950                 for(y=0; y<h; y++){
1951                     for(x=0; x<w; x++){
1952                         s->spatial_dwt_buffer[y*w + x]= s->spatial_idwt_buffer[y*w + x] * (1 << ENCODER_EXTRA_BITS);
1953                     }
1954                 }
1955             }
1956
1957             ff_spatial_dwt(s->spatial_dwt_buffer, s->temp_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
1958
                 /* single-pass rate control runs on the first plane; when it
                  * changes qlog, header and blocks are written again from
                  * the saved coder states because the header carries qlog */
1959             if (enc->pass1_rc && plane_index==0) {
1960                 int delta_qlog = ratecontrol_1pass(enc, pic);
1961                 if (delta_qlog <= INT_MIN)
1962                     return -1;
1963                 if(delta_qlog){
1964                     //reordering qlog in the bitstream would eliminate this reset
1965                     ff_init_range_encoder(c, pkt->data, pkt->size);
1966                     memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
1967                     memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
1968                     encode_header(s);
1969                     encode_blocks(enc, 0);
1970                 }
1971             }
1972
                 /* per band: quantize buf into ibuf, decorrelate orientation
                  * 0, code the band unless no_bitstream is set, then undo
                  * the decorrelation so ibuf holds quantized values again */
1973             for(level=0; level<s->spatial_decomposition_count; level++){
1974                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1975                     SubBand *b= &p->band[level][orientation];
1976
1977                     quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
1978                     if(orientation==0)
1979                         decorrelate(s, b, b->ibuf, b->stride, pic->pict_type == AV_PICTURE_TYPE_P, 0);
1980                     if (!enc->no_bitstream)
1981                     encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
1982                     av_assert0(b->parent==NULL || b->parent->stride == b->stride*2);
1983                     if(orientation==0)
1984                         correlate(s, b, b->ibuf, b->stride, 1, 0);
1985                 }
1986             }
1987
                 /* dequantize and inverse transform to rebuild the plane */
1988             for(level=0; level<s->spatial_decomposition_count; level++){
1989                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1990                     SubBand *b= &p->band[level][orientation];
1991
1992                     dequantize(s, b, b->ibuf, b->stride);
1993                 }
1994             }
1995
1996             ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
                 /* lossless data was coded without the FRAC_BITS scaling, restore it */
1997             if(s->qlog == LOSSLESS_QLOG){
1998                 for(y=0; y<h; y++){
1999                     for(x=0; x<w; x++){
2000                         s->spatial_idwt_buffer[y*w + x] *= 1 << FRAC_BITS;
2001                     }
2002                 }
2003             }
                 /* NOTE(review): last argument 1 presumably adds the
                  * prediction back and stores the reconstruction - confirm */
2004             predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
2005         }else{
2006             //ME/MC only
                 /* I pictures: copy the source into current_picture;
                  * other pictures: call predict_plane() on a zeroed buffer */
2007             if(pic->pict_type == AV_PICTURE_TYPE_I){
2008                 for(y=0; y<h; y++){
2009                     for(x=0; x<w; x++){
2010                         s->current_picture->data[plane_index][y*s->current_picture->linesize[plane_index] + x]=
2011                             pict->data[plane_index][y*pict->linesize[plane_index] + x];
2012                     }
2013                 }
2014             }else{
2015                 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
2016                 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
2017             }
2018         }
             /* with PSNR enabled, sum the squared differences between
              * current_picture and the input for this plane */
2019         if(s->avctx->flags&AV_CODEC_FLAG_PSNR){
2020             int64_t error= 0;
2021
2022             if(pict->data[plane_index]) //FIXME gray hack
2023                 for(y=0; y<h; y++){
2024                     for(x=0; x<w; x++){
2025                         int d= s->current_picture->data[plane_index][y*s->current_picture->linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
2026                         error += d*d;
2027                     }
2028                 }
2029             s->avctx->error[plane_index] += error;
2030             enc->encoding_error[plane_index] = error;
2031         }
2032
2033     }
2034     emms_c();
2035
2036     update_last_header_values(s);
2037
2038     ff_snow_release_buffer(avctx);
2039
         /* bit accounting: whatever is neither header nor block data is
          * counted as p_tex_bits */
2040     s->current_picture->pict_type = pic->pict_type;
2041     s->current_picture->quality = pic->quality;
2042     mpv->frame_bits = 8 * (s->c.bytestream - s->c.bytestream_start);
2043     mpv->p_tex_bits = mpv->frame_bits - mpv->misc_bits - mpv->mv_bits;
2044     mpv->total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
2045     mpv->current_picture.display_picture_number =
2046     mpv->current_picture.coded_picture_number   = avctx->frame_num;
2047     mpv->current_picture.f->quality             = pic->quality;
         /* with single-pass rate control the estimator is called once more
          * here; only a negative result is acted upon */
2048     if (enc->pass1_rc)
2049         if (ff_rate_estimate_qscale(mpv, 0) < 0)
2050             return -1;
2051     if(avctx->flags&AV_CODEC_FLAG_PASS1)
2052         ff_write_pass1_stats(mpv);
2053     mpv->last_pict_type = mpv->pict_type;
2054
2055     emms_c();
2056
2057     ff_side_data_set_encoder_stats(pkt, s->current_picture->quality,
2058                                    enc->encoding_error,
2059                                    (s->avctx->flags&AV_CODEC_FLAG_PSNR) ? SNOW_MAX_PLANES : 0,
2060                                    s->current_picture->pict_type);
         /* NOTE(review): the return value of av_frame_replace() is ignored */
2061     if (s->avctx->flags & AV_CODEC_FLAG_RECON_FRAME) {
2062         av_frame_replace(avci->recon_frame, s->current_picture);
2063     }
2064
         /* the packet size is the value returned by ff_rac_terminate() */
2065     pkt->size = ff_rac_terminate(c, 0);
2066     if (s->current_picture->flags & AV_FRAME_FLAG_KEY)
2067         pkt->flags |= AV_PKT_FLAG_KEY;
2068     *got_packet = 1;
2069
2070     return 0;
2071 }
2072
2073 static av_cold int encode_end(AVCodecContext *avctx)
2074 {
2075     SnowEncContext *const enc = avctx->priv_data;
2076     SnowContext *const s = &enc->com;
2077
2078     ff_snow_common_end(s);
2079     ff_rate_control_uninit(&enc->m);
2080     av_frame_free(&s->input_picture);
2081
2082     for (int i = 0; i < MAX_REF_FRAMES; i++) {
2083         av_freep(&s->ref_mvs[i]);
2084         av_freep(&s->ref_scores[i]);
2085     }
2086
2087     enc->m.me.temp = NULL;
2088     av_freep(&enc->m.me.scratchpad);
2089     av_freep(&enc->m.me.map);
2090     av_freep(&enc->m.sc.obmc_scratchpad);
2091
2092     av_freep(&avctx->stats_out);
2093
2094     return 0;
2095 }
2096
2097 #define OFFSET(x) offsetof(SnowEncContext, x)
2098 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
2099 static const AVOption options[] = {
2100     {"motion_est", "motion estimation algorithm", OFFSET(motion_est), AV_OPT_TYPE_INT, {.i64 = FF_ME_EPZS }, FF_ME_ZERO, FF_ME_ITER, VE, "motion_est" },
2101     { "zero", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_ZERO }, 0, 0, VE, "motion_est" },
2102     { "epzs", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_EPZS }, 0, 0, VE, "motion_est" },
2103     { "xone", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_XONE }, 0, 0, VE, "motion_est" },
2104     { "iter", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_ITER }, 0, 0, VE, "motion_est" },
2105     { "memc_only",      "Only do ME/MC (I frames -> ref, P frame -> ME+MC).",   OFFSET(memc_only), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2106     { "no_bitstream",   "Skip final bitstream writeout.",                    OFFSET(no_bitstream), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2107     { "intra_penalty",  "Penalty for intra blocks in block decission",      OFFSET(intra_penalty), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
2108     { "iterative_dia_size",  "Dia size for the iterative ME",          OFFSET(iterative_dia_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
2109     { "sc_threshold",   "Scene change threshold",                   OFFSET(scenechange_threshold), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, VE },
2110     { "pred",           "Spatial decomposition type",                                OFFSET(pred), AV_OPT_TYPE_INT, { .i64 = 0 }, DWT_97, DWT_53, VE, "pred" },
2111         { "dwt97", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, VE, "pred" },
2112         { "dwt53", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, VE, "pred" },
2113     { "rc_eq", "Set rate control equation. When computing the expression, besides the standard functions "
2114      "defined in the section 'Expression Evaluation', the following functions are available: "
2115      "bits2qp(bits), qp2bits(qp). Also the following constants are available: iTex pTex tex mv "
2116      "fCode iCount mcVar var isI isP isB avgQP qComp avgIITex avgPITex avgPPTex avgBPTex avgTex.",
2117                                                                                   OFFSET(m.rc_eq), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VE },
2118     { NULL },
2119 };
2120
2121 static const AVClass snowenc_class = {
2122     .class_name = "snow encoder",
2123     .item_name  = av_default_item_name,
2124     .option     = options,
2125     .version    = LIBAVUTIL_VERSION_INT,
2126 };
2127
/**
 * Codec descriptor for the Snow video encoder.
 *
 * Wires encode_init(), encode_frame() and encode_end() to the generic
 * encoding layer, lists the accepted pixel formats and attaches the private
 * option class.
 */
2128 const FFCodec ff_snow_encoder = {
2129     .p.name         = "snow",
2130     CODEC_LONG_NAME("Snow"),
2131     .p.type         = AVMEDIA_TYPE_VIDEO,
2132     .p.id           = AV_CODEC_ID_SNOW,
         /* RECON_FRAME matches the AV_CODEC_FLAG_RECON_FRAME handling in encode_frame() */
2133     .p.capabilities = AV_CODEC_CAP_DR1 |
2134                       AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE |
2135                       AV_CODEC_CAP_ENCODER_RECON_FRAME,
2136     .priv_data_size = sizeof(SnowEncContext),
2137     .init           = encode_init,
2138     FF_CODEC_ENCODE_CB(encode_frame),
2139     .close          = encode_end,
         /* accepted input formats; the list ends with AV_PIX_FMT_NONE */
2140     .p.pix_fmts     = (const enum AVPixelFormat[]){
2141         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV444P,
2142         AV_PIX_FMT_GRAY8,
2143         AV_PIX_FMT_NONE
2144     },
2145     .p.priv_class   = &snowenc_class,
         /* NOTE(review): presumably makes the generic code call .close when .init fails - confirm */
2146     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
2147 };