src/third_party/ffmpeg/libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "h261.h"
  44 #include "h263.h"
  45 #include "mjpegenc_common.h"
  46 #include "mathops.h"
  47 #include "mpegutils.h"
  48 #include "mjpegenc.h"
  49 #include "msmpeg4.h"
  50 #include "pixblockdsp.h"
  51 #include "qpeldsp.h"
  52 #include "faandct.h"
  53 #include "thread.h"
  54 #include "aandcttab.h"
  55 #include "flv.h"
  56 #include "mpeg4video.h"
  57 #include "internal.h"
  58 #include "bytestream.h"
  59 #include <limits.h>
  60 #include "sp5x.h"
  61
  62 #define QUANT_BIAS_SHIFT 8
  63
  64 #define QMAT_SHIFT_MMX 16
  65 #define QMAT_SHIFT 21
  66
  67 static int encode_picture(MpegEncContext *s, int picture_number);
  68 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
  69 static int sse_mb(MpegEncContext *s);
  70 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
  71 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
  72
  73 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  74 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  75
  76 const AVOption ff_mpv_generic_options[] = {
  77     FF_MPV_COMMON_OPTS
  78     { NULL },
  79 };
  80
  81 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  82                        uint16_t (*qmat16)[2][64],
  83                        const uint16_t *quant_matrix,
  84                        int bias, int qmin, int qmax, int intra)
  85 {
  86     FDCTDSPContext *fdsp = &s->fdsp;
  87     int qscale;
  88     int shift = 0;
  89
  90     for (qscale = qmin; qscale <= qmax; qscale++) {
  91         int i;
  92         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  93 #if CONFIG_FAANDCT
  94             fdsp->fdct == ff_faandct            ||
  95 #endif /* CONFIG_FAANDCT */
  96             fdsp->fdct == ff_jpeg_fdct_islow_10) {
  97             for (i = 0; i < 64; i++) {
  98                 const int j = s->idsp.idct_permutation[i];
  99                 /* 16 <= qscale * quant_matrix[i] <= 7905
 100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 101                  *             19952 <=              x  <= 249205026
 102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 103                  *           3444240 >= (1 << 36) / (x) >= 275 */
 104
 105                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 106                                         (qscale * quant_matrix[j]));
 107             }
 108         } else if (fdsp->fdct == ff_fdct_ifast) {
 109             for (i = 0; i < 64; i++) {
 110                 const int j = s->idsp.idct_permutation[i];
 111                 /* 16 <= qscale * quant_matrix[i] <= 7905
 112                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 113                  *             19952 <=              x  <= 249205026
 114                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 115                  *           3444240 >= (1 << 36) / (x) >= 275 */
 116
 117                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 118                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
 119             }
 120         } else {
 121             for (i = 0; i < 64; i++) {
 122                 const int j = s->idsp.idct_permutation[i];
 123                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 124                  * Assume x = qscale * quant_matrix[i]
 125                  * So             16 <=              x  <= 7905
 126                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 127                  * so          32768 >= (1 << 19) / (x) >= 67 */
 128                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 129                                         (qscale * quant_matrix[j]));
 130                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 131                 //                    (qscale * quant_matrix[i]);
 132                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 133                                        (qscale * quant_matrix[j]);
 134
 135                 if (qmat16[qscale][0][i] == 0 ||
 136                     qmat16[qscale][0][i] == 128 * 256)
 137                     qmat16[qscale][0][i] = 128 * 256 - 1;
 138                 qmat16[qscale][1][i] =
 139                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 140                                 qmat16[qscale][0][i]);
 141             }
 142         }
 143
 144         for (i = intra; i < 64; i++) {
 145             int64_t max = 8191;
 146             if (fdsp->fdct == ff_fdct_ifast) {
 147                 max = (8191LL * ff_aanscales[i]) >> 14;
 148             }
 149             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 150                 shift++;
 151             }
 152         }
 153     }
 154     if (shift) {
 155         av_log(NULL, AV_LOG_INFO,
 156                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 157                QMAT_SHIFT - shift);
 158     }
 159 }
 160
 161 static inline void update_qscale(MpegEncContext *s)
 162 {
 163     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 164                 (FF_LAMBDA_SHIFT + 7);
 165     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 166
 167     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 168                  FF_LAMBDA_SHIFT;
 169 }
 170
 171 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 172 {
 173     int i;
 174
 175     if (matrix) {
 176         put_bits(pb, 1, 1);
 177         for (i = 0; i < 64; i++) {
 178             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 179         }
 180     } else
 181         put_bits(pb, 1, 0);
 182 }
 183
 184 /**
 185  * init s->current_picture.qscale_table from s->lambda_table
 186  */
 187 void ff_init_qscale_tab(MpegEncContext *s)
 188 {
 189     int8_t * const qscale_table = s->current_picture.qscale_table;
 190     int i;
 191
 192     for (i = 0; i < s->mb_num; i++) {
 193         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 194         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 195         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 196                                                   s->avctx->qmax);
 197     }
 198 }
 199
 200 static void update_duplicate_context_after_me(MpegEncContext *dst,
 201                                               MpegEncContext *src)
 202 {
 203 #define COPY(a) dst->a= src->a
 204     COPY(pict_type);
 205     COPY(current_picture);
 206     COPY(f_code);
 207     COPY(b_code);
 208     COPY(qscale);
 209     COPY(lambda);
 210     COPY(lambda2);
 211     COPY(picture_in_gop_number);
 212     COPY(gop_picture_number);
 213     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 214     COPY(progressive_frame);    // FIXME don't set in encode_header
 215     COPY(partitioned_frame);    // FIXME don't set in encode_header
 216 #undef COPY
 217 }
 218
 219 /**
 220  * Set the given MpegEncContext to defaults for encoding.
 221  * the changed fields will not depend upon the prior state of the MpegEncContext.
 222  */
 223 static void mpv_encode_defaults(MpegEncContext *s)
 224 {
 225     int i;
 226     ff_mpv_common_defaults(s);
 227
 228     for (i = -16; i < 16; i++) {
 229         default_fcode_tab[i + MAX_MV] = 1;
 230     }
 231     s->me.mv_penalty = default_mv_penalty;
 232     s->fcode_tab     = default_fcode_tab;
 233
 234     s->input_picture_number  = 0;
 235     s->picture_in_gop_number = 0;
 236 }
 237
 238 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 239     if (ARCH_X86)
 240         ff_dct_encode_init_x86(s);
 241
 242     if (CONFIG_H263_ENCODER)
 243         ff_h263dsp_init(&s->h263dsp);
 244     if (!s->dct_quantize)
 245         s->dct_quantize = ff_dct_quantize_c;
 246     if (!s->denoise_dct)
 247         s->denoise_dct  = denoise_dct_c;
 248     s->fast_dct_quantize = s->dct_quantize;
 249     if (s->avctx->trellis)
 250         s->dct_quantize  = dct_quantize_trellis_c;
 251
 252     return 0;
 253 }
 254
 255 /* init video encoder */
 256 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 257 {
 258     MpegEncContext *s = avctx->priv_data;
 259     int i, ret, format_supported;
 260
 261     mpv_encode_defaults(s);
 262
 263     switch (avctx->codec_id) {
 264     case AV_CODEC_ID_MPEG2VIDEO:
 265         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 266             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 267             av_log(avctx, AV_LOG_ERROR,
 268                    "only YUV420 and YUV422 are supported\n");
 269             return -1;
 270         }
 271         break;
 272     case AV_CODEC_ID_MJPEG:
 273     case AV_CODEC_ID_AMV:
 274         format_supported = 0;
 275         /* JPEG color space */
 276         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 277             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 278             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
 279             (avctx->color_range == AVCOL_RANGE_JPEG &&
 280              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 281               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 282               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
 283             format_supported = 1;
 284         /* MPEG color space */
 285         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 286                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 287                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 288                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
 289             format_supported = 1;
 290
 291         if (!format_supported) {
 292             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 293             return -1;
 294         }
 295         break;
 296     default:
 297         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 298             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 299             return -1;
 300         }
 301     }
 302
 303     switch (avctx->pix_fmt) {
 304     case AV_PIX_FMT_YUVJ444P:
 305     case AV_PIX_FMT_YUV444P:
 306         s->chroma_format = CHROMA_444;
 307         break;
 308     case AV_PIX_FMT_YUVJ422P:
 309     case AV_PIX_FMT_YUV422P:
 310         s->chroma_format = CHROMA_422;
 311         break;
 312     case AV_PIX_FMT_YUVJ420P:
 313     case AV_PIX_FMT_YUV420P:
 314     default:
 315         s->chroma_format = CHROMA_420;
 316         break;
 317     }
 318
 319     s->bit_rate = avctx->bit_rate;
 320     s->width    = avctx->width;
 321     s->height   = avctx->height;
 322     if (avctx->gop_size > 600 &&
 323         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 324         av_log(avctx, AV_LOG_WARNING,
 325                "keyframe interval too large!, reducing it from %d to %d\n",
 326                avctx->gop_size, 600);
 327         avctx->gop_size = 600;
 328     }
 329     s->gop_size     = avctx->gop_size;
 330     s->avctx        = avctx;
 331     s->flags        = avctx->flags;
 332     s->flags2       = avctx->flags2;
 333     if (avctx->max_b_frames > MAX_B_FRAMES) {
 334         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 335                "is %d.\n", MAX_B_FRAMES);
 336         avctx->max_b_frames = MAX_B_FRAMES;
 337     }
 338     s->max_b_frames = avctx->max_b_frames;
 339     s->codec_id     = avctx->codec->id;
 340     s->strict_std_compliance = avctx->strict_std_compliance;
 341     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 342     s->mpeg_quant         = avctx->mpeg_quant;
 343     s->rtp_mode           = !!avctx->rtp_payload_size;
 344     s->intra_dc_precision = avctx->intra_dc_precision;
 345
 346     // workaround some differences between how applications specify dc precission
 347     if (s->intra_dc_precision < 0) {
 348         s->intra_dc_precision += 8;
 349     } else if (s->intra_dc_precision >= 8)
 350         s->intra_dc_precision -= 8;
 351
 352     if (s->intra_dc_precision < 0) {
 353         av_log(avctx, AV_LOG_ERROR,
 354                 "intra dc precision must be positive, note some applications use"
 355                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
 356         return AVERROR(EINVAL);
 357     }
 358
 359     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
 360         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
 361         return AVERROR(EINVAL);
 362     }
 363     s->user_specified_pts = AV_NOPTS_VALUE;
 364
 365     if (s->gop_size <= 1) {
 366         s->intra_only = 1;
 367         s->gop_size   = 12;
 368     } else {
 369         s->intra_only = 0;
 370     }
 371
 372     s->me_method = avctx->me_method;
 373
 374     /* Fixed QSCALE */
 375     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 376
 377     s->adaptive_quant = (s->avctx->lumi_masking ||
 378                          s->avctx->dark_masking ||
 379                          s->avctx->temporal_cplx_masking ||
 380                          s->avctx->spatial_cplx_masking  ||
 381                          s->avctx->p_masking      ||
 382                          s->avctx->border_masking ||
 383                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 384                         !s->fixed_qscale;
 385
 386     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 387
 388     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 389         switch(avctx->codec_id) {
 390         case AV_CODEC_ID_MPEG1VIDEO:
 391         case AV_CODEC_ID_MPEG2VIDEO:
 392             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
 393             break;
 394         case AV_CODEC_ID_MPEG4:
 395         case AV_CODEC_ID_MSMPEG4V1:
 396         case AV_CODEC_ID_MSMPEG4V2:
 397         case AV_CODEC_ID_MSMPEG4V3:
 398             if       (avctx->rc_max_rate >= 15000000) {
 399                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
 400             } else if(avctx->rc_max_rate >=  2000000) {
 401                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
 402             } else if(avctx->rc_max_rate >=   384000) {
 403                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
 404             } else
 405                 avctx->rc_buffer_size = 40;
 406             avctx->rc_buffer_size *= 16384;
 407             break;
 408         }
 409         if (avctx->rc_buffer_size) {
 410             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 411         }
 412     }
 413
 414     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 415         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 416         return -1;
 417     }
 418
 419     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 420         av_log(avctx, AV_LOG_INFO,
 421                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 422     }
 423
 424     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 425         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 426         return -1;
 427     }
 428
 429     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 430         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 431         return -1;
 432     }
 433
 434     if (avctx->rc_max_rate &&
 435         avctx->rc_max_rate == avctx->bit_rate &&
 436         avctx->rc_max_rate != avctx->rc_min_rate) {
 437         av_log(avctx, AV_LOG_INFO,
 438                "impossible bitrate constraints, this will fail\n");
 439     }
 440
 441     if (avctx->rc_buffer_size &&
 442         avctx->bit_rate * (int64_t)avctx->time_base.num >
 443             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 444         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 445         return -1;
 446     }
 447
 448     if (!s->fixed_qscale &&
 449         avctx->bit_rate * av_q2d(avctx->time_base) >
 450             avctx->bit_rate_tolerance) {
 451         av_log(avctx, AV_LOG_WARNING,
 452                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 453         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 454     }
 455
 456     if (s->avctx->rc_max_rate &&
 457         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 458         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 459          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 460         90000LL * (avctx->rc_buffer_size - 1) >
 461             s->avctx->rc_max_rate * 0xFFFFLL) {
 462         av_log(avctx, AV_LOG_INFO,
 463                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 464                "specified vbv buffer is too large for the given bitrate!\n");
 465     }
 466
 467     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 468         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 469         s->codec_id != AV_CODEC_ID_FLV1) {
 470         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 471         return -1;
 472     }
 473
 474     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 475         av_log(avctx, AV_LOG_ERROR,
 476                "OBMC is only supported with simple mb decision\n");
 477         return -1;
 478     }
 479
 480     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 481         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 482         return -1;
 483     }
 484
 485     if (s->max_b_frames                    &&
 486         s->codec_id != AV_CODEC_ID_MPEG4      &&
 487         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 488         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 489         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 490         return -1;
 491     }
 492     if (s->max_b_frames < 0) {
 493         av_log(avctx, AV_LOG_ERROR,
 494                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 495         return -1;
 496     }
 497
 498     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 499          s->codec_id == AV_CODEC_ID_H263  ||
 500          s->codec_id == AV_CODEC_ID_H263P) &&
 501         (avctx->sample_aspect_ratio.num > 255 ||
 502          avctx->sample_aspect_ratio.den > 255)) {
 503         av_log(avctx, AV_LOG_WARNING,
 504                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 505                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 506         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 507                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 508     }
 509
 510     if ((s->codec_id == AV_CODEC_ID_H263  ||
 511          s->codec_id == AV_CODEC_ID_H263P) &&
 512         (avctx->width  > 2048 ||
 513          avctx->height > 1152 )) {
 514         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 515         return -1;
 516     }
 517     if ((s->codec_id == AV_CODEC_ID_H263  ||
 518          s->codec_id == AV_CODEC_ID_H263P) &&
 519         ((avctx->width &3) ||
 520          (avctx->height&3) )) {
 521         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 522         return -1;
 523     }
 524
 525     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 526         (avctx->width  > 4095 ||
 527          avctx->height > 4095 )) {
 528         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 529         return -1;
 530     }
 531
 532     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 533         (avctx->width  > 16383 ||
 534          avctx->height > 16383 )) {
 535         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 536         return -1;
 537     }
 538
 539     if (s->codec_id == AV_CODEC_ID_RV10 &&
 540         (avctx->width &15 ||
 541          avctx->height&15 )) {
 542         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 543         return AVERROR(EINVAL);
 544     }
 545
 546     if (s->codec_id == AV_CODEC_ID_RV20 &&
 547         (avctx->width &3 ||
 548          avctx->height&3 )) {
 549         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 550         return AVERROR(EINVAL);
 551     }
 552
 553     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 554          s->codec_id == AV_CODEC_ID_WMV2) &&
 555          avctx->width & 1) {
 556          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 557          return -1;
 558     }
 559
 560     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 561         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 562         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 563         return -1;
 564     }
 565
 566     // FIXME mpeg2 uses that too
 567     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 568                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 569         av_log(avctx, AV_LOG_ERROR,
 570                "mpeg2 style quantization not supported by codec\n");
 571         return -1;
 572     }
 573
 574     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 575         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 576         return -1;
 577     }
 578
 579     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 580         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 581         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 582         return -1;
 583     }
 584
 585     if (s->avctx->scenechange_threshold < 1000000000 &&
 586         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 587         av_log(avctx, AV_LOG_ERROR,
 588                "closed gop with scene change detection are not supported yet, "
 589                "set threshold to 1000000000\n");
 590         return -1;
 591     }
 592
 593     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 594         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 595             av_log(avctx, AV_LOG_ERROR,
 596                   "low delay forcing is only available for mpeg2\n");
 597             return -1;
 598         }
 599         if (s->max_b_frames != 0) {
 600             av_log(avctx, AV_LOG_ERROR,
 601                    "b frames cannot be used with low delay\n");
 602             return -1;
 603         }
 604     }
 605
 606     if (s->q_scale_type == 1) {
 607         if (avctx->qmax > 12) {
 608             av_log(avctx, AV_LOG_ERROR,
 609                    "non linear quant only supports qmax <= 12 currently\n");
 610             return -1;
 611         }
 612     }
 613
 614     if (s->avctx->thread_count > 1         &&
 615         s->codec_id != AV_CODEC_ID_MPEG4      &&
 616         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 617         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 618         s->codec_id != AV_CODEC_ID_MJPEG      &&
 619         (s->codec_id != AV_CODEC_ID_H263P)) {
 620         av_log(avctx, AV_LOG_ERROR,
 621                "multi threaded encoding not supported by codec\n");
 622         return -1;
 623     }
 624
 625     if (s->avctx->thread_count < 1) {
 626         av_log(avctx, AV_LOG_ERROR,
 627                "automatic thread number detection not supported by codec, "
 628                "patch welcome\n");
 629         return -1;
 630     }
 631
 632     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 633         s->rtp_mode = 1;
 634
 635     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 636         s->h263_slice_structured = 1;
 637
 638     if (!avctx->time_base.den || !avctx->time_base.num) {
 639         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 640         return -1;
 641     }
 642
 643     i = (INT_MAX / 2 + 128) >> 8;
 644     if (avctx->mb_threshold >= i) {
 645         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 646                i - 1);
 647         return -1;
 648     }
 649
 650     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 651         av_log(avctx, AV_LOG_INFO,
 652                "notice: b_frame_strategy only affects the first pass\n");
 653         avctx->b_frame_strategy = 0;
 654     }
 655
 656     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 657     if (i > 1) {
 658         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 659         avctx->time_base.den /= i;
 660         avctx->time_base.num /= i;
 661         //return -1;
 662     }
 663
 664     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 665         // (a + x * 3 / 8) / x
 666         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 667         s->inter_quant_bias = 0;
 668     } else {
 669         s->intra_quant_bias = 0;
 670         // (a - x / 4) / x
 671         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 672     }
 673
 674     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 675         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 676         return AVERROR(EINVAL);
 677     }
 678
 679     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 680         s->intra_quant_bias = avctx->intra_quant_bias;
 681     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 682         s->inter_quant_bias = avctx->inter_quant_bias;
 683
 684     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 685
 686     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 687         s->avctx->time_base.den > (1 << 16) - 1) {
 688         av_log(avctx, AV_LOG_ERROR,
 689                "timebase %d/%d not supported by MPEG 4 standard, "
 690                "the maximum admitted value for the timebase denominator "
 691                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 692                (1 << 16) - 1);
 693         return -1;
 694     }
 695     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 696
 697     switch (avctx->codec->id) {
 698     case AV_CODEC_ID_MPEG1VIDEO:
 699         s->out_format = FMT_MPEG1;
 700         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 701         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 702         break;
 703     case AV_CODEC_ID_MPEG2VIDEO:
 704         s->out_format = FMT_MPEG1;
 705         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 706         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 707         s->rtp_mode   = 1;
 708         break;
 709     case AV_CODEC_ID_MJPEG:
 710     case AV_CODEC_ID_AMV:
 711         s->out_format = FMT_MJPEG;
 712         s->intra_only = 1; /* force intra only for jpeg */
 713         if (!CONFIG_MJPEG_ENCODER ||
 714             ff_mjpeg_encode_init(s) < 0)
 715             return -1;
 716         avctx->delay = 0;
 717         s->low_delay = 1;
 718         break;
 719     case AV_CODEC_ID_H261:
 720         if (!CONFIG_H261_ENCODER)
 721             return -1;
 722         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 723             av_log(avctx, AV_LOG_ERROR,
 724                    "The specified picture size of %dx%d is not valid for the "
 725                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 726                     s->width, s->height);
 727             return -1;
 728         }
 729         s->out_format = FMT_H261;
 730         avctx->delay  = 0;
 731         s->low_delay  = 1;
 732         break;
 733     case AV_CODEC_ID_H263:
 734         if (!CONFIG_H263_ENCODER)
 735             return -1;
 736         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 737                              s->width, s->height) == 8) {
 738             av_log(avctx, AV_LOG_ERROR,
 739                    "The specified picture size of %dx%d is not valid for "
 740                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 741                    "352x288, 704x576, and 1408x1152. "
 742                    "Try H.263+.\n", s->width, s->height);
 743             return -1;
 744         }
 745         s->out_format = FMT_H263;
 746         avctx->delay  = 0;
 747         s->low_delay  = 1;
 748         break;
 749     case AV_CODEC_ID_H263P:
 750         s->out_format = FMT_H263;
 751         s->h263_plus  = 1;
 752         /* Fx */
 753         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 754         s->modified_quant  = s->h263_aic;
 755         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 756         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 757
 758         /* /Fx */
 759         /* These are just to be sure */
 760         avctx->delay = 0;
 761         s->low_delay = 1;
 762         break;
 763     case AV_CODEC_ID_FLV1:
 764         s->out_format      = FMT_H263;
 765         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 766         s->unrestricted_mv = 1;
 767         s->rtp_mode  = 0; /* don't allow GOB */
 768         avctx->delay = 0;
 769         s->low_delay = 1;
 770         break;
 771     case AV_CODEC_ID_RV10:
 772         s->out_format = FMT_H263;
 773         avctx->delay  = 0;
 774         s->low_delay  = 1;
 775         break;
 776     case AV_CODEC_ID_RV20:
 777         s->out_format      = FMT_H263;
 778         avctx->delay       = 0;
 779         s->low_delay       = 1;
 780         s->modified_quant  = 1;
 781         s->h263_aic        = 1;
 782         s->h263_plus       = 1;
 783         s->loop_filter     = 1;
 784         s->unrestricted_mv = 0;
 785         break;
 786     case AV_CODEC_ID_MPEG4:
 787         s->out_format      = FMT_H263;
 788         s->h263_pred       = 1;
 789         s->unrestricted_mv = 1;
 790         s->low_delay       = s->max_b_frames ? 0 : 1;
 791         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 792         break;
 793     case AV_CODEC_ID_MSMPEG4V2:
 794         s->out_format      = FMT_H263;
 795         s->h263_pred       = 1;
 796         s->unrestricted_mv = 1;
 797         s->msmpeg4_version = 2;
 798         avctx->delay       = 0;
 799         s->low_delay       = 1;
 800         break;
 801     case AV_CODEC_ID_MSMPEG4V3:
 802         s->out_format        = FMT_H263;
 803         s->h263_pred         = 1;
 804         s->unrestricted_mv   = 1;
 805         s->msmpeg4_version   = 3;
 806         s->flipflop_rounding = 1;
 807         avctx->delay         = 0;
 808         s->low_delay         = 1;
 809         break;
 810     case AV_CODEC_ID_WMV1:
 811         s->out_format        = FMT_H263;
 812         s->h263_pred         = 1;
 813         s->unrestricted_mv   = 1;
 814         s->msmpeg4_version   = 4;
 815         s->flipflop_rounding = 1;
 816         avctx->delay         = 0;
 817         s->low_delay         = 1;
 818         break;
 819     case AV_CODEC_ID_WMV2:
 820         s->out_format        = FMT_H263;
 821         s->h263_pred         = 1;
 822         s->unrestricted_mv   = 1;
 823         s->msmpeg4_version   = 5;
 824         s->flipflop_rounding = 1;
 825         avctx->delay         = 0;
 826         s->low_delay         = 1;
 827         break;
 828     default:
 829         return -1;
 830     }
 831
 832     avctx->has_b_frames = !s->low_delay;
 833
 834     s->encoding = 1;
 835
 836     s->progressive_frame    =
 837     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 838                                                 CODEC_FLAG_INTERLACED_ME) ||
 839                                 s->alternate_scan);
 840
 841     /* init */
 842     ff_mpv_idct_init(s);
 843     if (ff_mpv_common_init(s) < 0)
 844         return -1;
 845
 846     ff_fdctdsp_init(&s->fdsp, avctx);
 847     ff_me_cmp_init(&s->mecc, avctx);
 848     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 849     ff_pixblockdsp_init(&s->pdsp, avctx);
 850     ff_qpeldsp_init(&s->qdsp);
 851
 852     s->avctx->coded_frame = s->current_picture.f;
 853
 854     if (s->msmpeg4_version) {
 855         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 856                           2 * 2 * (MAX_LEVEL + 1) *
 857                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 858     }
 859     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 860
 861     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 862     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 863     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 864     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 865     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 866     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 867     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 868                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 869     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 870                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 871
 872     if (s->avctx->noise_reduction) {
 873         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 874                           2 * 64 * sizeof(uint16_t), fail);
 875     }
 876
 877     ff_dct_encode_init(s);
 878
 879     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 880         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 881
 882     s->quant_precision = 5;
 883
 884     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 885     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
 886
 887     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 888         ff_h261_encode_init(s);
 889     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 890         ff_h263_encode_init(s);
 891     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 892         ff_msmpeg4_encode_init(s);
 893     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 894         && s->out_format == FMT_MPEG1)
 895         ff_mpeg1_encode_init(s);
 896
 897     /* init q matrix */
 898     for (i = 0; i < 64; i++) {
 899         int j = s->idsp.idct_permutation[i];
 900         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 901             s->mpeg_quant) {
 902             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 903             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 904         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 905             s->intra_matrix[j] =
 906             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 907         } else {
 908             /* mpeg1/2 */
 909             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 910             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 911         }
 912         if (s->avctx->intra_matrix)
 913             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 914         if (s->avctx->inter_matrix)
 915             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 916     }
 917
 918     /* precompute matrix */
 919     /* for mjpeg, we do include qscale in the matrix */
 920     if (s->out_format != FMT_MJPEG) {
 921         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 922                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 923                           31, 1);
 924         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 925                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 926                           31, 0);
 927     }
 928
 929     if (ff_rate_control_init(s) < 0)
 930         return -1;
 931
 932 #if FF_API_ERROR_RATE
 933     FF_DISABLE_DEPRECATION_WARNINGS
 934     if (avctx->error_rate)
 935         s->error_rate = avctx->error_rate;
 936     FF_ENABLE_DEPRECATION_WARNINGS;
 937 #endif
 938
 939 #if FF_API_NORMALIZE_AQP
 940     FF_DISABLE_DEPRECATION_WARNINGS
 941     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 942         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 943     FF_ENABLE_DEPRECATION_WARNINGS;
 944 #endif
 945
 946 #if FF_API_MV0
 947     FF_DISABLE_DEPRECATION_WARNINGS
 948     if (avctx->flags & CODEC_FLAG_MV0)
 949         s->mpv_flags |= FF_MPV_FLAG_MV0;
 950     FF_ENABLE_DEPRECATION_WARNINGS
 951 #endif
 952
 953     if (avctx->b_frame_strategy == 2) {
 954         for (i = 0; i < s->max_b_frames + 2; i++) {
 955             s->tmp_frames[i] = av_frame_alloc();
 956             if (!s->tmp_frames[i])
 957                 return AVERROR(ENOMEM);
 958
 959             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 960             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 961             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 962
 963             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 964             if (ret < 0)
 965                 return ret;
 966         }
 967     }
 968
 969     return 0;
 970 fail:
 971     ff_mpv_encode_end(avctx);
 972     return AVERROR_UNKNOWN;
 973 }
 974
 975 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
 976 {
 977     MpegEncContext *s = avctx->priv_data;
 978     int i;
 979
 980     ff_rate_control_uninit(s);
 981
 982     ff_mpv_common_end(s);
 983     if (CONFIG_MJPEG_ENCODER &&
 984         s->out_format == FMT_MJPEG)
 985         ff_mjpeg_encode_close(s);
 986
 987     av_freep(&avctx->extradata);
 988
 989     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 990         av_frame_free(&s->tmp_frames[i]);
 991
 992     ff_free_picture_tables(&s->new_picture);
 993     ff_mpeg_unref_picture(s, &s->new_picture);
 994
 995     av_freep(&s->avctx->stats_out);
 996     av_freep(&s->ac_stats);
 997
 998     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
 999     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1000     s->q_chroma_intra_matrix=   NULL;
1001     s->q_chroma_intra_matrix16= NULL;
1002     av_freep(&s->q_intra_matrix);
1003     av_freep(&s->q_inter_matrix);
1004     av_freep(&s->q_intra_matrix16);
1005     av_freep(&s->q_inter_matrix16);
1006     av_freep(&s->input_picture);
1007     av_freep(&s->reordered_input_picture);
1008     av_freep(&s->dct_offset);
1009
1010     return 0;
1011 }
1012
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared with
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
1026
1027 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1028                            uint8_t *ref, int stride)
1029 {
1030     int x, y, w, h;
1031     int acc = 0;
1032
1033     w = s->width  & ~15;
1034     h = s->height & ~15;
1035
1036     for (y = 0; y < h; y += 16) {
1037         for (x = 0; x < w; x += 16) {
1038             int offset = x + y * stride;
1039             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1040                                       stride, 16);
1041             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1042             int sae  = get_sae(src + offset, mean, stride);
1043
1044             acc += sae + 500 < sad;
1045         }
1046     }
1047     return acc;
1048 }
1049
1050
1051 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1052 {
1053     Picture *pic = NULL;
1054     int64_t pts;
1055     int i, display_picture_number = 0, ret;
1056     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1057                                                  (s->low_delay ? 0 : 1);
1058     int direct = 1;
1059
1060     if (pic_arg) {
1061         pts = pic_arg->pts;
1062         display_picture_number = s->input_picture_number++;
1063
1064         if (pts != AV_NOPTS_VALUE) {
1065             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1066                 int64_t last = s->user_specified_pts;
1067
1068                 if (pts <= last) {
1069                     av_log(s->avctx, AV_LOG_ERROR,
1070                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1071                            pts, last);
1072                     return AVERROR(EINVAL);
1073                 }
1074
1075                 if (!s->low_delay && display_picture_number == 1)
1076                     s->dts_delta = pts - last;
1077             }
1078             s->user_specified_pts = pts;
1079         } else {
1080             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1081                 s->user_specified_pts =
1082                 pts = s->user_specified_pts + 1;
1083                 av_log(s->avctx, AV_LOG_INFO,
1084                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1085                        pts);
1086             } else {
1087                 pts = display_picture_number;
1088             }
1089         }
1090     }
1091
1092     if (pic_arg) {
1093         if (!pic_arg->buf[0])
1094             direct = 0;
1095         if (pic_arg->linesize[0] != s->linesize)
1096             direct = 0;
1097         if (pic_arg->linesize[1] != s->uvlinesize)
1098             direct = 0;
1099         if (pic_arg->linesize[2] != s->uvlinesize)
1100             direct = 0;
1101         if ((s->width & 15) || (s->height & 15))
1102             direct = 0;
1103         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1104             direct = 0;
1105         if (s->linesize & (STRIDE_ALIGN-1))
1106             direct = 0;
1107
1108         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1109                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1110
1111         if (direct) {
1112             i = ff_find_unused_picture(s, 1);
1113             if (i < 0)
1114                 return i;
1115
1116             pic = &s->picture[i];
1117             pic->reference = 3;
1118
1119             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1120                 return ret;
1121             if (ff_alloc_picture(s, pic, 1) < 0) {
1122                 return -1;
1123             }
1124         } else {
1125             i = ff_find_unused_picture(s, 0);
1126             if (i < 0)
1127                 return i;
1128
1129             pic = &s->picture[i];
1130             pic->reference = 3;
1131
1132             if (ff_alloc_picture(s, pic, 0) < 0) {
1133                 return -1;
1134             }
1135
1136             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1137                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1138                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1139                 // empty
1140             } else {
1141                 int h_chroma_shift, v_chroma_shift;
1142                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1143                                                  &h_chroma_shift,
1144                                                  &v_chroma_shift);
1145
1146                 for (i = 0; i < 3; i++) {
1147                     int src_stride = pic_arg->linesize[i];
1148                     int dst_stride = i ? s->uvlinesize : s->linesize;
1149                     int h_shift = i ? h_chroma_shift : 0;
1150                     int v_shift = i ? v_chroma_shift : 0;
1151                     int w = s->width  >> h_shift;
1152                     int h = s->height >> v_shift;
1153                     uint8_t *src = pic_arg->data[i];
1154                     uint8_t *dst = pic->f->data[i];
1155                     int vpad = 16;
1156
1157                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1158                         && !s->progressive_sequence
1159                         && FFALIGN(s->height, 32) - s->height > 16)
1160                         vpad = 32;
1161
1162                     if (!s->avctx->rc_buffer_size)
1163                         dst += INPLACE_OFFSET;
1164
1165                     if (src_stride == dst_stride)
1166                         memcpy(dst, src, src_stride * h);
1167                     else {
1168                         int h2 = h;
1169                         uint8_t *dst2 = dst;
1170                         while (h2--) {
1171                             memcpy(dst2, src, w);
1172                             dst2 += dst_stride;
1173                             src += src_stride;
1174                         }
1175                     }
1176                     if ((s->width & 15) || (s->height & (vpad-1))) {
1177                         s->mpvencdsp.draw_edges(dst, dst_stride,
1178                                                 w, h,
1179                                                 16>>h_shift,
1180                                                 vpad>>v_shift,
1181                                                 EDGE_BOTTOM);
1182                     }
1183                 }
1184             }
1185         }
1186         ret = av_frame_copy_props(pic->f, pic_arg);
1187         if (ret < 0)
1188             return ret;
1189
1190         pic->f->display_picture_number = display_picture_number;
1191         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1192     }
1193
1194     /* shift buffer entries */
1195     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1196         s->input_picture[i - 1] = s->input_picture[i];
1197
1198     s->input_picture[encoding_delay] = (Picture*) pic;
1199
1200     return 0;
1201 }
1202
1203 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1204 {
1205     int x, y, plane;
1206     int score = 0;
1207     int64_t score64 = 0;
1208
1209     for (plane = 0; plane < 3; plane++) {
1210         const int stride = p->f->linesize[plane];
1211         const int bw = plane ? 1 : 2;
1212         for (y = 0; y < s->mb_height * bw; y++) {
1213             for (x = 0; x < s->mb_width * bw; x++) {
1214                 int off = p->shared ? 0 : 16;
1215                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1216                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1217                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1218
1219                 switch (FFABS(s->avctx->frame_skip_exp)) {
1220                 case 0: score    =  FFMAX(score, v);          break;
1221                 case 1: score   += FFABS(v);                  break;
1222                 case 2: score64 += v * (int64_t)v;                       break;
1223                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1224                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1225                 }
1226             }
1227         }
1228     }
1229     emms_c();
1230
1231     if (score)
1232         score64 = score;
1233     if (s->avctx->frame_skip_exp < 0)
1234         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1235                       -1.0/s->avctx->frame_skip_exp);
1236
1237     if (score64 < s->avctx->frame_skip_threshold)
1238         return 1;
1239     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1240         return 1;
1241     return 0;
1242 }
1243
1244 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1245 {
1246     AVPacket pkt = { 0 };
1247     int ret, got_output;
1248
1249     av_init_packet(&pkt);
1250     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1251     if (ret < 0)
1252         return ret;
1253
1254     ret = pkt.size;
1255     av_free_packet(&pkt);
1256     return ret;
1257 }
1258
1259 static int estimate_best_b_count(MpegEncContext *s)
1260 {
1261     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1262     AVCodecContext *c = avcodec_alloc_context3(NULL);
1263     const int scale = s->avctx->brd_scale;
1264     int i, j, out_size, p_lambda, b_lambda, lambda2;
1265     int64_t best_rd  = INT64_MAX;
1266     int best_b_count = -1;
1267
1268     av_assert0(scale >= 0 && scale <= 3);
1269
1270     //emms_c();
1271     //s->next_picture_ptr->quality;
1272     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1273     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1274     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1275     if (!b_lambda) // FIXME we should do this somewhere else
1276         b_lambda = p_lambda;
1277     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1278                FF_LAMBDA_SHIFT;
1279
1280     c->width        = s->width  >> scale;
1281     c->height       = s->height >> scale;
1282     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1283     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1284     c->mb_decision  = s->avctx->mb_decision;
1285     c->me_cmp       = s->avctx->me_cmp;
1286     c->mb_cmp       = s->avctx->mb_cmp;
1287     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1288     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1289     c->time_base    = s->avctx->time_base;
1290     c->max_b_frames = s->max_b_frames;
1291
1292     if (avcodec_open2(c, codec, NULL) < 0)
1293         return -1;
1294
1295     for (i = 0; i < s->max_b_frames + 2; i++) {
1296         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1297                                                 s->next_picture_ptr;
1298         uint8_t *data[4];
1299
1300         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1301             pre_input = *pre_input_ptr;
1302             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1303
1304             if (!pre_input.shared && i) {
1305                 data[0] += INPLACE_OFFSET;
1306                 data[1] += INPLACE_OFFSET;
1307                 data[2] += INPLACE_OFFSET;
1308             }
1309
1310             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1311                                        s->tmp_frames[i]->linesize[0],
1312                                        data[0],
1313                                        pre_input.f->linesize[0],
1314                                        c->width, c->height);
1315             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1316                                        s->tmp_frames[i]->linesize[1],
1317                                        data[1],
1318                                        pre_input.f->linesize[1],
1319                                        c->width >> 1, c->height >> 1);
1320             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1321                                        s->tmp_frames[i]->linesize[2],
1322                                        data[2],
1323                                        pre_input.f->linesize[2],
1324                                        c->width >> 1, c->height >> 1);
1325         }
1326     }
1327
1328     for (j = 0; j < s->max_b_frames + 1; j++) {
1329         int64_t rd = 0;
1330
1331         if (!s->input_picture[j])
1332             break;
1333
1334         c->error[0] = c->error[1] = c->error[2] = 0;
1335
1336         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1337         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1338
1339         out_size = encode_frame(c, s->tmp_frames[0]);
1340
1341         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1342
1343         for (i = 0; i < s->max_b_frames + 1; i++) {
1344             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1345
1346             s->tmp_frames[i + 1]->pict_type = is_p ?
1347                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1348             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1349
1350             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1351
1352             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1353         }
1354
1355         /* get the delayed frames */
1356         while (out_size) {
1357             out_size = encode_frame(c, NULL);
1358             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1359         }
1360
1361         rd += c->error[0] + c->error[1] + c->error[2];
1362
1363         if (rd < best_rd) {
1364             best_rd = rd;
1365             best_b_count = j;
1366         }
1367     }
1368
1369     avcodec_close(c);
1370     av_freep(&c);
1371
1372     return best_b_count;
1373 }
1374
1375 static int select_input_picture(MpegEncContext *s)
1376 {
1377     int i, ret;
1378
1379     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1380         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1381     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1382
1383     /* set next picture type & ordering */
1384     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1385         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1386             if (s->picture_in_gop_number < s->gop_size &&
1387                 s->next_picture_ptr &&
1388                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1389                 // FIXME check that te gop check above is +-1 correct
1390                 av_frame_unref(s->input_picture[0]->f);
1391
1392                 ff_vbv_update(s, 0);
1393
1394                 goto no_output_pic;
1395             }
1396         }
1397
1398         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1399             !s->next_picture_ptr || s->intra_only) {
1400             s->reordered_input_picture[0] = s->input_picture[0];
1401             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1402             s->reordered_input_picture[0]->f->coded_picture_number =
1403                 s->coded_picture_number++;
1404         } else {
1405             int b_frames;
1406
1407             if (s->flags & CODEC_FLAG_PASS2) {
1408                 for (i = 0; i < s->max_b_frames + 1; i++) {
1409                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1410
1411                     if (pict_num >= s->rc_context.num_entries)
1412                         break;
1413                     if (!s->input_picture[i]) {
1414                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1415                         break;
1416                     }
1417
1418                     s->input_picture[i]->f->pict_type =
1419                         s->rc_context.entry[pict_num].new_pict_type;
1420                 }
1421             }
1422
1423             if (s->avctx->b_frame_strategy == 0) {
1424                 b_frames = s->max_b_frames;
1425                 while (b_frames && !s->input_picture[b_frames])
1426                     b_frames--;
1427             } else if (s->avctx->b_frame_strategy == 1) {
1428                 for (i = 1; i < s->max_b_frames + 1; i++) {
1429                     if (s->input_picture[i] &&
1430                         s->input_picture[i]->b_frame_score == 0) {
1431                         s->input_picture[i]->b_frame_score =
1432                             get_intra_count(s,
1433                                             s->input_picture[i    ]->f->data[0],
1434                                             s->input_picture[i - 1]->f->data[0],
1435                                             s->linesize) + 1;
1436                     }
1437                 }
1438                 for (i = 0; i < s->max_b_frames + 1; i++) {
1439                     if (!s->input_picture[i] ||
1440                         s->input_picture[i]->b_frame_score - 1 >
1441                             s->mb_num / s->avctx->b_sensitivity)
1442                         break;
1443                 }
1444
1445                 b_frames = FFMAX(0, i - 1);
1446
1447                 /* reset scores */
1448                 for (i = 0; i < b_frames + 1; i++) {
1449                     s->input_picture[i]->b_frame_score = 0;
1450                 }
1451             } else if (s->avctx->b_frame_strategy == 2) {
1452                 b_frames = estimate_best_b_count(s);
1453             } else {
1454                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1455                 b_frames = 0;
1456             }
1457
1458             emms_c();
1459
1460             for (i = b_frames - 1; i >= 0; i--) {
1461                 int type = s->input_picture[i]->f->pict_type;
1462                 if (type && type != AV_PICTURE_TYPE_B)
1463                     b_frames = i;
1464             }
1465             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1466                 b_frames == s->max_b_frames) {
1467                 av_log(s->avctx, AV_LOG_ERROR,
1468                        "warning, too many b frames in a row\n");
1469             }
1470
1471             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1472                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1473                     s->gop_size > s->picture_in_gop_number) {
1474                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1475                 } else {
1476                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1477                         b_frames = 0;
1478                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1479                 }
1480             }
1481
1482             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1483                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1484                 b_frames--;
1485
1486             s->reordered_input_picture[0] = s->input_picture[b_frames];
1487             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1488                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1489             s->reordered_input_picture[0]->f->coded_picture_number =
1490                 s->coded_picture_number++;
1491             for (i = 0; i < b_frames; i++) {
1492                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1493                 s->reordered_input_picture[i + 1]->f->pict_type =
1494                     AV_PICTURE_TYPE_B;
1495                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1496                     s->coded_picture_number++;
1497             }
1498         }
1499     }
1500 no_output_pic:
1501     if (s->reordered_input_picture[0]) {
1502         s->reordered_input_picture[0]->reference =
1503            s->reordered_input_picture[0]->f->pict_type !=
1504                AV_PICTURE_TYPE_B ? 3 : 0;
1505
1506         ff_mpeg_unref_picture(s, &s->new_picture);
1507         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1508             return ret;
1509
1510         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1511             // input is a shared pix, so we can't modifiy it -> alloc a new
1512             // one & ensure that the shared one is reuseable
1513
1514             Picture *pic;
1515             int i = ff_find_unused_picture(s, 0);
1516             if (i < 0)
1517                 return i;
1518             pic = &s->picture[i];
1519
1520             pic->reference = s->reordered_input_picture[0]->reference;
1521             if (ff_alloc_picture(s, pic, 0) < 0) {
1522                 return -1;
1523             }
1524
1525             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1526             if (ret < 0)
1527                 return ret;
1528
1529             /* mark us unused / free shared pic */
1530             av_frame_unref(s->reordered_input_picture[0]->f);
1531             s->reordered_input_picture[0]->shared = 0;
1532
1533             s->current_picture_ptr = pic;
1534         } else {
1535             // input is not a shared pix -> reuse buffer for current_pix
1536             s->current_picture_ptr = s->reordered_input_picture[0];
1537             for (i = 0; i < 4; i++) {
1538                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1539             }
1540         }
1541         ff_mpeg_unref_picture(s, &s->current_picture);
1542         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1543                                        s->current_picture_ptr)) < 0)
1544             return ret;
1545
1546         s->picture_number = s->new_picture.f->display_picture_number;
1547     } else {
1548         ff_mpeg_unref_picture(s, &s->new_picture);
1549     }
1550     return 0;
1551 }
1552
1553 static void frame_end(MpegEncContext *s)
1554 {
1555     if (s->unrestricted_mv &&
1556         s->current_picture.reference &&
1557         !s->intra_only) {
1558         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1559         int hshift = desc->log2_chroma_w;
1560         int vshift = desc->log2_chroma_h;
1561         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1562                                 s->current_picture.f->linesize[0],
1563                                 s->h_edge_pos, s->v_edge_pos,
1564                                 EDGE_WIDTH, EDGE_WIDTH,
1565                                 EDGE_TOP | EDGE_BOTTOM);
1566         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1567                                 s->current_picture.f->linesize[1],
1568                                 s->h_edge_pos >> hshift,
1569                                 s->v_edge_pos >> vshift,
1570                                 EDGE_WIDTH >> hshift,
1571                                 EDGE_WIDTH >> vshift,
1572                                 EDGE_TOP | EDGE_BOTTOM);
1573         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1574                                 s->current_picture.f->linesize[2],
1575                                 s->h_edge_pos >> hshift,
1576                                 s->v_edge_pos >> vshift,
1577                                 EDGE_WIDTH >> hshift,
1578                                 EDGE_WIDTH >> vshift,
1579                                 EDGE_TOP | EDGE_BOTTOM);
1580     }
1581
1582     emms_c();
1583
1584     s->last_pict_type                 = s->pict_type;
1585     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1586     if (s->pict_type!= AV_PICTURE_TYPE_B)
1587         s->last_non_b_pict_type = s->pict_type;
1588
1589     s->avctx->coded_frame = s->current_picture_ptr->f;
1590
1591 }
1592
1593 static void update_noise_reduction(MpegEncContext *s)
1594 {
1595     int intra, i;
1596
1597     for (intra = 0; intra < 2; intra++) {
1598         if (s->dct_count[intra] > (1 << 16)) {
1599             for (i = 0; i < 64; i++) {
1600                 s->dct_error_sum[intra][i] >>= 1;
1601             }
1602             s->dct_count[intra] >>= 1;
1603         }
1604
1605         for (i = 0; i < 64; i++) {
1606             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1607                                        s->dct_count[intra] +
1608                                        s->dct_error_sum[intra][i] / 2) /
1609                                       (s->dct_error_sum[intra][i] + 1);
1610         }
1611     }
1612 }
1613
1614 static int frame_start(MpegEncContext *s)
1615 {
1616     int ret;
1617
1618     /* mark & release old frames */
1619     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1620         s->last_picture_ptr != s->next_picture_ptr &&
1621         s->last_picture_ptr->f->buf[0]) {
1622         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1623     }
1624
1625     s->current_picture_ptr->f->pict_type = s->pict_type;
1626     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1627
1628     ff_mpeg_unref_picture(s, &s->current_picture);
1629     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1630                                    s->current_picture_ptr)) < 0)
1631         return ret;
1632
1633     if (s->pict_type != AV_PICTURE_TYPE_B) {
1634         s->last_picture_ptr = s->next_picture_ptr;
1635         if (!s->droppable)
1636             s->next_picture_ptr = s->current_picture_ptr;
1637     }
1638
1639     if (s->last_picture_ptr) {
1640         ff_mpeg_unref_picture(s, &s->last_picture);
1641         if (s->last_picture_ptr->f->buf[0] &&
1642             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1643                                        s->last_picture_ptr)) < 0)
1644             return ret;
1645     }
1646     if (s->next_picture_ptr) {
1647         ff_mpeg_unref_picture(s, &s->next_picture);
1648         if (s->next_picture_ptr->f->buf[0] &&
1649             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1650                                        s->next_picture_ptr)) < 0)
1651             return ret;
1652     }
1653
1654     if (s->picture_structure!= PICT_FRAME) {
1655         int i;
1656         for (i = 0; i < 4; i++) {
1657             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1658                 s->current_picture.f->data[i] +=
1659                     s->current_picture.f->linesize[i];
1660             }
1661             s->current_picture.f->linesize[i] *= 2;
1662             s->last_picture.f->linesize[i]    *= 2;
1663             s->next_picture.f->linesize[i]    *= 2;
1664         }
1665     }
1666
1667     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1668         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1669         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1670     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1671         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1672         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1673     } else {
1674         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1675         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1676     }
1677
1678     if (s->dct_error_sum) {
1679         av_assert2(s->avctx->noise_reduction && s->encoding);
1680         update_noise_reduction(s);
1681     }
1682
1683     return 0;
1684 }
1685
1686 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1687                           const AVFrame *pic_arg, int *got_packet)
1688 {
1689     MpegEncContext *s = avctx->priv_data;
1690     int i, stuffing_count, ret;
1691     int context_count = s->slice_context_count;
1692
1693     s->picture_in_gop_number++;
1694
1695     if (load_input_picture(s, pic_arg) < 0)
1696         return -1;
1697
1698     if (select_input_picture(s) < 0) {
1699         return -1;
1700     }
1701
1702     /* output? */
1703     if (s->new_picture.f->data[0]) {
1704         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1705             return ret;
1706         if (s->mb_info) {
1707             s->mb_info_ptr = av_packet_new_side_data(pkt,
1708                                  AV_PKT_DATA_H263_MB_INFO,
1709                                  s->mb_width*s->mb_height*12);
1710             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1711         }
1712
1713         for (i = 0; i < context_count; i++) {
1714             int start_y = s->thread_context[i]->start_mb_y;
1715             int   end_y = s->thread_context[i]->  end_mb_y;
1716             int h       = s->mb_height;
1717             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1718             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1719
1720             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1721         }
1722
1723         s->pict_type = s->new_picture.f->pict_type;
1724         //emms_c();
1725         ret = frame_start(s);
1726         if (ret < 0)
1727             return ret;
1728 vbv_retry:
1729         if (encode_picture(s, s->picture_number) < 0)
1730             return -1;
1731
1732         avctx->header_bits = s->header_bits;
1733         avctx->mv_bits     = s->mv_bits;
1734         avctx->misc_bits   = s->misc_bits;
1735         avctx->i_tex_bits  = s->i_tex_bits;
1736         avctx->p_tex_bits  = s->p_tex_bits;
1737         avctx->i_count     = s->i_count;
1738         // FIXME f/b_count in avctx
1739         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1740         avctx->skip_count  = s->skip_count;
1741
1742         frame_end(s);
1743
1744         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1745             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1746
1747         if (avctx->rc_buffer_size) {
1748             RateControlContext *rcc = &s->rc_context;
1749             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1750
1751             if (put_bits_count(&s->pb) > max_size &&
1752                 s->lambda < s->avctx->lmax) {
1753                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1754                                        (s->qscale + 1) / s->qscale);
1755                 if (s->adaptive_quant) {
1756                     int i;
1757                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1758                         s->lambda_table[i] =
1759                             FFMAX(s->lambda_table[i] + 1,
1760                                   s->lambda_table[i] * (s->qscale + 1) /
1761                                   s->qscale);
1762                 }
1763                 s->mb_skipped = 0;        // done in frame_start()
1764                 // done in encode_picture() so we must undo it
1765                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1766                     if (s->flipflop_rounding          ||
1767                         s->codec_id == AV_CODEC_ID_H263P ||
1768                         s->codec_id == AV_CODEC_ID_MPEG4)
1769                         s->no_rounding ^= 1;
1770                 }
1771                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1772                     s->time_base       = s->last_time_base;
1773                     s->last_non_b_time = s->time - s->pp_time;
1774                 }
1775                 for (i = 0; i < context_count; i++) {
1776                     PutBitContext *pb = &s->thread_context[i]->pb;
1777                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1778                 }
1779                 goto vbv_retry;
1780             }
1781
1782             av_assert0(s->avctx->rc_max_rate);
1783         }
1784
1785         if (s->flags & CODEC_FLAG_PASS1)
1786             ff_write_pass1_stats(s);
1787
1788         for (i = 0; i < 4; i++) {
1789             s->current_picture_ptr->f->error[i] =
1790             s->current_picture.f->error[i] =
1791                 s->current_picture.error[i];
1792             avctx->error[i] += s->current_picture_ptr->f->error[i];
1793         }
1794
1795         if (s->flags & CODEC_FLAG_PASS1)
1796             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1797                    avctx->i_tex_bits + avctx->p_tex_bits ==
1798                        put_bits_count(&s->pb));
1799         flush_put_bits(&s->pb);
1800         s->frame_bits  = put_bits_count(&s->pb);
1801
1802         stuffing_count = ff_vbv_update(s, s->frame_bits);
1803         s->stuffing_bits = 8*stuffing_count;
1804         if (stuffing_count) {
1805             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1806                     stuffing_count + 50) {
1807                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1808                 return -1;
1809             }
1810
1811             switch (s->codec_id) {
1812             case AV_CODEC_ID_MPEG1VIDEO:
1813             case AV_CODEC_ID_MPEG2VIDEO:
1814                 while (stuffing_count--) {
1815                     put_bits(&s->pb, 8, 0);
1816                 }
1817             break;
1818             case AV_CODEC_ID_MPEG4:
1819                 put_bits(&s->pb, 16, 0);
1820                 put_bits(&s->pb, 16, 0x1C3);
1821                 stuffing_count -= 4;
1822                 while (stuffing_count--) {
1823                     put_bits(&s->pb, 8, 0xFF);
1824                 }
1825             break;
1826             default:
1827                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1828             }
1829             flush_put_bits(&s->pb);
1830             s->frame_bits  = put_bits_count(&s->pb);
1831         }
1832
1833         /* update mpeg1/2 vbv_delay for CBR */
1834         if (s->avctx->rc_max_rate                          &&
1835             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1836             s->out_format == FMT_MPEG1                     &&
1837             90000LL * (avctx->rc_buffer_size - 1) <=
1838                 s->avctx->rc_max_rate * 0xFFFFLL) {
1839             int vbv_delay, min_delay;
1840             double inbits  = s->avctx->rc_max_rate *
1841                              av_q2d(s->avctx->time_base);
1842             int    minbits = s->frame_bits - 8 *
1843                              (s->vbv_delay_ptr - s->pb.buf - 1);
1844             double bits    = s->rc_context.buffer_index + minbits - inbits;
1845
1846             if (bits < 0)
1847                 av_log(s->avctx, AV_LOG_ERROR,
1848                        "Internal error, negative bits\n");
1849
1850             assert(s->repeat_first_field == 0);
1851
1852             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1853             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1854                         s->avctx->rc_max_rate;
1855
1856             vbv_delay = FFMAX(vbv_delay, min_delay);
1857
1858             av_assert0(vbv_delay < 0xFFFF);
1859
1860             s->vbv_delay_ptr[0] &= 0xF8;
1861             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1862             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1863             s->vbv_delay_ptr[2] &= 0x07;
1864             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1865             avctx->vbv_delay     = vbv_delay * 300;
1866         }
1867         s->total_bits     += s->frame_bits;
1868         avctx->frame_bits  = s->frame_bits;
1869
1870         pkt->pts = s->current_picture.f->pts;
1871         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1872             if (!s->current_picture.f->coded_picture_number)
1873                 pkt->dts = pkt->pts - s->dts_delta;
1874             else
1875                 pkt->dts = s->reordered_pts;
1876             s->reordered_pts = pkt->pts;
1877         } else
1878             pkt->dts = pkt->pts;
1879         if (s->current_picture.f->key_frame)
1880             pkt->flags |= AV_PKT_FLAG_KEY;
1881         if (s->mb_info)
1882             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1883     } else {
1884         s->frame_bits = 0;
1885     }
1886
1887     /* release non-reference frames */
1888     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1889         if (!s->picture[i].reference)
1890             ff_mpeg_unref_picture(s, &s->picture[i]);
1891     }
1892
1893     av_assert1((s->frame_bits & 7) == 0);
1894
1895     pkt->size = s->frame_bits / 8;
1896     *got_packet = !!pkt->size;
1897     return 0;
1898 }
1899
1900 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1901                                                 int n, int threshold)
1902 {
1903     static const char tab[64] = {
1904         3, 2, 2, 1, 1, 1, 1, 1,
1905         1, 1, 1, 1, 1, 1, 1, 1,
1906         1, 1, 1, 1, 1, 1, 1, 1,
1907         0, 0, 0, 0, 0, 0, 0, 0,
1908         0, 0, 0, 0, 0, 0, 0, 0,
1909         0, 0, 0, 0, 0, 0, 0, 0,
1910         0, 0, 0, 0, 0, 0, 0, 0,
1911         0, 0, 0, 0, 0, 0, 0, 0
1912     };
1913     int score = 0;
1914     int run = 0;
1915     int i;
1916     int16_t *block = s->block[n];
1917     const int last_index = s->block_last_index[n];
1918     int skip_dc;
1919
1920     if (threshold < 0) {
1921         skip_dc = 0;
1922         threshold = -threshold;
1923     } else
1924         skip_dc = 1;
1925
1926     /* Are all we could set to zero already zero? */
1927     if (last_index <= skip_dc - 1)
1928         return;
1929
1930     for (i = 0; i <= last_index; i++) {
1931         const int j = s->intra_scantable.permutated[i];
1932         const int level = FFABS(block[j]);
1933         if (level == 1) {
1934             if (skip_dc && i == 0)
1935                 continue;
1936             score += tab[run];
1937             run = 0;
1938         } else if (level > 1) {
1939             return;
1940         } else {
1941             run++;
1942         }
1943     }
1944     if (score >= threshold)
1945         return;
1946     for (i = skip_dc; i <= last_index; i++) {
1947         const int j = s->intra_scantable.permutated[i];
1948         block[j] = 0;
1949     }
1950     if (block[0])
1951         s->block_last_index[n] = 0;
1952     else
1953         s->block_last_index[n] = -1;
1954 }
1955
1956 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1957                                int last_index)
1958 {
1959     int i;
1960     const int maxlevel = s->max_qcoeff;
1961     const int minlevel = s->min_qcoeff;
1962     int overflow = 0;
1963
1964     if (s->mb_intra) {
1965         i = 1; // skip clipping of intra dc
1966     } else
1967         i = 0;
1968
1969     for (; i <= last_index; i++) {
1970         const int j = s->intra_scantable.permutated[i];
1971         int level = block[j];
1972
1973         if (level > maxlevel) {
1974             level = maxlevel;
1975             overflow++;
1976         } else if (level < minlevel) {
1977             level = minlevel;
1978             overflow++;
1979         }
1980
1981         block[j] = level;
1982     }
1983
1984     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1985         av_log(s->avctx, AV_LOG_INFO,
1986                "warning, clipping %d dct coefficients to %d..%d\n",
1987                overflow, minlevel, maxlevel);
1988 }
1989
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel variation.
 *
 * For every pixel the neighbourhood of up to 3x3 pixels (clipped at the
 * borders of the 8x8 block) is summed; the stored weight is
 * 36 * ff_sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output, 64 entries laid out as x + 8 * y
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = row > 0 ? row - 1 : 0;
        const int bottom = row + 2 < 8 ? row + 2 : 8;

        for (col = 0; col < 8; col++) {
            const int left  = col > 0 ? col - 1 : 0;
            const int right = col + 2 < 8 ? col + 2 : 8;
            int total = 0;
            int total_sq = 0;
            int samples = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int pix = line[nx];

                    total    += pix;
                    total_sq += pix * pix;
                    samples++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
2013
2014 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2015                                                 int motion_x, int motion_y,
2016                                                 int mb_block_height,
2017                                                 int mb_block_width,
2018                                                 int mb_block_count)
2019 {
2020     int16_t weight[12][64];
2021     int16_t orig[12][64];
2022     const int mb_x = s->mb_x;
2023     const int mb_y = s->mb_y;
2024     int i;
2025     int skip_dct[12];
2026     int dct_offset = s->linesize * 8; // default for progressive frames
2027     int uv_dct_offset = s->uvlinesize * 8;
2028     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2029     ptrdiff_t wrap_y, wrap_c;
2030
2031     for (i = 0; i < mb_block_count; i++)
2032         skip_dct[i] = s->skipdct;
2033
2034     if (s->adaptive_quant) {
2035         const int last_qp = s->qscale;
2036         const int mb_xy = mb_x + mb_y * s->mb_stride;
2037
2038         s->lambda = s->lambda_table[mb_xy];
2039         update_qscale(s);
2040
2041         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2042             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2043             s->dquant = s->qscale - last_qp;
2044
2045             if (s->out_format == FMT_H263) {
2046                 s->dquant = av_clip(s->dquant, -2, 2);
2047
2048                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2049                     if (!s->mb_intra) {
2050                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2051                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2052                                 s->dquant = 0;
2053                         }
2054                         if (s->mv_type == MV_TYPE_8X8)
2055                             s->dquant = 0;
2056                     }
2057                 }
2058             }
2059         }
2060         ff_set_qscale(s, last_qp + s->dquant);
2061     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2062         ff_set_qscale(s, s->qscale + s->dquant);
2063
2064     wrap_y = s->linesize;
2065     wrap_c = s->uvlinesize;
2066     ptr_y  = s->new_picture.f->data[0] +
2067              (mb_y * 16 * wrap_y)              + mb_x * 16;
2068     ptr_cb = s->new_picture.f->data[1] +
2069              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2070     ptr_cr = s->new_picture.f->data[2] +
2071              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2072
2073     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2074         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2075         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2076         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2077         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2078                                  wrap_y, wrap_y,
2079                                  16, 16, mb_x * 16, mb_y * 16,
2080                                  s->width, s->height);
2081         ptr_y = ebuf;
2082         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2083                                  wrap_c, wrap_c,
2084                                  mb_block_width, mb_block_height,
2085                                  mb_x * mb_block_width, mb_y * mb_block_height,
2086                                  cw, ch);
2087         ptr_cb = ebuf + 16 * wrap_y;
2088         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2089                                  wrap_c, wrap_c,
2090                                  mb_block_width, mb_block_height,
2091                                  mb_x * mb_block_width, mb_y * mb_block_height,
2092                                  cw, ch);
2093         ptr_cr = ebuf + 16 * wrap_y + 16;
2094     }
2095
2096     if (s->mb_intra) {
2097         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2098             int progressive_score, interlaced_score;
2099
2100             s->interlaced_dct = 0;
2101             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2102                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2103                                                      NULL, wrap_y, 8) - 400;
2104
2105             if (progressive_score > 0) {
2106                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2107                                                         NULL, wrap_y * 2, 8) +
2108                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2109                                                         NULL, wrap_y * 2, 8);
2110                 if (progressive_score > interlaced_score) {
2111                     s->interlaced_dct = 1;
2112
2113                     dct_offset = wrap_y;
2114                     uv_dct_offset = wrap_c;
2115                     wrap_y <<= 1;
2116                     if (s->chroma_format == CHROMA_422 ||
2117                         s->chroma_format == CHROMA_444)
2118                         wrap_c <<= 1;
2119                 }
2120             }
2121         }
2122
2123         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2124         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2125         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2126         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2127
2128         if (s->flags & CODEC_FLAG_GRAY) {
2129             skip_dct[4] = 1;
2130             skip_dct[5] = 1;
2131         } else {
2132             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2133             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2134             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2135                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2136                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2137             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2138                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2139                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2140                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2141                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2142                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2143                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2144             }
2145         }
2146     } else {
2147         op_pixels_func (*op_pix)[4];
2148         qpel_mc_func (*op_qpix)[16];
2149         uint8_t *dest_y, *dest_cb, *dest_cr;
2150
2151         dest_y  = s->dest[0];
2152         dest_cb = s->dest[1];
2153         dest_cr = s->dest[2];
2154
2155         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2156             op_pix  = s->hdsp.put_pixels_tab;
2157             op_qpix = s->qdsp.put_qpel_pixels_tab;
2158         } else {
2159             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2160             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2161         }
2162
2163         if (s->mv_dir & MV_DIR_FORWARD) {
2164             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2165                           s->last_picture.f->data,
2166                           op_pix, op_qpix);
2167             op_pix  = s->hdsp.avg_pixels_tab;
2168             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2169         }
2170         if (s->mv_dir & MV_DIR_BACKWARD) {
2171             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2172                           s->next_picture.f->data,
2173                           op_pix, op_qpix);
2174         }
2175
2176         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2177             int progressive_score, interlaced_score;
2178
2179             s->interlaced_dct = 0;
2180             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2181                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2182                                                      ptr_y + wrap_y * 8,
2183                                                      wrap_y, 8) - 400;
2184
2185             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2186                 progressive_score -= 400;
2187
2188             if (progressive_score > 0) {
2189                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2190                                                         wrap_y * 2, 8) +
2191                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2192                                                         ptr_y + wrap_y,
2193                                                         wrap_y * 2, 8);
2194
2195                 if (progressive_score > interlaced_score) {
2196                     s->interlaced_dct = 1;
2197
2198                     dct_offset = wrap_y;
2199                     uv_dct_offset = wrap_c;
2200                     wrap_y <<= 1;
2201                     if (s->chroma_format == CHROMA_422)
2202                         wrap_c <<= 1;
2203                 }
2204             }
2205         }
2206
2207         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2208         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2209         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2210                             dest_y + dct_offset, wrap_y);
2211         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2212                             dest_y + dct_offset + 8, wrap_y);
2213
2214         if (s->flags & CODEC_FLAG_GRAY) {
2215             skip_dct[4] = 1;
2216             skip_dct[5] = 1;
2217         } else {
2218             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2219             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2220             if (!s->chroma_y_shift) { /* 422 */
2221                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2222                                     dest_cb + uv_dct_offset, wrap_c);
2223                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2224                                     dest_cr + uv_dct_offset, wrap_c);
2225             }
2226         }
2227         /* pre quantization */
2228         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2229                 2 * s->qscale * s->qscale) {
2230             // FIXME optimize
2231             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2232                 skip_dct[0] = 1;
2233             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2234                 skip_dct[1] = 1;
2235             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2236                                wrap_y, 8) < 20 * s->qscale)
2237                 skip_dct[2] = 1;
2238             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2239                                wrap_y, 8) < 20 * s->qscale)
2240                 skip_dct[3] = 1;
2241             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2242                 skip_dct[4] = 1;
2243             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2244                 skip_dct[5] = 1;
2245             if (!s->chroma_y_shift) { /* 422 */
2246                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2247                                    dest_cb + uv_dct_offset,
2248                                    wrap_c, 8) < 20 * s->qscale)
2249                     skip_dct[6] = 1;
2250                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2251                                    dest_cr + uv_dct_offset,
2252                                    wrap_c, 8) < 20 * s->qscale)
2253                     skip_dct[7] = 1;
2254             }
2255         }
2256     }
2257
2258     if (s->quantizer_noise_shaping) {
2259         if (!skip_dct[0])
2260             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2261         if (!skip_dct[1])
2262             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2263         if (!skip_dct[2])
2264             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2265         if (!skip_dct[3])
2266             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2267         if (!skip_dct[4])
2268             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2269         if (!skip_dct[5])
2270             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2271         if (!s->chroma_y_shift) { /* 422 */
2272             if (!skip_dct[6])
2273                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2274                                   wrap_c);
2275             if (!skip_dct[7])
2276                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2277                                   wrap_c);
2278         }
2279         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2280     }
2281
2282     /* DCT & quantize */
2283     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2284     {
2285         for (i = 0; i < mb_block_count; i++) {
2286             if (!skip_dct[i]) {
2287                 int overflow;
2288                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2289                 // FIXME we could decide to change to quantizer instead of
2290                 // clipping
2291                 // JS: I don't think that would be a good idea it could lower
2292                 //     quality instead of improve it. Just INTRADC clipping
2293                 //     deserves changes in quantizer
2294                 if (overflow)
2295                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2296             } else
2297                 s->block_last_index[i] = -1;
2298         }
2299         if (s->quantizer_noise_shaping) {
2300             for (i = 0; i < mb_block_count; i++) {
2301                 if (!skip_dct[i]) {
2302                     s->block_last_index[i] =
2303                         dct_quantize_refine(s, s->block[i], weight[i],
2304                                             orig[i], i, s->qscale);
2305                 }
2306             }
2307         }
2308
2309         if (s->luma_elim_threshold && !s->mb_intra)
2310             for (i = 0; i < 4; i++)
2311                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2312         if (s->chroma_elim_threshold && !s->mb_intra)
2313             for (i = 4; i < mb_block_count; i++)
2314                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2315
2316         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2317             for (i = 0; i < mb_block_count; i++) {
2318                 if (s->block_last_index[i] == -1)
2319                     s->coded_score[i] = INT_MAX / 256;
2320             }
2321         }
2322     }
2323
2324     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2325         s->block_last_index[4] =
2326         s->block_last_index[5] = 0;
2327         s->block[4][0] =
2328         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2329         if (!s->chroma_y_shift) { /* 422 / 444 */
2330             for (i=6; i<12; i++) {
2331                 s->block_last_index[i] = 0;
2332                 s->block[i][0] = s->block[4][0];
2333             }
2334         }
2335     }
2336
2337     // non c quantize code returns incorrect block_last_index FIXME
2338     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2339         for (i = 0; i < mb_block_count; i++) {
2340             int j;
2341             if (s->block_last_index[i] > 0) {
2342                 for (j = 63; j > 0; j--) {
2343                     if (s->block[i][s->intra_scantable.permutated[j]])
2344                         break;
2345                 }
2346                 s->block_last_index[i] = j;
2347             }
2348         }
2349     }
2350
2351     /* huffman encode */
2352     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2353     case AV_CODEC_ID_MPEG1VIDEO:
2354     case AV_CODEC_ID_MPEG2VIDEO:
2355         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2356             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2357         break;
2358     case AV_CODEC_ID_MPEG4:
2359         if (CONFIG_MPEG4_ENCODER)
2360             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2361         break;
2362     case AV_CODEC_ID_MSMPEG4V2:
2363     case AV_CODEC_ID_MSMPEG4V3:
2364     case AV_CODEC_ID_WMV1:
2365         if (CONFIG_MSMPEG4_ENCODER)
2366             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2367         break;
2368     case AV_CODEC_ID_WMV2:
2369         if (CONFIG_WMV2_ENCODER)
2370             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2371         break;
2372     case AV_CODEC_ID_H261:
2373         if (CONFIG_H261_ENCODER)
2374             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2375         break;
2376     case AV_CODEC_ID_H263:
2377     case AV_CODEC_ID_H263P:
2378     case AV_CODEC_ID_FLV1:
2379     case AV_CODEC_ID_RV10:
2380     case AV_CODEC_ID_RV20:
2381         if (CONFIG_H263_ENCODER)
2382             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2383         break;
2384     case AV_CODEC_ID_MJPEG:
2385     case AV_CODEC_ID_AMV:
2386         if (CONFIG_MJPEG_ENCODER)
2387             ff_mjpeg_encode_mb(s, s->block);
2388         break;
2389     default:
2390         av_assert1(0);
2391     }
2392 }
2393
2394 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2395 {
2396     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2397     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2398     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2399 }
2400
2401 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2402     int i;
2403
2404     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2405
2406     /* mpeg1 */
2407     d->mb_skip_run= s->mb_skip_run;
2408     for(i=0; i<3; i++)
2409         d->last_dc[i] = s->last_dc[i];
2410
2411     /* statistics */
2412     d->mv_bits= s->mv_bits;
2413     d->i_tex_bits= s->i_tex_bits;
2414     d->p_tex_bits= s->p_tex_bits;
2415     d->i_count= s->i_count;
2416     d->f_count= s->f_count;
2417     d->b_count= s->b_count;
2418     d->skip_count= s->skip_count;
2419     d->misc_bits= s->misc_bits;
2420     d->last_bits= 0;
2421
2422     d->mb_skipped= 0;
2423     d->qscale= s->qscale;
2424     d->dquant= s->dquant;
2425
2426     d->esc3_level_length= s->esc3_level_length;
2427 }
2428
2429 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2430     int i;
2431
2432     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2433     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2434
2435     /* mpeg1 */
2436     d->mb_skip_run= s->mb_skip_run;
2437     for(i=0; i<3; i++)
2438         d->last_dc[i] = s->last_dc[i];
2439
2440     /* statistics */
2441     d->mv_bits= s->mv_bits;
2442     d->i_tex_bits= s->i_tex_bits;
2443     d->p_tex_bits= s->p_tex_bits;
2444     d->i_count= s->i_count;
2445     d->f_count= s->f_count;
2446     d->b_count= s->b_count;
2447     d->skip_count= s->skip_count;
2448     d->misc_bits= s->misc_bits;
2449
2450     d->mb_intra= s->mb_intra;
2451     d->mb_skipped= s->mb_skipped;
2452     d->mv_type= s->mv_type;
2453     d->mv_dir= s->mv_dir;
2454     d->pb= s->pb;
2455     if(s->data_partitioning){
2456         d->pb2= s->pb2;
2457         d->tex_pb= s->tex_pb;
2458     }
2459     d->block= s->block;
2460     for(i=0; i<8; i++)
2461         d->block_last_index[i]= s->block_last_index[i];
2462     d->interlaced_dct= s->interlaced_dct;
2463     d->qscale= s->qscale;
2464
2465     d->esc3_level_length= s->esc3_level_length;
2466 }
2467
2468 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2469                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2470                            int *dmin, int *next_block, int motion_x, int motion_y)
2471 {
2472     int score;
2473     uint8_t *dest_backup[3];
2474
2475     copy_context_before_encode(s, backup, type);
2476
2477     s->block= s->blocks[*next_block];
2478     s->pb= pb[*next_block];
2479     if(s->data_partitioning){
2480         s->pb2   = pb2   [*next_block];
2481         s->tex_pb= tex_pb[*next_block];
2482     }
2483
2484     if(*next_block){
2485         memcpy(dest_backup, s->dest, sizeof(s->dest));
2486         s->dest[0] = s->rd_scratchpad;
2487         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2488         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2489         av_assert0(s->linesize >= 32); //FIXME
2490     }
2491
2492     encode_mb(s, motion_x, motion_y);
2493
2494     score= put_bits_count(&s->pb);
2495     if(s->data_partitioning){
2496         score+= put_bits_count(&s->pb2);
2497         score+= put_bits_count(&s->tex_pb);
2498     }
2499
2500     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2501         ff_mpv_decode_mb(s, s->block);
2502
2503         score *= s->lambda2;
2504         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2505     }
2506
2507     if(*next_block){
2508         memcpy(s->dest, dest_backup, sizeof(s->dest));
2509     }
2510
2511     if(score<*dmin){
2512         *dmin= score;
2513         *next_block^=1;
2514
2515         copy_context_after_encode(best, s, type);
2516     }
2517 }
2518
2519 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2520     uint32_t *sq = ff_square_tab + 256;
2521     int acc=0;
2522     int x,y;
2523
2524     if(w==16 && h==16)
2525         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2526     else if(w==8 && h==8)
2527         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2528
2529     for(y=0; y<h; y++){
2530         for(x=0; x<w; x++){
2531             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2532         }
2533     }
2534
2535     av_assert2(acc>=0);
2536
2537     return acc;
2538 }
2539
2540 static int sse_mb(MpegEncContext *s){
2541     int w= 16;
2542     int h= 16;
2543
2544     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2545     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2546
2547     if(w==16 && h==16)
2548       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2549         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2550                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2551                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2552       }else{
2553         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2554                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2555                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2556       }
2557     else
2558         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2559                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2560                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2561 }
2562
2563 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2564     MpegEncContext *s= *(void**)arg;
2565
2566
2567     s->me.pre_pass=1;
2568     s->me.dia_size= s->avctx->pre_dia_size;
2569     s->first_slice_line=1;
2570     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2571         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2572             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2573         }
2574         s->first_slice_line=0;
2575     }
2576
2577     s->me.pre_pass=0;
2578
2579     return 0;
2580 }
2581
2582 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2583     MpegEncContext *s= *(void**)arg;
2584
2585     ff_check_alignment();
2586
2587     s->me.dia_size= s->avctx->dia_size;
2588     s->first_slice_line=1;
2589     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2590         s->mb_x=0; //for block init below
2591         ff_init_block_index(s);
2592         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2593             s->block_index[0]+=2;
2594             s->block_index[1]+=2;
2595             s->block_index[2]+=2;
2596             s->block_index[3]+=2;
2597
2598             /* compute motion vector & mb_type and store in context */
2599             if(s->pict_type==AV_PICTURE_TYPE_B)
2600                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2601             else
2602                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2603         }
2604         s->first_slice_line=0;
2605     }
2606     return 0;
2607 }
2608
2609 static int mb_var_thread(AVCodecContext *c, void *arg){
2610     MpegEncContext *s= *(void**)arg;
2611     int mb_x, mb_y;
2612
2613     ff_check_alignment();
2614
2615     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2616         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2617             int xx = mb_x * 16;
2618             int yy = mb_y * 16;
2619             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2620             int varc;
2621             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2622
2623             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2624                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2625
2626             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2627             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2628             s->me.mb_var_sum_temp    += varc;
2629         }
2630     }
2631     return 0;
2632 }
2633
2634 static void write_slice_end(MpegEncContext *s){
2635     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2636         if(s->partitioned_frame){
2637             ff_mpeg4_merge_partitions(s);
2638         }
2639
2640         ff_mpeg4_stuffing(&s->pb);
2641     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2642         ff_mjpeg_encode_stuffing(s);
2643     }
2644
2645     avpriv_align_put_bits(&s->pb);
2646     flush_put_bits(&s->pb);
2647
2648     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2649         s->misc_bits+= get_bits_diff(s);
2650 }
2651
2652 static void write_mb_info(MpegEncContext *s)
2653 {
2654     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2655     int offset = put_bits_count(&s->pb);
2656     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2657     int gobn = s->mb_y / s->gob_index;
2658     int pred_x, pred_y;
2659     if (CONFIG_H263_ENCODER)
2660         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2661     bytestream_put_le32(&ptr, offset);
2662     bytestream_put_byte(&ptr, s->qscale);
2663     bytestream_put_byte(&ptr, gobn);
2664     bytestream_put_le16(&ptr, mba);
2665     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2666     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2667     /* 4MV not implemented */
2668     bytestream_put_byte(&ptr, 0); /* hmv2 */
2669     bytestream_put_byte(&ptr, 0); /* vmv2 */
2670 }
2671
2672 static void update_mb_info(MpegEncContext *s, int startcode)
2673 {
2674     if (!s->mb_info)
2675         return;
2676     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2677         s->mb_info_size += 12;
2678         s->prev_mb_info = s->last_mb_info;
2679     }
2680     if (startcode) {
2681         s->prev_mb_info = put_bits_count(&s->pb)/8;
2682         /* This might have incremented mb_info_size above, and we return without
2683          * actually writing any info into that slot yet. But in that case,
2684          * this will be called again at the start of the after writing the
2685          * start code, actually writing the mb info. */
2686         return;
2687     }
2688
2689     s->last_mb_info = put_bits_count(&s->pb)/8;
2690     if (!s->mb_info_size)
2691         s->mb_info_size += 12;
2692     write_mb_info(s);
2693 }
2694
2695 static int encode_thread(AVCodecContext *c, void *arg){
2696     MpegEncContext *s= *(void**)arg;
2697     int mb_x, mb_y, pdif = 0;
2698     int chr_h= 16>>s->chroma_y_shift;
2699     int i, j;
2700     MpegEncContext best_s, backup_s;
2701     uint8_t bit_buf[2][MAX_MB_BYTES];
2702     uint8_t bit_buf2[2][MAX_MB_BYTES];
2703     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2704     PutBitContext pb[2], pb2[2], tex_pb[2];
2705
2706     ff_check_alignment();
2707
2708     for(i=0; i<2; i++){
2709         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2710         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2711         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2712     }
2713
2714     s->last_bits= put_bits_count(&s->pb);
2715     s->mv_bits=0;
2716     s->misc_bits=0;
2717     s->i_tex_bits=0;
2718     s->p_tex_bits=0;
2719     s->i_count=0;
2720     s->f_count=0;
2721     s->b_count=0;
2722     s->skip_count=0;
2723
2724     for(i=0; i<3; i++){
2725         /* init last dc values */
2726         /* note: quant matrix value (8) is implied here */
2727         s->last_dc[i] = 128 << s->intra_dc_precision;
2728
2729         s->current_picture.error[i] = 0;
2730     }
2731     if(s->codec_id==AV_CODEC_ID_AMV){
2732         s->last_dc[0] = 128*8/13;
2733         s->last_dc[1] = 128*8/14;
2734         s->last_dc[2] = 128*8/14;
2735     }
2736     s->mb_skip_run = 0;
2737     memset(s->last_mv, 0, sizeof(s->last_mv));
2738
2739     s->last_mv_dir = 0;
2740
2741     switch(s->codec_id){
2742     case AV_CODEC_ID_H263:
2743     case AV_CODEC_ID_H263P:
2744     case AV_CODEC_ID_FLV1:
2745         if (CONFIG_H263_ENCODER)
2746             s->gob_index = ff_h263_get_gob_height(s);
2747         break;
2748     case AV_CODEC_ID_MPEG4:
2749         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2750             ff_mpeg4_init_partitions(s);
2751         break;
2752     }
2753
2754     s->resync_mb_x=0;
2755     s->resync_mb_y=0;
2756     s->first_slice_line = 1;
2757     s->ptr_lastgob = s->pb.buf;
2758     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2759         s->mb_x=0;
2760         s->mb_y= mb_y;
2761
2762         ff_set_qscale(s, s->qscale);
2763         ff_init_block_index(s);
2764
2765         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2766             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2767             int mb_type= s->mb_type[xy];
2768 //            int d;
2769             int dmin= INT_MAX;
2770             int dir;
2771
2772             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2773                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2774                 return -1;
2775             }
2776             if(s->data_partitioning){
2777                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2778                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2779                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2780                     return -1;
2781                 }
2782             }
2783
2784             s->mb_x = mb_x;
2785             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2786             ff_update_block_index(s);
2787
2788             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2789                 ff_h261_reorder_mb_index(s);
2790                 xy= s->mb_y*s->mb_stride + s->mb_x;
2791                 mb_type= s->mb_type[xy];
2792             }
2793
2794             /* write gob / video packet header  */
2795             if(s->rtp_mode){
2796                 int current_packet_size, is_gob_start;
2797
2798                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2799
2800                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2801
2802                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2803
2804                 switch(s->codec_id){
2805                 case AV_CODEC_ID_H261:
2806                     is_gob_start=0;//FIXME
2807                     break;
2808                 case AV_CODEC_ID_H263:
2809                 case AV_CODEC_ID_H263P:
2810                     if(!s->h263_slice_structured)
2811                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2812                     break;
2813                 case AV_CODEC_ID_MPEG2VIDEO:
2814                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2815                 case AV_CODEC_ID_MPEG1VIDEO:
2816                     if(s->mb_skip_run) is_gob_start=0;
2817                     break;
2818                 case AV_CODEC_ID_MJPEG:
2819                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2820                     break;
2821                 }
2822
2823                 if(is_gob_start){
2824                     if(s->start_mb_y != mb_y || mb_x!=0){
2825                         write_slice_end(s);
2826
2827                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2828                             ff_mpeg4_init_partitions(s);
2829                         }
2830                     }
2831
2832                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2833                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2834
2835                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2836                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2837                         int d = 100 / s->error_rate;
2838                         if(r % d == 0){
2839                             current_packet_size=0;
2840                             s->pb.buf_ptr= s->ptr_lastgob;
2841                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2842                         }
2843                     }
2844
2845                     if (s->avctx->rtp_callback){
2846                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2847                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2848                     }
2849                     update_mb_info(s, 1);
2850
2851                     switch(s->codec_id){
2852                     case AV_CODEC_ID_MPEG4:
2853                         if (CONFIG_MPEG4_ENCODER) {
2854                             ff_mpeg4_encode_video_packet_header(s);
2855                             ff_mpeg4_clean_buffers(s);
2856                         }
2857                     break;
2858                     case AV_CODEC_ID_MPEG1VIDEO:
2859                     case AV_CODEC_ID_MPEG2VIDEO:
2860                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2861                             ff_mpeg1_encode_slice_header(s);
2862                             ff_mpeg1_clean_buffers(s);
2863                         }
2864                     break;
2865                     case AV_CODEC_ID_H263:
2866                     case AV_CODEC_ID_H263P:
2867                         if (CONFIG_H263_ENCODER)
2868                             ff_h263_encode_gob_header(s, mb_y);
2869                     break;
2870                     }
2871
2872                     if(s->flags&CODEC_FLAG_PASS1){
2873                         int bits= put_bits_count(&s->pb);
2874                         s->misc_bits+= bits - s->last_bits;
2875                         s->last_bits= bits;
2876                     }
2877
2878                     s->ptr_lastgob += current_packet_size;
2879                     s->first_slice_line=1;
2880                     s->resync_mb_x=mb_x;
2881                     s->resync_mb_y=mb_y;
2882                 }
2883             }
2884
2885             if(  (s->resync_mb_x   == s->mb_x)
2886                && s->resync_mb_y+1 == s->mb_y){
2887                 s->first_slice_line=0;
2888             }
2889
2890             s->mb_skipped=0;
2891             s->dquant=0; //only for QP_RD
2892
2893             update_mb_info(s, 0);
2894
2895             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2896                 int next_block=0;
2897                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2898
2899                 copy_context_before_encode(&backup_s, s, -1);
2900                 backup_s.pb= s->pb;
2901                 best_s.data_partitioning= s->data_partitioning;
2902                 best_s.partitioned_frame= s->partitioned_frame;
2903                 if(s->data_partitioning){
2904                     backup_s.pb2= s->pb2;
2905                     backup_s.tex_pb= s->tex_pb;
2906                 }
2907
2908                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2909                     s->mv_dir = MV_DIR_FORWARD;
2910                     s->mv_type = MV_TYPE_16X16;
2911                     s->mb_intra= 0;
2912                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2913                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2914                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2915                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2916                 }
2917                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2918                     s->mv_dir = MV_DIR_FORWARD;
2919                     s->mv_type = MV_TYPE_FIELD;
2920                     s->mb_intra= 0;
2921                     for(i=0; i<2; i++){
2922                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2923                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2924                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2925                     }
2926                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2927                                  &dmin, &next_block, 0, 0);
2928                 }
2929                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2930                     s->mv_dir = MV_DIR_FORWARD;
2931                     s->mv_type = MV_TYPE_16X16;
2932                     s->mb_intra= 0;
2933                     s->mv[0][0][0] = 0;
2934                     s->mv[0][0][1] = 0;
2935                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2936                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2937                 }
2938                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2939                     s->mv_dir = MV_DIR_FORWARD;
2940                     s->mv_type = MV_TYPE_8X8;
2941                     s->mb_intra= 0;
2942                     for(i=0; i<4; i++){
2943                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2944                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2945                     }
2946                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2947                                  &dmin, &next_block, 0, 0);
2948                 }
2949                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2950                     s->mv_dir = MV_DIR_FORWARD;
2951                     s->mv_type = MV_TYPE_16X16;
2952                     s->mb_intra= 0;
2953                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2954                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2955                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2956                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2957                 }
2958                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2959                     s->mv_dir = MV_DIR_BACKWARD;
2960                     s->mv_type = MV_TYPE_16X16;
2961                     s->mb_intra= 0;
2962                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2963                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2964                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2965                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2966                 }
2967                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2968                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2969                     s->mv_type = MV_TYPE_16X16;
2970                     s->mb_intra= 0;
2971                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2972                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2973                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2974                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2975                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2976                                  &dmin, &next_block, 0, 0);
2977                 }
2978                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2979                     s->mv_dir = MV_DIR_FORWARD;
2980                     s->mv_type = MV_TYPE_FIELD;
2981                     s->mb_intra= 0;
2982                     for(i=0; i<2; i++){
2983                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2984                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2985                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2986                     }
2987                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2988                                  &dmin, &next_block, 0, 0);
2989                 }
2990                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2991                     s->mv_dir = MV_DIR_BACKWARD;
2992                     s->mv_type = MV_TYPE_FIELD;
2993                     s->mb_intra= 0;
2994                     for(i=0; i<2; i++){
2995                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2996                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2997                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2998                     }
2999                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3000                                  &dmin, &next_block, 0, 0);
3001                 }
3002                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3003                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3004                     s->mv_type = MV_TYPE_FIELD;
3005                     s->mb_intra= 0;
3006                     for(dir=0; dir<2; dir++){
3007                         for(i=0; i<2; i++){
3008                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3009                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3010                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3011                         }
3012                     }
3013                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3014                                  &dmin, &next_block, 0, 0);
3015                 }
3016                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3017                     s->mv_dir = 0;
3018                     s->mv_type = MV_TYPE_16X16;
3019                     s->mb_intra= 1;
3020                     s->mv[0][0][0] = 0;
3021                     s->mv[0][0][1] = 0;
3022                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3023                                  &dmin, &next_block, 0, 0);
3024                     if(s->h263_pred || s->h263_aic){
3025                         if(best_s.mb_intra)
3026                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3027                         else
3028                             ff_clean_intra_table_entries(s); //old mode?
3029                     }
3030                 }
3031
3032                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3033                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3034                         const int last_qp= backup_s.qscale;
3035                         int qpi, qp, dc[6];
3036                         int16_t ac[6][16];
3037                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3038                         static const int dquant_tab[4]={-1,1,-2,2};
3039                         int storecoefs = s->mb_intra && s->dc_val[0];
3040
3041                         av_assert2(backup_s.dquant == 0);
3042
3043                         //FIXME intra
3044                         s->mv_dir= best_s.mv_dir;
3045                         s->mv_type = MV_TYPE_16X16;
3046                         s->mb_intra= best_s.mb_intra;
3047                         s->mv[0][0][0] = best_s.mv[0][0][0];
3048                         s->mv[0][0][1] = best_s.mv[0][0][1];
3049                         s->mv[1][0][0] = best_s.mv[1][0][0];
3050                         s->mv[1][0][1] = best_s.mv[1][0][1];
3051
3052                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3053                         for(; qpi<4; qpi++){
3054                             int dquant= dquant_tab[qpi];
3055                             qp= last_qp + dquant;
3056                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3057                                 continue;
3058                             backup_s.dquant= dquant;
3059                             if(storecoefs){
3060                                 for(i=0; i<6; i++){
3061                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3062                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3063                                 }
3064                             }
3065
3066                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3067                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3068                             if(best_s.qscale != qp){
3069                                 if(storecoefs){
3070                                     for(i=0; i<6; i++){
3071                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3072                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3073                                     }
3074                                 }
3075                             }
3076                         }
3077                     }
3078                 }
3079                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3080                     int mx= s->b_direct_mv_table[xy][0];
3081                     int my= s->b_direct_mv_table[xy][1];
3082
3083                     backup_s.dquant = 0;
3084                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3085                     s->mb_intra= 0;
3086                     ff_mpeg4_set_direct_mv(s, mx, my);
3087                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3088                                  &dmin, &next_block, mx, my);
3089                 }
3090                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3091                     backup_s.dquant = 0;
3092                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3093                     s->mb_intra= 0;
3094                     ff_mpeg4_set_direct_mv(s, 0, 0);
3095                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3096                                  &dmin, &next_block, 0, 0);
3097                 }
3098                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3099                     int coded=0;
3100                     for(i=0; i<6; i++)
3101                         coded |= s->block_last_index[i];
3102                     if(coded){
3103                         int mx,my;
3104                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3105                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3106                             mx=my=0; //FIXME find the one we actually used
3107                             ff_mpeg4_set_direct_mv(s, mx, my);
3108                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3109                             mx= s->mv[1][0][0];
3110                             my= s->mv[1][0][1];
3111                         }else{
3112                             mx= s->mv[0][0][0];
3113                             my= s->mv[0][0][1];
3114                         }
3115
3116                         s->mv_dir= best_s.mv_dir;
3117                         s->mv_type = best_s.mv_type;
3118                         s->mb_intra= 0;
3119 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3120                         s->mv[0][0][1] = best_s.mv[0][0][1];
3121                         s->mv[1][0][0] = best_s.mv[1][0][0];
3122                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3123                         backup_s.dquant= 0;
3124                         s->skipdct=1;
3125                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3126                                         &dmin, &next_block, mx, my);
3127                         s->skipdct=0;
3128                     }
3129                 }
3130
3131                 s->current_picture.qscale_table[xy] = best_s.qscale;
3132
3133                 copy_context_after_encode(s, &best_s, -1);
3134
3135                 pb_bits_count= put_bits_count(&s->pb);
3136                 flush_put_bits(&s->pb);
3137                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3138                 s->pb= backup_s.pb;
3139
3140                 if(s->data_partitioning){
3141                     pb2_bits_count= put_bits_count(&s->pb2);
3142                     flush_put_bits(&s->pb2);
3143                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3144                     s->pb2= backup_s.pb2;
3145
3146                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3147                     flush_put_bits(&s->tex_pb);
3148                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3149                     s->tex_pb= backup_s.tex_pb;
3150                 }
3151                 s->last_bits= put_bits_count(&s->pb);
3152
3153                 if (CONFIG_H263_ENCODER &&
3154                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3155                     ff_h263_update_motion_val(s);
3156
3157                 if(next_block==0){ //FIXME 16 vs linesize16
3158                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3159                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3160                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3161                 }
3162
3163                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3164                     ff_mpv_decode_mb(s, s->block);
3165             } else {
3166                 int motion_x = 0, motion_y = 0;
3167                 s->mv_type=MV_TYPE_16X16;
3168                 // only one MB-Type possible
3169
3170                 switch(mb_type){
3171                 case CANDIDATE_MB_TYPE_INTRA:
3172                     s->mv_dir = 0;
3173                     s->mb_intra= 1;
3174                     motion_x= s->mv[0][0][0] = 0;
3175                     motion_y= s->mv[0][0][1] = 0;
3176                     break;
3177                 case CANDIDATE_MB_TYPE_INTER:
3178                     s->mv_dir = MV_DIR_FORWARD;
3179                     s->mb_intra= 0;
3180                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3181                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3182                     break;
3183                 case CANDIDATE_MB_TYPE_INTER_I:
3184                     s->mv_dir = MV_DIR_FORWARD;
3185                     s->mv_type = MV_TYPE_FIELD;
3186                     s->mb_intra= 0;
3187                     for(i=0; i<2; i++){
3188                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3189                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3190                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3191                     }
3192                     break;
3193                 case CANDIDATE_MB_TYPE_INTER4V:
3194                     s->mv_dir = MV_DIR_FORWARD;
3195                     s->mv_type = MV_TYPE_8X8;
3196                     s->mb_intra= 0;
3197                     for(i=0; i<4; i++){
3198                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3199                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3200                     }
3201                     break;
3202                 case CANDIDATE_MB_TYPE_DIRECT:
3203                     if (CONFIG_MPEG4_ENCODER) {
3204                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3205                         s->mb_intra= 0;
3206                         motion_x=s->b_direct_mv_table[xy][0];
3207                         motion_y=s->b_direct_mv_table[xy][1];
3208                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3209                     }
3210                     break;
3211                 case CANDIDATE_MB_TYPE_DIRECT0:
3212                     if (CONFIG_MPEG4_ENCODER) {
3213                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3214                         s->mb_intra= 0;
3215                         ff_mpeg4_set_direct_mv(s, 0, 0);
3216                     }
3217                     break;
3218                 case CANDIDATE_MB_TYPE_BIDIR:
3219                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3220                     s->mb_intra= 0;
3221                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3222                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3223                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3224                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3225                     break;
3226                 case CANDIDATE_MB_TYPE_BACKWARD:
3227                     s->mv_dir = MV_DIR_BACKWARD;
3228                     s->mb_intra= 0;
3229                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3230                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3231                     break;
3232                 case CANDIDATE_MB_TYPE_FORWARD:
3233                     s->mv_dir = MV_DIR_FORWARD;
3234                     s->mb_intra= 0;
3235                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3236                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3237                     break;
3238                 case CANDIDATE_MB_TYPE_FORWARD_I:
3239                     s->mv_dir = MV_DIR_FORWARD;
3240                     s->mv_type = MV_TYPE_FIELD;
3241                     s->mb_intra= 0;
3242                     for(i=0; i<2; i++){
3243                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3244                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3245                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3246                     }
3247                     break;
3248                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3249                     s->mv_dir = MV_DIR_BACKWARD;
3250                     s->mv_type = MV_TYPE_FIELD;
3251                     s->mb_intra= 0;
3252                     for(i=0; i<2; i++){
3253                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3254                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3255                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3256                     }
3257                     break;
3258                 case CANDIDATE_MB_TYPE_BIDIR_I:
3259                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3260                     s->mv_type = MV_TYPE_FIELD;
3261                     s->mb_intra= 0;
3262                     for(dir=0; dir<2; dir++){
3263                         for(i=0; i<2; i++){
3264                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3265                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3266                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3267                         }
3268                     }
3269                     break;
3270                 default:
3271                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3272                 }
3273
3274                 encode_mb(s, motion_x, motion_y);
3275
3276                 // RAL: Update last macroblock type
3277                 s->last_mv_dir = s->mv_dir;
3278
3279                 if (CONFIG_H263_ENCODER &&
3280                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3281                     ff_h263_update_motion_val(s);
3282
3283                 ff_mpv_decode_mb(s, s->block);
3284             }
3285
3286             /* clean the MV table in IPS frames for direct mode in B frames */
3287             if(s->mb_intra /* && I,P,S_TYPE */){
3288                 s->p_mv_table[xy][0]=0;
3289                 s->p_mv_table[xy][1]=0;
3290             }
3291
3292             if(s->flags&CODEC_FLAG_PSNR){
3293                 int w= 16;
3294                 int h= 16;
3295
3296                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3297                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3298
3299                 s->current_picture.error[0] += sse(
3300                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3301                     s->dest[0], w, h, s->linesize);
3302                 s->current_picture.error[1] += sse(
3303                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3304                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3305                 s->current_picture.error[2] += sse(
3306                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3307                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3308             }
3309             if(s->loop_filter){
3310                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3311                     ff_h263_loop_filter(s);
3312             }
3313             av_dlog(s->avctx, "MB %d %d bits\n",
3314                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3315         }
3316     }
3317
3318     //not beautiful here but we must write it before flushing so it has to be here
3319     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3320         ff_msmpeg4_encode_ext_header(s);
3321
3322     write_slice_end(s);
3323
3324     /* Send the last GOB if RTP */
3325     if (s->avctx->rtp_callback) {
3326         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3327         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3328         /* Call the RTP callback to send the last GOB */
3329         emms_c();
3330         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3331     }
3332
3333     return 0;
3334 }
3335
3336 #define MERGE(field) dst->field += src->field; src->field=0
3337 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3338     MERGE(me.scene_change_score);
3339     MERGE(me.mc_mb_var_sum_temp);
3340     MERGE(me.mb_var_sum_temp);
3341 }
3342
3343 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3344     int i;
3345
3346     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3347     MERGE(dct_count[1]);
3348     MERGE(mv_bits);
3349     MERGE(i_tex_bits);
3350     MERGE(p_tex_bits);
3351     MERGE(i_count);
3352     MERGE(f_count);
3353     MERGE(b_count);
3354     MERGE(skip_count);
3355     MERGE(misc_bits);
3356     MERGE(er.error_count);
3357     MERGE(padding_bug_score);
3358     MERGE(current_picture.error[0]);
3359     MERGE(current_picture.error[1]);
3360     MERGE(current_picture.error[2]);
3361
3362     if(dst->avctx->noise_reduction){
3363         for(i=0; i<64; i++){
3364             MERGE(dct_error_sum[0][i]);
3365             MERGE(dct_error_sum[1][i]);
3366         }
3367     }
3368
3369     assert(put_bits_count(&src->pb) % 8 ==0);
3370     assert(put_bits_count(&dst->pb) % 8 ==0);
3371     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3372     flush_put_bits(&dst->pb);
3373 }
3374
3375 static int estimate_qp(MpegEncContext *s, int dry_run){
3376     if (s->next_lambda){
3377         s->current_picture_ptr->f->quality =
3378         s->current_picture.f->quality = s->next_lambda;
3379         if(!dry_run) s->next_lambda= 0;
3380     } else if (!s->fixed_qscale) {
3381         s->current_picture_ptr->f->quality =
3382         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3383         if (s->current_picture.f->quality < 0)
3384             return -1;
3385     }
3386
3387     if(s->adaptive_quant){
3388         switch(s->codec_id){
3389         case AV_CODEC_ID_MPEG4:
3390             if (CONFIG_MPEG4_ENCODER)
3391                 ff_clean_mpeg4_qscales(s);
3392             break;
3393         case AV_CODEC_ID_H263:
3394         case AV_CODEC_ID_H263P:
3395         case AV_CODEC_ID_FLV1:
3396             if (CONFIG_H263_ENCODER)
3397                 ff_clean_h263_qscales(s);
3398             break;
3399         default:
3400             ff_init_qscale_tab(s);
3401         }
3402
3403         s->lambda= s->lambda_table[0];
3404         //FIXME broken
3405     }else
3406         s->lambda = s->current_picture.f->quality;
3407     update_qscale(s);
3408     return 0;
3409 }
3410
3411 /* must be called before writing the header */
3412 static void set_frame_distances(MpegEncContext * s){
3413     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3414     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3415
3416     if(s->pict_type==AV_PICTURE_TYPE_B){
3417         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3418         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3419     }else{
3420         s->pp_time= s->time - s->last_non_b_time;
3421         s->last_non_b_time= s->time;
3422         assert(s->picture_number==0 || s->pp_time > 0);
3423     }
3424 }
3425
3426 static int encode_picture(MpegEncContext *s, int picture_number)
3427 {
3428     int i, ret;
3429     int bits;
3430     int context_count = s->slice_context_count;
3431
3432     s->picture_number = picture_number;
3433
3434     /* Reset the average MB variance */
3435     s->me.mb_var_sum_temp    =
3436     s->me.mc_mb_var_sum_temp = 0;
3437
3438     /* we need to initialize some time vars before we can encode b-frames */
3439     // RAL: Condition added for MPEG1VIDEO
3440     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3441         set_frame_distances(s);
3442     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3443         ff_set_mpeg4_time(s);
3444
3445     s->me.scene_change_score=0;
3446
3447 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3448
3449     if(s->pict_type==AV_PICTURE_TYPE_I){
3450         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3451         else                        s->no_rounding=0;
3452     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3453         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3454             s->no_rounding ^= 1;
3455     }
3456
3457     if(s->flags & CODEC_FLAG_PASS2){
3458         if (estimate_qp(s,1) < 0)
3459             return -1;
3460         ff_get_2pass_fcode(s);
3461     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3462         if(s->pict_type==AV_PICTURE_TYPE_B)
3463             s->lambda= s->last_lambda_for[s->pict_type];
3464         else
3465             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3466         update_qscale(s);
3467     }
3468
3469     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3470         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3471         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3472         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3473         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3474     }
3475
3476     s->mb_intra=0; //for the rate distortion & bit compare functions
3477     for(i=1; i<context_count; i++){
3478         ret = ff_update_duplicate_context(s->thread_context[i], s);
3479         if (ret < 0)
3480             return ret;
3481     }
3482
3483     if(ff_init_me(s)<0)
3484         return -1;
3485
3486     /* Estimate motion for every MB */
3487     if(s->pict_type != AV_PICTURE_TYPE_I){
3488         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3489         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3490         if (s->pict_type != AV_PICTURE_TYPE_B) {
3491             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3492                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3493             }
3494         }
3495
3496         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3497     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3498         /* I-Frame */
3499         for(i=0; i<s->mb_stride*s->mb_height; i++)
3500             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3501
3502         if(!s->fixed_qscale){
3503             /* finding spatial complexity for I-frame rate control */
3504             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3505         }
3506     }
3507     for(i=1; i<context_count; i++){
3508         merge_context_after_me(s, s->thread_context[i]);
3509     }
3510     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3511     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3512     emms_c();
3513
3514     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3515         s->pict_type= AV_PICTURE_TYPE_I;
3516         for(i=0; i<s->mb_stride*s->mb_height; i++)
3517             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3518         if(s->msmpeg4_version >= 3)
3519             s->no_rounding=1;
3520         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3521                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3522     }
3523
3524     if(!s->umvplus){
3525         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3526             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3527
3528             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3529                 int a,b;
3530                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3531                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3532                 s->f_code= FFMAX3(s->f_code, a, b);
3533             }
3534
3535             ff_fix_long_p_mvs(s);
3536             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3537             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3538                 int j;
3539                 for(i=0; i<2; i++){
3540                     for(j=0; j<2; j++)
3541                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3542                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3543                 }
3544             }
3545         }
3546
3547         if(s->pict_type==AV_PICTURE_TYPE_B){
3548             int a, b;
3549
3550             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3551             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3552             s->f_code = FFMAX(a, b);
3553
3554             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3555             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3556             s->b_code = FFMAX(a, b);
3557
3558             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3559             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3560             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3561             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3562             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3563                 int dir, j;
3564                 for(dir=0; dir<2; dir++){
3565                     for(i=0; i<2; i++){
3566                         for(j=0; j<2; j++){
3567                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3568                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3569                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3570                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3571                         }
3572                     }
3573                 }
3574             }
3575         }
3576     }
3577
3578     if (estimate_qp(s, 0) < 0)
3579         return -1;
3580
3581     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3582         s->qscale= 3; //reduce clipping problems
3583
3584     if (s->out_format == FMT_MJPEG) {
3585         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3586         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3587
3588         if (s->avctx->intra_matrix) {
3589             chroma_matrix =
3590             luma_matrix = s->avctx->intra_matrix;
3591         }
3592         if (s->avctx->chroma_intra_matrix)
3593             chroma_matrix = s->avctx->chroma_intra_matrix;
3594
3595         /* for mjpeg, we do include qscale in the matrix */
3596         for(i=1;i<64;i++){
3597             int j = s->idsp.idct_permutation[i];
3598
3599             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3600             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3601         }
3602         s->y_dc_scale_table=
3603         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3604         s->chroma_intra_matrix[0] =
3605         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3606         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3607                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3608         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3609                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3610         s->qscale= 8;
3611     }
3612     if(s->codec_id == AV_CODEC_ID_AMV){
3613         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3614         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3615         for(i=1;i<64;i++){
3616             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3617
3618             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3619             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3620         }
3621         s->y_dc_scale_table= y;
3622         s->c_dc_scale_table= c;
3623         s->intra_matrix[0] = 13;
3624         s->chroma_intra_matrix[0] = 14;
3625         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3626                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3627         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3628                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3629         s->qscale= 8;
3630     }
3631
3632     //FIXME var duplication
3633     s->current_picture_ptr->f->key_frame =
3634     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3635     s->current_picture_ptr->f->pict_type =
3636     s->current_picture.f->pict_type = s->pict_type;
3637
3638     if (s->current_picture.f->key_frame)
3639         s->picture_in_gop_number=0;
3640
3641     s->mb_x = s->mb_y = 0;
3642     s->last_bits= put_bits_count(&s->pb);
3643     switch(s->out_format) {
3644     case FMT_MJPEG:
3645         if (CONFIG_MJPEG_ENCODER)
3646             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3647                                            s->intra_matrix, s->chroma_intra_matrix);
3648         break;
3649     case FMT_H261:
3650         if (CONFIG_H261_ENCODER)
3651             ff_h261_encode_picture_header(s, picture_number);
3652         break;
3653     case FMT_H263:
3654         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3655             ff_wmv2_encode_picture_header(s, picture_number);
3656         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3657             ff_msmpeg4_encode_picture_header(s, picture_number);
3658         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3659             ff_mpeg4_encode_picture_header(s, picture_number);
3660         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3661             ff_rv10_encode_picture_header(s, picture_number);
3662         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3663             ff_rv20_encode_picture_header(s, picture_number);
3664         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3665             ff_flv_encode_picture_header(s, picture_number);
3666         else if (CONFIG_H263_ENCODER)
3667             ff_h263_encode_picture_header(s, picture_number);
3668         break;
3669     case FMT_MPEG1:
3670         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3671             ff_mpeg1_encode_picture_header(s, picture_number);
3672         break;
3673     default:
3674         av_assert0(0);
3675     }
3676     bits= put_bits_count(&s->pb);
3677     s->header_bits= bits - s->last_bits;
3678
3679     for(i=1; i<context_count; i++){
3680         update_duplicate_context_after_me(s->thread_context[i], s);
3681     }
3682     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3683     for(i=1; i<context_count; i++){
3684         merge_context_after_encode(s, s->thread_context[i]);
3685     }
3686     emms_c();
3687     return 0;
3688 }
3689
3690 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3691     const int intra= s->mb_intra;
3692     int i;
3693
3694     s->dct_count[intra]++;
3695
3696     for(i=0; i<64; i++){
3697         int level= block[i];
3698
3699         if(level){
3700             if(level>0){
3701                 s->dct_error_sum[intra][i] += level;
3702                 level -= s->dct_offset[intra][i];
3703                 if(level<0) level=0;
3704             }else{
3705                 s->dct_error_sum[intra][i] -= level;
3706                 level += s->dct_offset[intra][i];
3707                 if(level>0) level=0;
3708             }
3709             block[i]= level;
3710         }
3711     }
3712 }
3713
/**
 * Forward-transform and quantize one 8x8 block, choosing the quantized
 * levels with a rate-distortion search instead of plain rounding.
 *
 * After the fdct (and optional denoising) every scan position up to the last
 * one that survives plain quantization gets one or two candidate levels.
 * A forward pass over those positions then scores each candidate, for every
 * surviving run start, as distortion + VLC length * lambda on top of
 * score_tab[] (best score of everything coded before that position), and
 * records the winning run/level in run_tab[]/level_tab[]. The block is
 * finally rebuilt by walking those tables backwards from the chosen last
 * coefficient.
 *
 * @param s        encoder context; read for the fdct, quantization matrices,
 *                 VLC length tables, scan tables and lambda2; coded_score[n]
 *                 is written once the search has run
 * @param block    64 values, transformed and quantized in place; surviving
 *                 levels are stored at intra_scantable.permutated positions
 * @param n        block number; for intra blocks n < 4 selects y_dc_scale
 *                 and q_intra_matrix, anything else the chroma counterparts
 * @param qscale   quantizer scale, also the index into the q_*_matrix tables
 * @param overflow receives nonzero when a quantized magnitude may exceed
 *                 s->max_qcoeff
 * @return scan index of the last nonzero coefficient; a value below the
 *         first searched position (0 for intra, -1 for inter) means that
 *         nothing was coded
 */
3714 static int dct_quantize_trellis_c(MpegEncContext *s,
3715                                   int16_t *block, int n,
3716                                   int qscale, int *overflow){
3717     const int *qmat;
3718     const uint8_t *scantable= s->intra_scantable.scantable;
3719     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3720     int max=0;
3721     unsigned int threshold1, threshold2;
3722     int bias=0;
3723     int run_tab[65];
3724     int level_tab[65];
3725     int score_tab[65];
3726     int survivor[65];
3727     int survivor_count;
3728     int last_run=0;
3729     int last_level=0;
3730     int last_score= 0;
3731     int last_i;
3732     int coeff[2][64];
3733     int coeff_count[64];
3734     int qmul, qadd, start_i, last_non_zero, i, dc;
3735     const int esc_length= s->ac_esc_length;
3736     uint8_t * length;
3737     uint8_t * last_length;
3738     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3739
3740     s->fdsp.fdct(block);
3741
3742     if(s->dct_error_sum)
3743         s->denoise_dct(s, block);
         /* Used below to reconstruct H.263/H.261 style levels as
          * alevel*qmul + qadd when measuring distortion. */
3744     qmul= qscale*16;
3745     qadd= ((qscale-1)|1)*8;
3746
3747     if (s->mb_intra) {
3748         int q;
3749         if (!s->h263_aic) {
3750             if (n < 4)
3751                 q = s->y_dc_scale;
3752             else
3753                 q = s->c_dc_scale;
3754             q = q << 3;
3755         } else{
3756             /* For AIC we skip quant/dequant of INTRADC */
3757             q = 1 << 3;
3758             qadd=0;
3759         }
3760
3761         /* note: block[0] is assumed to be positive */
3762         block[0] = (block[0] + (q >> 1)) / q;
             /* The DC coefficient is quantized above and excluded from the
              * search, which therefore starts at scan position 1. */
3763         start_i = 1;
3764         last_non_zero = 0;
3765         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3766         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3767             bias= 1<<(QMAT_SHIFT-1);
3768         length     = s->intra_ac_vlc_length;
3769         last_length= s->intra_ac_vlc_last_length;
3770     } else {
3771         start_i = 0;
3772         last_non_zero = -1;
3773         qmat = s->q_inter_matrix[qscale];
3774         length     = s->inter_ac_vlc_length;
3775         last_length= s->inter_ac_vlc_last_length;
3776     }
3777     last_i= start_i;
3778
         /* The unsigned compare (level+threshold1) > threshold2 used below is
          * true exactly when |level| > threshold1, i.e. when
          * (bias + |level|) >> QMAT_SHIFT is at least 1. */
3779     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3780     threshold2= (threshold1<<1);
3781
         /* Scan backwards for the last position that does not quantize to 0. */
3782     for(i=63; i>=start_i; i--) {
3783         const int j = scantable[i];
3784         int level = block[j] * qmat[j];
3785
3786         if(((unsigned)(level+threshold1))>threshold2){
3787             last_non_zero = i;
3788             break;
3789         }
3790     }
3791
         /* Collect the candidate levels for every position up to that one:
          * the rounded level and, when its magnitude exceeds 1, the level one
          * step closer to zero. Positions that would round to zero get a
          * single candidate of magnitude 1. */
3792     for(i=start_i; i<=last_non_zero; i++) {
3793         const int j = scantable[i];
3794         int level = block[j] * qmat[j];
3795
3796 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3797 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3798         if(((unsigned)(level+threshold1))>threshold2){
3799             if(level>0){
3800                 level= (bias + level)>>QMAT_SHIFT;
3801                 coeff[0][i]= level;
3802                 coeff[1][i]= level-1;
3803 //                coeff[2][k]= level-2;
3804             }else{
3805                 level= (bias - level)>>QMAT_SHIFT;
3806                 coeff[0][i]= -level;
3807                 coeff[1][i]= -level+1;
3808 //                coeff[2][k]= -level+2;
3809             }
3810             coeff_count[i]= FFMIN(level, 2);
3811             av_assert2(coeff_count[i]);
3812             max |=level;
3813         }else{
                 /* -1 for a negative product, +1 otherwise (relies on an
                  * arithmetic right shift of a 32-bit int). */
3814             coeff[0][i]= (level>>31)|1;
3815             coeff_count[i]= 1;
3816         }
3817     }
3818
3819     *overflow= s->max_qcoeff < max; //overflow might have happened
3820
         /* Nothing survives plain quantization: clear the searched part of
          * the block and return without running the search. */
3821     if(last_non_zero < start_i){
3822         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3823         return last_non_zero;
3824     }
3825
3826     score_tab[start_i]= 0;
3827     survivor[0]= start_i;
3828     survivor_count= 1;
3829
         /* Forward pass: score_tab[i+1] becomes the best score of coding
          * positions start_i..i with a nonzero level at position i. */
3830     for(i=start_i; i<=last_non_zero; i++){
3831         int level_index, j, zero_distortion;
3832         int dct_coeff= FFABS(block[ scantable[i] ]);
3833         int best_score=256*256*256*120;
3834
3835         if (s->fdsp.fdct == ff_fdct_ifast)
3836             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3837         zero_distortion= dct_coeff*dct_coeff;
3838
3839         for(level_index=0; level_index < coeff_count[i]; level_index++){
3840             int distortion;
3841             int level= coeff[level_index][i];
3842             const int alevel= FFABS(level);
3843             int unquant_coeff;
3844
3845             av_assert2(level);
3846
3847             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3848                 unquant_coeff= alevel*qmul + qadd;
3849             }else{ //MPEG1
3850                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3851                 if(s->mb_intra){
3852                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3853                         unquant_coeff =   (unquant_coeff - 1) | 1;
3854                 }else{
3855                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3856                         unquant_coeff =   (unquant_coeff - 1) | 1;
3857                 }
3858                 unquant_coeff<<= 3;
3859             }
3860
                 /* Distortion is measured relative to coding a zero here. */
3861             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* Offset the level by 64; values that then fit in 0..127 are
                  * priced through the VLC length tables, everything else is
                  * charged the escape length. */
3862             level+=64;
3863             if((level&(~127)) == 0){
3864                 for(j=survivor_count-1; j>=0; j--){
3865                     int run= i - survivor[j];
3866                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3867                     score += score_tab[i-run];
3868
3869                     if(score < best_score){
3870                         best_score= score;
3871                         run_tab[i+1]= run;
3872                         level_tab[i+1]= level-64;
3873                     }
3874                 }
3875
                     /* For H.263/H.261 the candidate is also priced as the
                      * final coefficient (last_length table) and the best
                      * ending is tracked while the pass is running. */
3876                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3877                     for(j=survivor_count-1; j>=0; j--){
3878                         int run= i - survivor[j];
3879                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3880                         score += score_tab[i-run];
3881                         if(score < last_score){
3882                             last_score= score;
3883                             last_run= run;
3884                             last_level= level-64;
3885                             last_i= i+1;
3886                         }
3887                     }
3888                 }
3889             }else{
3890                 distortion += esc_length*lambda;
3891                 for(j=survivor_count-1; j>=0; j--){
3892                     int run= i - survivor[j];
3893                     int score= distortion + score_tab[i-run];
3894
3895                     if(score < best_score){
3896                         best_score= score;
3897                         run_tab[i+1]= run;
3898                         level_tab[i+1]= level-64;
3899                     }
3900                 }
3901
3902                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3903                   for(j=survivor_count-1; j>=0; j--){
3904                         int run= i - survivor[j];
3905                         int score= distortion + score_tab[i-run];
3906                         if(score < last_score){
3907                             last_score= score;
3908                             last_run= run;
3909                             last_level= level-64;
3910                             last_i= i+1;
3911                         }
3912                     }
3913                 }
3914             }
3915         }
3916
3917         score_tab[i+1]= best_score;
3918
             /* Drop the most recent survivors whose score is worse than the
              * score just found; blocks with last_non_zero > 27 allow a slack
              * of lambda before a survivor is dropped. */
3919         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3920         if(last_non_zero <= 27){
3921             for(; survivor_count; survivor_count--){
3922                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3923                     break;
3924             }
3925         }else{
3926             for(; survivor_count; survivor_count--){
3927                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3928                     break;
3929             }
3930         }
3931
3932         survivor[ survivor_count++ ]= i+1;
3933     }
3934
         /* Other formats pick the end position only now: the lowest
          * score_tab[] entry wins, with a fixed lambda*2 added for every end
          * position other than 0. */
3935     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3936         last_score= 256*256*256*120;
3937         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3938             int score= score_tab[i];
3939             if(i) score += lambda*2; //FIXME exacter?
3940
3941             if(score < last_score){
3942                 last_score= score;
3943                 last_i= i;
3944                 last_level= level_tab[i];
3945                 last_run= run_tab[i];
3946             }
3947         }
3948     }
3949
3950     s->coded_score[n] = last_score;
3951
         /* Read block[0] before the block is cleared: for inter blocks it
          * still holds the transformed, unquantized value here. */
3952     dc= FFABS(block[0]);
3953     last_non_zero= last_i - 1;
3954     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3955
3956     if(last_non_zero < start_i)
3957         return last_non_zero;
3958
         /* Special case: an inter block whose only coded coefficient sits at
          * scan position 0. Re-evaluate its candidates against coding nothing
          * at all (score dc*dc). */
3959     if(last_non_zero == 0 && start_i == 0){
3960         int best_level= 0;
3961         int best_score= dc * dc;
3962
3963         for(i=0; i<coeff_count[0]; i++){
3964             int level= coeff[i][0];
3965             int alevel= FFABS(level);
3966             int unquant_coeff, score, distortion;
3967
3968             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3969                     unquant_coeff= (alevel*qmul + qadd)>>3;
3970             }else{ //MPEG1
3971                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3972                     unquant_coeff =   (unquant_coeff - 1) | 1;
3973             }
3974             unquant_coeff = (unquant_coeff + 4) >> 3;
3975             unquant_coeff<<= 3 + 3;
3976
3977             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3978             level+=64;
3979             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3980             else                    score= distortion + esc_length*lambda;
3981
3982             if(score < best_score){
3983                 best_score= score;
3984                 best_level= level - 64;
3985             }
3986         }
3987         block[0]= best_level;
3988         s->coded_score[n] = best_score - dc*dc;
3989         if(best_level == 0) return -1;
3990         else                return last_non_zero;
3991     }
3992
         /* Backtrack: write the chosen last coefficient, then follow
          * run_tab[]/level_tab[] towards the start of the block. */
3993     i= last_i;
3994     av_assert2(last_level);
3995
3996     block[ perm_scantable[last_non_zero] ]= last_level;
3997     i -= last_run + 1;
3998
3999     for(; i>start_i; i -= run_tab[i] + 1){
4000         block[ perm_scantable[i-1] ]= level_tab[i];
4001     }
4002
4003     return last_non_zero;
4004 }
4005
4006 //#define REFINE_STATS 1
4007 static int16_t basis[64][64];
4008
4009 static void build_basis(uint8_t *perm){
4010     int i, j, x, y;
4011     emms_c();
4012     for(i=0; i<8; i++){
4013         for(j=0; j<8; j++){
4014             for(y=0; y<8; y++){
4015                 for(x=0; x<8; x++){
4016                     double s= 0.25*(1<<BASIS_SHIFT);
4017                     int index= 8*i + j;
4018                     int perm_index= perm[index];
4019                     if(i==0) s*= sqrt(0.5);
4020                     if(j==0) s*= sqrt(0.5);
4021                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4022                 }
4023             }
4024         }
4025     }
4026 }
4027
4028 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4029                         int16_t *block, int16_t *weight, int16_t *orig,
4030                         int n, int qscale){
4031     int16_t rem[64];
4032     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4033     const uint8_t *scantable= s->intra_scantable.scantable;
4034     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4035 //    unsigned int threshold1, threshold2;
4036 //    int bias=0;
4037     int run_tab[65];
4038     int prev_run=0;
4039     int prev_level=0;
4040     int qmul, qadd, start_i, last_non_zero, i, dc;
4041     uint8_t * length;
4042     uint8_t * last_length;
4043     int lambda;
4044     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4045 #ifdef REFINE_STATS
4046 static int count=0;
4047 static int after_last=0;
4048 static int to_zero=0;
4049 static int from_zero=0;
4050 static int raise=0;
4051 static int lower=0;
4052 static int messed_sign=0;
4053 #endif
4054
4055     if(basis[0][0] == 0)
4056         build_basis(s->idsp.idct_permutation);
4057
4058     qmul= qscale*2;
4059     qadd= (qscale-1)|1;
4060     if (s->mb_intra) {
4061         if (!s->h263_aic) {
4062             if (n < 4)
4063                 q = s->y_dc_scale;
4064             else
4065                 q = s->c_dc_scale;
4066         } else{
4067             /* For AIC we skip quant/dequant of INTRADC */
4068             q = 1;
4069             qadd=0;
4070         }
4071         q <<= RECON_SHIFT-3;
4072         /* note: block[0] is assumed to be positive */
4073         dc= block[0]*q;
4074 //        block[0] = (block[0] + (q >> 1)) / q;
4075         start_i = 1;
4076 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4077 //            bias= 1<<(QMAT_SHIFT-1);
4078         length     = s->intra_ac_vlc_length;
4079         last_length= s->intra_ac_vlc_last_length;
4080     } else {
4081         dc= 0;
4082         start_i = 0;
4083         length     = s->inter_ac_vlc_length;
4084         last_length= s->inter_ac_vlc_last_length;
4085     }
4086     last_non_zero = s->block_last_index[n];
4087
4088 #ifdef REFINE_STATS
4089 {START_TIMER
4090 #endif
4091     dc += (1<<(RECON_SHIFT-1));
4092     for(i=0; i<64; i++){
4093         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4094     }
4095 #ifdef REFINE_STATS
4096 STOP_TIMER("memset rem[]")}
4097 #endif
4098     sum=0;
4099     for(i=0; i<64; i++){
4100         int one= 36;
4101         int qns=4;
4102         int w;
4103
4104         w= FFABS(weight[i]) + qns*one;
4105         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4106
4107         weight[i] = w;
4108 //        w=weight[i] = (63*qns + (w/2)) / w;
4109
4110         av_assert2(w>0);
4111         av_assert2(w<(1<<6));
4112         sum += w*w;
4113     }
4114     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4115 #ifdef REFINE_STATS
4116 {START_TIMER
4117 #endif
4118     run=0;
4119     rle_index=0;
4120     for(i=start_i; i<=last_non_zero; i++){
4121         int j= perm_scantable[i];
4122         const int level= block[j];
4123         int coeff;
4124
4125         if(level){
4126             if(level<0) coeff= qmul*level - qadd;
4127             else        coeff= qmul*level + qadd;
4128             run_tab[rle_index++]=run;
4129             run=0;
4130
4131             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4132         }else{
4133             run++;
4134         }
4135     }
4136 #ifdef REFINE_STATS
4137 if(last_non_zero>0){
4138 STOP_TIMER("init rem[]")
4139 }
4140 }
4141
4142 {START_TIMER
4143 #endif
4144     for(;;){
4145         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4146         int best_coeff=0;
4147         int best_change=0;
4148         int run2, best_unquant_change=0, analyze_gradient;
4149 #ifdef REFINE_STATS
4150 {START_TIMER
4151 #endif
4152         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4153
4154         if(analyze_gradient){
4155 #ifdef REFINE_STATS
4156 {START_TIMER
4157 #endif
4158             for(i=0; i<64; i++){
4159                 int w= weight[i];
4160
4161                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4162             }
4163 #ifdef REFINE_STATS
4164 STOP_TIMER("rem*w*w")}
4165 {START_TIMER
4166 #endif
4167             s->fdsp.fdct(d1);
4168 #ifdef REFINE_STATS
4169 STOP_TIMER("dct")}
4170 #endif
4171         }
4172
4173         if(start_i){
4174             const int level= block[0];
4175             int change, old_coeff;
4176
4177             av_assert2(s->mb_intra);
4178
4179             old_coeff= q*level;
4180
4181             for(change=-1; change<=1; change+=2){
4182                 int new_level= level + change;
4183                 int score, new_coeff;
4184
4185                 new_coeff= q*new_level;
4186                 if(new_coeff >= 2048 || new_coeff < 0)
4187                     continue;
4188
4189                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4190                                                   new_coeff - old_coeff);
4191                 if(score<best_score){
4192                     best_score= score;
4193                     best_coeff= 0;
4194                     best_change= change;
4195                     best_unquant_change= new_coeff - old_coeff;
4196                 }
4197             }
4198         }
4199
4200         run=0;
4201         rle_index=0;
4202         run2= run_tab[rle_index++];
4203         prev_level=0;
4204         prev_run=0;
4205
4206         for(i=start_i; i<64; i++){
4207             int j= perm_scantable[i];
4208             const int level= block[j];
4209             int change, old_coeff;
4210
4211             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4212                 break;
4213
4214             if(level){
4215                 if(level<0) old_coeff= qmul*level - qadd;
4216                 else        old_coeff= qmul*level + qadd;
4217                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4218             }else{
4219                 old_coeff=0;
4220                 run2--;
4221                 av_assert2(run2>=0 || i >= last_non_zero );
4222             }
4223
4224             for(change=-1; change<=1; change+=2){
4225                 int new_level= level + change;
4226                 int score, new_coeff, unquant_change;
4227
4228                 score=0;
4229                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4230                    continue;
4231
4232                 if(new_level){
4233                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4234                     else            new_coeff= qmul*new_level + qadd;
4235                     if(new_coeff >= 2048 || new_coeff <= -2048)
4236                         continue;
4237                     //FIXME check for overflow
4238
4239                     if(level){
4240                         if(level < 63 && level > -63){
4241                             if(i < last_non_zero)
4242                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4243                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4244                             else
4245                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4246                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4247                         }
4248                     }else{
4249                         av_assert2(FFABS(new_level)==1);
4250
4251                         if(analyze_gradient){
4252                             int g= d1[ scantable[i] ];
4253                             if(g && (g^new_level) >= 0)
4254                                 continue;
4255                         }
4256
4257                         if(i < last_non_zero){
4258                             int next_i= i + run2 + 1;
4259                             int next_level= block[ perm_scantable[next_i] ] + 64;
4260
4261                             if(next_level&(~127))
4262                                 next_level= 0;
4263
4264                             if(next_i < last_non_zero)
4265                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4266                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4267                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4268                             else
4269                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4270                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4271                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4272                         }else{
4273                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4274                             if(prev_level){
4275                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4276                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4277                             }
4278                         }
4279                     }
4280                 }else{
4281                     new_coeff=0;
4282                     av_assert2(FFABS(level)==1);
4283
4284                     if(i < last_non_zero){
4285                         int next_i= i + run2 + 1;
4286                         int next_level= block[ perm_scantable[next_i] ] + 64;
4287
4288                         if(next_level&(~127))
4289                             next_level= 0;
4290
4291                         if(next_i < last_non_zero)
4292                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4293                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4294                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4295                         else
4296                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4297                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4298                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4299                     }else{
4300                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4301                         if(prev_level){
4302                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4303                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4304                         }
4305                     }
4306                 }
4307
4308                 score *= lambda;
4309
4310                 unquant_change= new_coeff - old_coeff;
4311                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4312
4313                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4314                                                    unquant_change);
4315                 if(score<best_score){
4316                     best_score= score;
4317                     best_coeff= i;
4318                     best_change= change;
4319                     best_unquant_change= unquant_change;
4320                 }
4321             }
4322             if(level){
4323                 prev_level= level + 64;
4324                 if(prev_level&(~127))
4325                     prev_level= 0;
4326                 prev_run= run;
4327                 run=0;
4328             }else{
4329                 run++;
4330             }
4331         }
4332 #ifdef REFINE_STATS
4333 STOP_TIMER("iterative step")}
4334 #endif
4335
4336         if(best_change){
4337             int j= perm_scantable[ best_coeff ];
4338
4339             block[j] += best_change;
4340
4341             if(best_coeff > last_non_zero){
4342                 last_non_zero= best_coeff;
4343                 av_assert2(block[j]);
4344 #ifdef REFINE_STATS
4345 after_last++;
4346 #endif
4347             }else{
4348 #ifdef REFINE_STATS
4349 if(block[j]){
4350     if(block[j] - best_change){
4351         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4352             raise++;
4353         }else{
4354             lower++;
4355         }
4356     }else{
4357         from_zero++;
4358     }
4359 }else{
4360     to_zero++;
4361 }
4362 #endif
4363                 for(; last_non_zero>=start_i; last_non_zero--){
4364                     if(block[perm_scantable[last_non_zero]])
4365                         break;
4366                 }
4367             }
4368 #ifdef REFINE_STATS
4369 count++;
4370 if(256*256*256*64 % count == 0){
4371     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4372 }
4373 #endif
4374             run=0;
4375             rle_index=0;
4376             for(i=start_i; i<=last_non_zero; i++){
4377                 int j= perm_scantable[i];
4378                 const int level= block[j];
4379
4380                  if(level){
4381                      run_tab[rle_index++]=run;
4382                      run=0;
4383                  }else{
4384                      run++;
4385                  }
4386             }
4387
4388             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4389         }else{
4390             break;
4391         }
4392     }
4393 #ifdef REFINE_STATS
4394 if(last_non_zero>0){
4395 STOP_TIMER("iterative search")
4396 }
4397 }
4398 #endif
4399
4400     return last_non_zero;
4401 }
4402
4403 int ff_dct_quantize_c(MpegEncContext *s,
4404                         int16_t *block, int n,
4405                         int qscale, int *overflow)
4406 {
4407     int i, j, level, last_non_zero, q, start_i;
4408     const int *qmat;
4409     const uint8_t *scantable= s->intra_scantable.scantable;
4410     int bias;
4411     int max=0;
4412     unsigned int threshold1, threshold2;
4413
4414     s->fdsp.fdct(block);
4415
4416     if(s->dct_error_sum)
4417         s->denoise_dct(s, block);
4418
4419     if (s->mb_intra) {
4420         if (!s->h263_aic) {
4421             if (n < 4)
4422                 q = s->y_dc_scale;
4423             else
4424                 q = s->c_dc_scale;
4425             q = q << 3;
4426         } else
4427             /* For AIC we skip quant/dequant of INTRADC */
4428             q = 1 << 3;
4429
4430         /* note: block[0] is assumed to be positive */
4431         block[0] = (block[0] + (q >> 1)) / q;
4432         start_i = 1;
4433         last_non_zero = 0;
4434         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4435         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4436     } else {
4437         start_i = 0;
4438         last_non_zero = -1;
4439         qmat = s->q_inter_matrix[qscale];
4440         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4441     }
4442     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4443     threshold2= (threshold1<<1);
4444     for(i=63;i>=start_i;i--) {
4445         j = scantable[i];
4446         level = block[j] * qmat[j];
4447
4448         if(((unsigned)(level+threshold1))>threshold2){
4449             last_non_zero = i;
4450             break;
4451         }else{
4452             block[j]=0;
4453         }
4454     }
4455     for(i=start_i; i<=last_non_zero; i++) {
4456         j = scantable[i];
4457         level = block[j] * qmat[j];
4458
4459 //        if(   bias+level >= (1<<QMAT_SHIFT)
4460 //           || bias-level >= (1<<QMAT_SHIFT)){
4461         if(((unsigned)(level+threshold1))>threshold2){
4462             if(level>0){
4463                 level= (bias + level)>>QMAT_SHIFT;
4464                 block[j]= level;
4465             }else{
4466                 level= (bias - level)>>QMAT_SHIFT;
4467                 block[j]= -level;
4468             }
4469             max |=level;
4470         }else{
4471             block[j]=0;
4472         }
4473     }
4474     *overflow= s->max_qcoeff < max; //overflow might have happened
4475
4476     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4477     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4478         ff_block_permute(block, s->idsp.idct_permutation,
4479                          scantable, last_non_zero);
4480
4481     return last_non_zero;
4482 }
4483
/* Byte offset of field x inside MpegEncContext; the option entries below use
 * it to name the field that an option is stored in. */
4484 #define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag combination shared by the option entries below: video parameter and
 * encoding parameter. */
4485 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Options specific to the H.263 encoder, followed by the common options from
 * FF_MPV_COMMON_OPTS and the terminating { NULL } entry. */
4486 static const AVOption h263_options[] = {
4487     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4488     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4489     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4490     FF_MPV_COMMON_OPTS
4491     { NULL },
4492 };
4493
/* AVClass that ties h263_options to the encoder's private context. */
4494 static const AVClass h263_class = {
4495     .class_name = "H.263 encoder",
4496     .item_name  = av_default_item_name,
4497     .option     = h263_options,
4498     .version    = LIBAVUTIL_VERSION_INT,
4499 };
4500
/* H.263 encoder registration: the shared ff_mpv_encode_* entry points with
 * MpegEncContext as private data; YUV420P is the only listed pixel format. */
4501 AVCodec ff_h263_encoder = {
4502     .name           = "h263",
4503     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4504     .type           = AVMEDIA_TYPE_VIDEO,
4505     .id             = AV_CODEC_ID_H263,
4506     .priv_data_size = sizeof(MpegEncContext),
4507     .init           = ff_mpv_encode_init,
4508     .encode2        = ff_mpv_encode_picture,
4509     .close          = ff_mpv_encode_end,
4510     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4511     .priv_class     = &h263_class,
4512 };
4513
/* Options specific to the H.263+ encoder, followed by the common options
 * from FF_MPV_COMMON_OPTS and the terminating { NULL } entry. Each of the
 * listed options is an integer restricted to 0..1. */
4514 static const AVOption h263p_options[] = {
4515     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4516     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4517     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4518     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4519     FF_MPV_COMMON_OPTS
4520     { NULL },
4521 };
/* AVClass that ties h263p_options to the encoder's private context. */
4522 static const AVClass h263p_class = {
4523     .class_name = "H.263p encoder",
4524     .item_name  = av_default_item_name,
4525     .option     = h263p_options,
4526     .version    = LIBAVUTIL_VERSION_INT,
4527 };
4528
/* H.263+ encoder registration: same entry points as the other encoders in
 * this group, and additionally sets CODEC_CAP_SLICE_THREADS. */
4529 AVCodec ff_h263p_encoder = {
4530     .name           = "h263p",
4531     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4532     .type           = AVMEDIA_TYPE_VIDEO,
4533     .id             = AV_CODEC_ID_H263P,
4534     .priv_data_size = sizeof(MpegEncContext),
4535     .init           = ff_mpv_encode_init,
4536     .encode2        = ff_mpv_encode_picture,
4537     .close          = ff_mpv_encode_end,
4538     .capabilities   = CODEC_CAP_SLICE_THREADS,
4539     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4540     .priv_class     = &h263p_class,
4541 };
4542
/* Presumably defines msmpeg4v2_class, which is referenced below; the macro
 * is defined outside this file -- confirm there. */
4543 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4544
/* MS-MPEG4 version 2 encoder registration: the shared ff_mpv_encode_* entry
 * points with MpegEncContext as private data; YUV420P is the only listed
 * pixel format. */
4545 AVCodec ff_msmpeg4v2_encoder = {
4546     .name           = "msmpeg4v2",
4547     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4548     .type           = AVMEDIA_TYPE_VIDEO,
4549     .id             = AV_CODEC_ID_MSMPEG4V2,
4550     .priv_data_size = sizeof(MpegEncContext),
4551     .init           = ff_mpv_encode_init,
4552     .encode2        = ff_mpv_encode_picture,
4553     .close          = ff_mpv_encode_end,
4554     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4555     .priv_class     = &msmpeg4v2_class,
4556 };
4557
/* Presumably defines msmpeg4v3_class, which is referenced below; the macro
 * is defined outside this file -- confirm there. */
4558 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4559
/* MS-MPEG4 version 3 encoder registration. Note that the codec name is
 * "msmpeg4", without a version suffix. */
4560 AVCodec ff_msmpeg4v3_encoder = {
4561     .name           = "msmpeg4",
4562     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4563     .type           = AVMEDIA_TYPE_VIDEO,
4564     .id             = AV_CODEC_ID_MSMPEG4V3,
4565     .priv_data_size = sizeof(MpegEncContext),
4566     .init           = ff_mpv_encode_init,
4567     .encode2        = ff_mpv_encode_picture,
4568     .close          = ff_mpv_encode_end,
4569     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4570     .priv_class     = &msmpeg4v3_class,
4571 };
4572
/* Presumably defines wmv1_class, which is referenced below; the macro is
 * defined outside this file -- confirm there. */
4573 FF_MPV_GENERIC_CLASS(wmv1)
4574
/* Windows Media Video 7 encoder registration: the shared ff_mpv_encode_*
 * entry points with MpegEncContext as private data; YUV420P is the only
 * listed pixel format. */
4575 AVCodec ff_wmv1_encoder = {
4576     .name           = "wmv1",
4577     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4578     .type           = AVMEDIA_TYPE_VIDEO,
4579     .id             = AV_CODEC_ID_WMV1,
4580     .priv_data_size = sizeof(MpegEncContext),
4581     .init           = ff_mpv_encode_init,
4582     .encode2        = ff_mpv_encode_picture,
4583     .close          = ff_mpv_encode_end,
4584     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4585     .priv_class     = &wmv1_class,
4586 };